diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
33 files changed, 1077 insertions, 608 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 2934414d99..00a42111da 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -14,6 +14,7 @@ DRIVER_SOURCES = \ intel_decode.c \ intel_extensions.c \ intel_fbo.c \ + intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_regions.c \ intel_screen.c \ @@ -22,6 +23,7 @@ DRIVER_SOURCES = \ intel_pixel_bitmap.c \ intel_pixel_copy.c \ intel_pixel_draw.c \ + intel_pixel_read.c \ intel_state.c \ intel_swapbuffers.c \ intel_tex.c \ @@ -69,6 +71,7 @@ DRIVER_SOURCES = \ brw_vs_constval.c \ brw_vs_emit.c \ brw_vs_state.c \ + brw_vs_surface_state.c \ brw_vtbl.c \ brw_wm.c \ brw_wm_debug.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 577497bf6b..ec0b045de4 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -131,6 +131,7 @@ struct brw_context; #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_INPUT_VARYING 0x200 #define BRW_NEW_PSP 0x800 +#define BRW_NEW_WM_SURFACES 0x1000 #define BRW_NEW_FENCE 0x2000 #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 @@ -450,8 +451,6 @@ struct brw_context struct { struct brw_state_flags dirty; - struct brw_tracked_state **atoms; - GLuint nr_atoms; GLuint nr_color_regions; struct intel_region *color_regions[MAX_DRAW_BUFFERS]; @@ -471,7 +470,8 @@ struct brw_context int validated_bo_count; } state; - struct brw_cache cache; + struct brw_cache cache; /** non-surface items */ + struct brw_cache surface_cache; /* surface items */ struct brw_cached_batch_item *cached_batch_items; struct { @@ -555,11 +555,6 @@ struct brw_context GLuint vs_size; GLuint total_size; - /* Dynamic tracker which changes to reflect the state referenced - * by active fp and vp program parameters: - */ - struct brw_tracked_state tracked_state; - dri_bo *curbe_bo; /** Offset within curbe_bo of space for current curbe entry */ GLuint curbe_offset; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 9197fede2d..a1a6c53d0e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -36,6 +36,7 @@ #include "main/macros.h" #include "main/enums.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" #include "intel_regions.h" @@ -188,13 +189,6 @@ static void prepare_constant_buffer(struct brw_context *brw) GLfloat *buf; GLuint i; - /* Update our own dependency flags. This works because this - * function will also be called whenever fp or vp changes. - */ - brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); - brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags; - brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; - if (sz == 0) { if (brw->curbe.last_buf) { free(brw->curbe.last_buf); @@ -335,78 +329,11 @@ static void prepare_constant_buffer(struct brw_context *brw) */ } - -/** - * Copy Mesa program parameters into given constant buffer. - */ -static void -update_constant_buffer(struct brw_context *brw, - const struct gl_program_parameter_list *params, - dri_bo *const_buffer) -{ - struct intel_context *intel = &brw->intel; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - - /* copy Mesa program constants into the buffer */ - if (const_buffer && size > 0) { - - assert(const_buffer); - assert(const_buffer->size >= size); - - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(const_buffer); - memcpy(const_buffer->virtual, params->ParameterValues, size); - drm_intel_gem_bo_unmap_gtt(const_buffer); - } - else { - dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); - } - - if (0) { - int i; - for (i = 0; i < params->NumParameters; i++) { - float *p = params->ParameterValues[i]; - printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]); - } - } - } -} - - -/** Copy current vertex program's parameters into the constant buffer */ -static void -update_vertex_constant_buffer(struct brw_context *brw) -{ - struct brw_vertex_program *vp = - (struct brw_vertex_program *) brw->vertex_program; - if (0) { - printf("update VS constants in buffer %p\n", vp->const_buffer); - printf("program %u\n", vp->program.Base.Id); - } - if (vp->use_const_buffer) - update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); -} - - -/** Copy current fragment program's parameters into the constant buffer */ -static void -update_fragment_constant_buffer(struct brw_context *brw) -{ - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - if (fp->use_const_buffer) - update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); -} - - static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - update_vertex_constant_buffer(brw); - update_fragment_constant_buffer(brw); - BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); @@ -428,7 +355,7 @@ static void emit_constant_buffer(struct brw_context *brw) */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */ + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 5342622a73..47dc99dcf4 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -416,6 +416,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, out: UNLOCK_HARDWARE(intel); + brw_state_cache_check_size(brw); + if (warn) fprintf(stderr, "i965: Single primitive emit potentially exceeded " "available aperture space\n"); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index b91b20bec6..3ef56a0068 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -343,7 +343,7 @@ static void brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; GLuint i; const unsigned char *ptr = NULL; GLuint interleave = 0; @@ -362,11 +362,11 @@ static void brw_prepare_vertices(struct brw_context *brw) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ - while (tmp) { - GLuint i = _mesa_ffsll(tmp)-1; + while (vs_inputs) { + GLuint i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1<<i); + vs_inputs &= ~(1 << i); enabled[nr_enabled++] = input; } @@ -477,17 +477,17 @@ static void brw_emit_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint i; GLuint nr_enabled = 0; /* Accumulate the list of enabled arrays. */ - while (tmp) { - i = _mesa_ffsll(tmp)-1; + while (vs_inputs) { + i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1<<i); + vs_inputs &= ~(1 << i); enabled[nr_enabled++] = input; } @@ -635,7 +635,7 @@ static void brw_emit_indices(struct brw_context *brw) if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count; + ib_size = get_size(index_buffer->type) * index_buffer->count - 1; /* Emit the indexbuffer packet: */ diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 62c98bd8bb..b7116d45f5 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -97,7 +97,7 @@ struct brw_glsl_call; #define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 4000 +#define BRW_EU_MAX_INSN 10000 struct brw_compile { struct brw_instruction store[BRW_EU_MAX_INSN]; @@ -171,9 +171,9 @@ static INLINE struct brw_reg brw_reg( GLuint file, { struct brw_reg reg; if (type == BRW_GENERAL_REGISTER_FILE) - assert(nr < 128); + assert(nr < BRW_MAX_GRF); else if (type == BRW_MESSAGE_REGISTER_FILE) - assert(nr < 9); + assert(nr < BRW_MAX_MRF); else if (type == BRW_ARCHITECTURE_REGISTER_FILE) assert(nr <= BRW_ARF_IP); @@ -538,6 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) static INLINE struct brw_reg brw_message_reg( GLuint nr ) { + assert(nr < BRW_MAX_MRF); return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); @@ -855,12 +856,10 @@ void brw_math( struct brw_compile *p, void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLuint scratch_offset ); void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLboolean relAddr, GLuint location, GLuint bind_table_index ); @@ -875,7 +874,6 @@ void brw_dp_READ_4_vs( struct brw_compile *p, void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, - GLuint msg_reg_nr, GLuint scratch_offset ); /* If/else/endif. Works by manipulating the execution flags on each diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 60ea44f7a9..0ee208d7a4 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -484,6 +484,10 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, { struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + insn->header.execution_size = 1; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_DISABLE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; @@ -865,9 +869,9 @@ void brw_math_16( struct brw_compile *p, */ void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, - GLuint msg_reg_nr, GLuint scratch_offset ) { + GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); @@ -877,7 +881,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); - + brw_pop_insn_state(p); } @@ -912,9 +916,9 @@ void brw_dp_WRITE_16( struct brw_compile *p, */ void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLuint scratch_offset ) { + GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); @@ -924,7 +928,7 @@ void brw_dp_READ_16( struct brw_compile *p, brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); - + brw_pop_insn_state(p); } @@ -958,21 +962,26 @@ void brw_dp_READ_16( struct brw_compile *p, */ void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLboolean relAddr, GLuint location, GLuint bind_table_index ) { + /* XXX: relAddr not implemented */ + GLuint msg_reg_nr = 1; { + struct brw_reg b; brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); - /* set message header global offset field (reg 0, element 2) */ - /* Note that grf[0] will be copied to mrf[1] implicitly by the SEND instr */ - brw_MOV(p, - retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), - brw_imm_d(location)); + /* Setup MRF[1] with location/offset into const buffer */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + brw_MOV(p, b, brw_imm_ud(location)); brw_pop_insn_state(p); } @@ -988,7 +997,7 @@ void brw_dp_READ_4( struct brw_compile *p, dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); brw_set_dest(insn, dest); - brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(insn, bind_table_index, @@ -1227,7 +1236,7 @@ void brw_urb_WRITE(struct brw_compile *p, { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - assert(msg_length < 16); + assert(msg_length < BRW_MAX_MRF); brw_set_dest(insn, dest); brw_set_src0(insn, src0); diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c index 299357409c..d27c6c24ca 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.c +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -37,6 +37,9 @@ #include "tnl/tnl.h" #include "brw_context.h" #include "brw_fallback.h" +#include "intel_chipset.h" +#include "intel_fbo.h" +#include "intel_regions.h" #include "glapi/glapi.h" @@ -44,6 +47,7 @@ static GLboolean do_check_fallback(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; GLcontext *ctx = &brw->intel.ctx; GLuint i; @@ -81,6 +85,33 @@ static GLboolean do_check_fallback(struct brw_context *brw) return GL_TRUE; } + /* _NEW_BUFFERS */ + if (IS_965(intel->intelScreen->deviceID) && + !IS_G4X(intel->intelScreen->deviceID)) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + /* The original gen4 hardware couldn't set up WM surfaces pointing + * at an offset within a tile, which can happen when rendering to + * anything but the base level of a texture or the +X face/0 depth. + * This was fixed with the 4 Series hardware. + * + * For these original chips, you would have to make the depth and + * color destination surfaces include information on the texture + * type, LOD, face, and various limits to use them as a destination. + * I would have done this, but there's also a nasty requirement that + * the depth and the color surfaces all be of the same LOD, which + * may be a worse requirement than this alignment. (Also, we may + * want to just demote the texture to untiled, instead). + */ + if (irb->region && irb->region->tiling != I915_TILING_NONE && + (irb->region->draw_offset & 4095)) { + DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); + return GL_TRUE; + } + } + } return GL_FALSE; } diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 9bc5c35139..67c39e509c 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -118,7 +118,10 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + if (brw->vs.bind_bo != NULL) + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + else + OUT_BATCH(0); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ @@ -241,6 +244,8 @@ static void emit_depthbuffer(struct brw_context *brw) return; } + assert(region->tiling != I915_TILING_X); + BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index c3c85978f4..e1c2c7777b 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -166,6 +166,9 @@ static void upload_sf_prog(struct brw_context *brw) key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + /* _NEW_HINT */ + key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* _NEW_POLYGON */ if (key.do_twoside_color) { /* If we're rendering to a FBO, we have to invert the polygon @@ -188,7 +191,7 @@ static void upload_sf_prog(struct brw_context *brw) const struct brw_tracked_state brw_sf_prog = { .dirty = { - .mesa = (_NEW_LIGHT|_NEW_POLYGON|_NEW_POINT), + .mesa = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index 1c0fb70fe0..6426b6df9f 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -51,7 +51,8 @@ struct brw_sf_prog_key { GLuint do_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; - GLuint pad:10; + GLuint linear_color:1; /**< linear interp vs. perspective interp */ + GLuint pad:25; GLenum SpriteOrigin; }; diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index 862835f157..42726d4da4 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -161,16 +161,16 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) brw_push_insn_state(p); - brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1)); + brw_MUL(p, c->pv, c->pv, brw_imm_d(nr*2+1)); brw_JMPI(p, ip, ip, c->pv); copy_colors(c, c->vert[1], c->vert[0]); copy_colors(c, c->vert[2], c->vert[0]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1)); + brw_JMPI(p, ip, ip, brw_imm_d(nr*4+1)); copy_colors(c, c->vert[0], c->vert[1]); copy_colors(c, c->vert[2], c->vert[1]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr*2)); + brw_JMPI(p, ip, ip, brw_imm_d(nr*2)); copy_colors(c, c->vert[0], c->vert[2]); copy_colors(c, c->vert[1], c->vert[2]); @@ -195,11 +195,11 @@ static void do_flatshade_line( struct brw_sf_compile *c ) brw_push_insn_state(p); - brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1)); + brw_MUL(p, c->pv, c->pv, brw_imm_d(nr+1)); brw_JMPI(p, ip, ip, c->pv); copy_colors(c, c->vert[1], c->vert[0]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr)); + brw_JMPI(p, ip, ip, brw_imm_d(nr)); copy_colors(c, c->vert[0], c->vert[1]); brw_pop_insn_state(p); @@ -218,7 +218,7 @@ static void alloc_regs( struct brw_sf_compile *c ) /* Values computed by fixed function unit: */ - c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD); + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D); c->det = brw_vec1_grf(1, 2); c->dx0 = brw_vec1_grf(1, 3); c->dx2 = brw_vec1_grf(1, 4); @@ -295,9 +295,6 @@ static void invert_det( struct brw_sf_compile *c) } -#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \ - FRAG_BIT_COL0 | \ - FRAG_BIT_COL1) static GLboolean calculate_masks( struct brw_sf_compile *c, GLuint reg, @@ -306,9 +303,16 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, GLushort *pc_linear) { GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); - GLuint persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS; + GLuint persp_mask; GLuint linear_mask; + if (c->key.do_flat_shading || c->key.linear_color) + persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS | + FRAG_BIT_COL0 | + FRAG_BIT_COL1); + else + persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS); + if (c->key.do_flat_shading) linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1); else @@ -674,7 +678,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1<<_3DPRIM_POLYGON) | (1<<_3DPRIM_RECTLIST) | (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); @@ -695,7 +699,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1<<_3DPRIM_LINESTRIP_CONT) | (1<<_3DPRIM_LINESTRIP_BF) | (1<<_3DPRIM_LINESTRIP_CONT_BF))); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); @@ -708,7 +712,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 81b0a45998..bf9f6cae55 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -72,11 +72,13 @@ const struct brw_tracked_state brw_sf_vp; const struct brw_tracked_state brw_state_base_address; const struct brw_tracked_state brw_urb_fence; const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_surfaces; const struct brw_tracked_state brw_vs_prog; const struct brw_tracked_state brw_vs_unit; const struct brw_tracked_state brw_wm_input_sizes; const struct brw_tracked_state brw_wm_prog; const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_constant_surface; const struct brw_tracked_state brw_wm_surfaces; const struct brw_tracked_state brw_wm_unit; @@ -91,6 +93,20 @@ const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; +/** + * Use same key for WM and VS surfaces. + */ +struct brw_surface_key { + GLenum target, depthmode; + dri_bo *bo; + GLint format, internal_format; + GLint first_level, last_level; + GLint width, height, depth; + GLint pitch, cpp; + uint32_t tiling; + GLuint offset; +}; + /*********************************************************************** * brw_state.c */ @@ -135,8 +151,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache, void *aux_return); void brw_state_cache_check_size( struct brw_context *brw ); -void brw_init_cache( struct brw_context *brw ); -void brw_destroy_cache( struct brw_context *brw ); +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c @@ -150,4 +166,9 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, void brw_destroy_batch_cache( struct brw_context *brw ); void brw_clear_batch_cache_flush( struct brw_context *brw ); +/* brw_wm_surface_state.c */ +dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_surface_key *key ); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index d5b5166406..e40d7a0416 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -56,9 +56,9 @@ * incorrect program is run for the other instance. */ +#include "main/imports.h" #include "brw_state.h" #include "intel_batchbuffer.h" -#include "main/imports.h" /* XXX: Fixme - have to include these to get the sizes of the prog_key * structs: @@ -69,8 +69,10 @@ #include "brw_sf.h" #include "brw_gs.h" -static GLuint hash_key( const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) + +static GLuint +hash_key(const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { GLuint *ikey = (GLuint *)key; GLuint hash = 0, i; @@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size, return hash; } + /** * Marks a new buffer as being chosen for the given cache id. */ @@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, cache->brw->state.dirty.cache |= 1 << cache_id; } + static struct brw_cache_item * search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, GLuint hash, const void *key, GLuint key_size, @@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, } -static void rehash( struct brw_cache *cache ) +static void +rehash(struct brw_cache *cache) { struct brw_cache_item **items; struct brw_cache_item *c, *next; @@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache ) cache->size = size; } + /** * Returns the buffer object matching cache_id and key, or NULL. */ -dri_bo *brw_search_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs, - void *aux_return ) +dri_bo * +brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs, + void *aux_return) { struct brw_cache_item *item; GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); @@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache, return item->bo; } + dri_bo * brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, @@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache, return bo; } -/* This doesn't really work with aux data. Use search/upload instead + +/** + * This doesn't really work with aux data. Use search/upload instead */ dri_bo * brw_cache_data_sz(struct brw_cache *cache, @@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache, return bo; } + /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. * @@ -319,21 +330,22 @@ enum pool_type { DW_GENERAL_STATE }; + static void -brw_init_cache_id( struct brw_context *brw, - const char *name, - enum brw_cache_id id, - GLuint key_size, - GLuint aux_size) +brw_init_cache_id(struct brw_cache *cache, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) { - struct brw_cache *cache = &brw->cache; - cache->name[id] = strdup(name); cache->key_size[id] = key_size; cache->aux_size[id] = aux_size; } -void brw_init_cache( struct brw_context *brw ) + +static void +brw_init_non_surface_cache(struct brw_context *brw) { struct brw_cache *cache = &brw->cache; @@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw ) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * - sizeof(struct brw_cache_item)); + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_VP", BRW_CC_VP, sizeof(struct brw_cc_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT, sizeof(struct brw_cc_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG, sizeof(struct brw_wm_prog_key), sizeof(struct brw_wm_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR, sizeof(struct brw_sampler_default_color), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER, 0, /* variable key/data size */ 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT, sizeof(struct brw_wm_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG, sizeof(struct brw_sf_prog_key), sizeof(struct brw_sf_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_VP", BRW_SF_VP, sizeof(struct brw_sf_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT, sizeof(struct brw_sf_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT, sizeof(struct brw_vs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG, sizeof(struct brw_vs_prog_key), sizeof(struct brw_vs_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT, sizeof(struct brw_clip_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG, sizeof(struct brw_clip_prog_key), sizeof(struct brw_clip_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT, sizeof(struct brw_gs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG, sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); +} + + +static void +brw_init_surface_cache(struct brw_context *brw) +{ + struct brw_cache *cache = &brw->surface_cache; + + cache->brw = brw; - brw_init_cache_id(brw, + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE, sizeof(struct brw_surface_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND, 0, 0); } + +void +brw_init_caches(struct brw_context *brw) +{ + brw_init_non_surface_cache(brw); + brw_init_surface_cache(brw); +} + + static void -brw_clear_cache( struct brw_context *brw ) +brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { struct brw_cache_item *c, *next; GLuint i; @@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw ) if (INTEL_DEBUG & DEBUG_STATE) _mesa_printf("%s\n", __FUNCTION__); - for (i = 0; i < brw->cache.size; i++) { - for (c = brw->cache.items[i]; c; c = next) { + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { int j; next = c->next; @@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw ) free((void *)c->key); free(c); } - brw->cache.items[i] = NULL; + cache->items[i] = NULL; } - brw->cache.n_items = 0; + cache->n_items = 0; if (brw->curbe.last_buf) { _mesa_free(brw->curbe.last_buf); @@ -483,25 +517,46 @@ brw_clear_cache( struct brw_context *brw ) brw->state.dirty.cache |= ~0; } -void brw_state_cache_check_size( struct brw_context *brw ) + +void +brw_state_cache_check_size(struct brw_context *brw) { + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + /* un-tuned guess. We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. */ if (brw->cache.n_items > 1000) - brw_clear_cache(brw); + brw_clear_cache(brw, &brw->cache); + + if (brw->surface_cache.n_items > 1000) + brw_clear_cache(brw, &brw->surface_cache); } -void brw_destroy_cache( struct brw_context *brw ) + +static void +brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { GLuint i; - brw_clear_cache(brw); + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { - dri_bo_unreference(brw->cache.last_bo[i]); - free(brw->cache.name[i]); + dri_bo_unreference(cache->last_bo[i]); + free(cache->name[i]); } - free(brw->cache.items); - brw->cache.items = NULL; - brw->cache.size = 0; + free(cache->items); + cache->items = NULL; + cache->size = 0; +} + + +void +brw_destroy_caches(struct brw_context *brw) +{ + brw_destroy_cache(brw, &brw->cache); + brw_destroy_cache(brw, &brw->surface_cache); } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index a713262269..e94fa7d2b4 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -126,6 +126,8 @@ static void dump_wm_surface_state(struct brw_context *brw) surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not "); state_out(name, surf, surfoff, 4, "mip base %d\n", surf->ss4.min_lod); + state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", + surf->ss5.x_offset, surf->ss5.y_offset); dri_bo_unmap(surf_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 5de1450e61..c6dfea4743 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -59,11 +59,12 @@ const struct brw_tracked_state *atoms[] = &brw_curbe_offsets, &brw_recalculate_urb_fence, - &brw_cc_vp, &brw_cc_unit, - &brw_wm_surfaces, /* must do before samplers */ + &brw_vs_surfaces, /* must do before unit */ + &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ + &brw_wm_surfaces, /* must do before samplers and unit */ &brw_wm_samplers, &brw_wm_unit, @@ -88,54 +89,26 @@ const struct brw_tracked_state *atoms[] = &brw_line_stipple, &brw_aa_line_parameters, - /* Ordering of the commands below is documented as fixed. - */ -#if 0 - &brw_pipelined_state_pointers, - &brw_urb_fence, - &brw_constant_buffer_state, -#else + &brw_psp_urb_cbs, -#endif &brw_drawing_rect, &brw_indices, &brw_vertices, - NULL, /* brw_constant_buffer */ + &brw_constant_buffer }; void brw_init_state( struct brw_context *brw ) { - GLuint i; - - brw_init_cache(brw); - - brw->state.atoms = _mesa_malloc(sizeof(atoms)); - brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); - _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms)); - - /* Patch in a pointer to the dynamic state atom: - */ - for (i = 0; i < brw->state.nr_atoms; i++) - if (brw->state.atoms[i] == NULL) - brw->state.atoms[i] = &brw->curbe.tracked_state; - - _mesa_memcpy(&brw->curbe.tracked_state, - &brw_constant_buffer, - sizeof(brw_constant_buffer)); + brw_init_caches(brw); } void brw_destroy_state( struct brw_context *brw ) { - if (brw->state.atoms) { - _mesa_free(brw->state.atoms); - brw->state.atoms = NULL; - } - - brw_destroy_cache(brw); + brw_destroy_caches(brw); brw_destroy_batch_cache(brw); } @@ -218,6 +191,7 @@ static struct dirty_bit_map mesa_bits[] = { DEFINE_BIT(_NEW_MULTISAMPLE), DEFINE_BIT(_NEW_TRACK_MATRIX), DEFINE_BIT(_NEW_PROGRAM), + DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), {0, 0, 0} }; @@ -336,7 +310,7 @@ void brw_validate_state( struct brw_context *brw ) /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; @@ -367,8 +341,8 @@ void brw_upload_state(struct brw_context *brw) _mesa_memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < brw->state.nr_atoms; i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; assert(atom->dirty.mesa || @@ -397,7 +371,7 @@ void brw_upload_state(struct brw_context *brw) } else { for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 89e2981203..39e75a0455 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -33,6 +33,14 @@ #ifndef BRW_STRUCTS_H #define BRW_STRUCTS_H + +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + +/** Number of message register file registers */ +#define BRW_MAX_MRF 16 + + /* Command packets: */ struct header @@ -1075,7 +1083,7 @@ struct brw_surface_state GLuint y_offset:4; GLuint pad0:1; GLuint x_offset:7; - } ss5; /* NEW in Integrated Graphics Device */ + } ss5; /* New in G4X */ }; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 51a617fcb4..67d8d96947 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -28,7 +28,6 @@ * Authors: * Keith Whitwell <keith@tungstengraphics.com> */ - /* Code to layout images in a mipmap tree for i965. */ @@ -40,14 +39,15 @@ #define FILE_DEBUG_FLAG DEBUG_MIPTREE -GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt ) +GLboolean brw_miptree_layout(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling) { - /* XXX: these vary depending on image format: - */ -/* GLint align_w = 4; */ + /* XXX: these vary depending on image format: */ + /* GLint align_w = 4; */ switch (mt->target) { - case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_3D: { GLuint width = mt->width0; GLuint height = mt->height0; @@ -59,25 +59,25 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t GLuint align_w = 4; mt->total_height = 0; - + if (mt->compressed) { align_w = intel_compressed_alignment(mt->internal_format); mt->pitch = ALIGN(width, align_w); pack_y_pitch = (height + 3) / 4; } else { - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); - pack_y_pitch = ALIGN(mt->height0, align_h); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); + pack_y_pitch = ALIGN(mt->height0, align_h); } - pack_x_pitch = mt->pitch; + pack_x_pitch = width; pack_x_nr = 1; - for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { + for (level = mt->first_level ; level <= mt->last_level ; level++) { GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; GLint x = 0; GLint y = 0; GLint q, j; - + intel_miptree_set_level_info(mt, level, nr_images, 0, mt->total_height, width, height, depth); @@ -89,7 +89,7 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t } x = 0; - y += pack_y_pitch; + y += pack_y_pitch; } @@ -98,40 +98,40 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t height = minify(height); depth = minify(depth); - if (mt->compressed) { - pack_y_pitch = (height + 3) / 4; - - if (pack_x_pitch > ALIGN(width, align_w)) { - pack_x_pitch = ALIGN(width, align_w); - pack_x_nr <<= 1; - } - } else { - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - pack_y_pitch = ALIGN(pack_y_pitch, align_h); - } - } + if (mt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > ALIGN(width, align_w)) { + pack_x_pitch = ALIGN(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = ALIGN(pack_y_pitch, align_h); + } + } } break; } default: - i945_miptree_layout_2d(intel, mt); + i945_miptree_layout_2d(intel, mt, tiling); break; } - DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, - mt->pitch, + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + mt->pitch, mt->total_height, mt->cpp, mt->pitch * mt->total_height * mt->cpp ); - + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index 7673dd36eb..47bc45c912 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -143,7 +143,19 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; - + + brw->urb.constrained = 0; + + if (BRW_IS_G4X(brw)) { + brw->urb.nr_vs_entries = 64; + if (check_urb_layout(brw)) { + goto done; + } else { + brw->urb.constrained = 1; + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + } + } + if (!check_urb_layout(brw)) { brw->urb.nr_vs_entries = limits[VS].min_nr_entries; brw->urb.nr_gs_entries = limits[GS].min_nr_entries; @@ -169,9 +181,8 @@ static void recalculate_urb_fence( struct brw_context *brw ) if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) _mesa_printf("URB CONSTRAINED\n"); } - else - brw->urb.constrained = 0; +done: if (INTEL_DEBUG & DEBUG_URB) _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", brw->urb.vs_start, diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 1e4f66091e..4a591365c9 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -58,6 +58,7 @@ struct brw_vs_compile { GLuint first_output; GLuint nr_outputs; + GLuint first_overflow_output; /**< VERT_ATTRIB_x */ GLuint first_tmp; GLuint last_tmp; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index b69616d6e5..9467295d34 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -69,13 +69,18 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; -#if 0 - if (c->vp->program.Base.Parameters->NumParameters >= 6) - c->vp->use_const_buffer = 1; + /* Determine whether to use a real constant buffer or use a block + * of GRF registers for constants. The later is faster but only + * works if everything fits in the GRF. + * XXX this heuristic/check may need some fine tuning... + */ + if (c->vp->program.Base.Parameters->NumParameters + + c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF) + c->vp->use_const_buffer = GL_TRUE; else -#endif c->vp->use_const_buffer = GL_FALSE; - /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ + + /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/ /* r0 -- reserved as usual */ @@ -124,15 +129,16 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } } - /* Allocate outputs: TODO: could organize the non-position outputs - * to go straight into message regs. + /* Allocate outputs. The non-position outputs go straight into message regs. */ c->nr_outputs = 0; c->first_output = reg; + c->first_overflow_output = 0; mrf = 4; for (i = 0; i < VERT_RESULT_MAX; i++) { if (c->prog_data.outputs_written & (1 << i)) { c->nr_outputs++; + assert(i < Elements(c->regs[PROGRAM_OUTPUT])); if (i == VERT_RESULT_HPOS) { c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; @@ -143,8 +149,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) mrf++; /* just a placeholder? XXX fix later stages & remove this */ } else { - c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); - mrf++; + if (mrf < 16) { + c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } + else { + /* too many vertex results to fit in MRF, use GRF for overflow */ + if (!c->first_overflow_output) + c->first_overflow_output = i; + c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } } } } @@ -709,10 +724,11 @@ get_constant(struct brw_vs_compile *c, struct brw_compile *p = &c->func; struct brw_reg const_reg; struct brw_reg const2_reg; + const GLboolean relAddr = src->RelAddr; assert(argIndex < 3); - if (c->current_const[argIndex].index != src->Index || src->RelAddr) { + if (c->current_const[argIndex].index != src->Index || relAddr) { struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; c->current_const[argIndex].index = src->Index; @@ -725,13 +741,13 @@ get_constant(struct brw_vs_compile *c, brw_dp_READ_4_vs(p, c->current_const[argIndex].reg,/* writeback dest */ 0, /* oword */ - src->RelAddr, /* relative indexing? */ + relAddr, /* relative indexing? */ addrReg, /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); - if (src->RelAddr) { + if (relAddr) { /* second read */ const2_reg = get_tmp(c); @@ -742,7 +758,7 @@ get_constant(struct brw_vs_compile *c, brw_dp_READ_4_vs(p, const2_reg, /* writeback dest */ 1, /* oword */ - src->RelAddr, /* relative indexing? */ + relAddr, /* relative indexing? */ addrReg, /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER @@ -752,7 +768,7 @@ get_constant(struct brw_vs_compile *c, const_reg = c->current_const[argIndex].reg; - if (src->RelAddr) { + if (relAddr) { /* merge the two Owords into the constant register */ /* const_reg[7..4] = const2_reg[7..4] */ brw_MOV(p, @@ -1061,6 +1077,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) struct brw_reg m0 = brw_message_reg(0); struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; + int eot; if (c->key.copy_edgeflag) { brw_MOV(p, @@ -1070,7 +1087,9 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Build ndc coords */ ndc = get_tmp(c); + /* ndc = 1.0 / pos.w */ emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + /* ndc.xyz = pos * ndc */ brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); /* Update the header for point size, user clipping flags, and -ve rhw @@ -1137,18 +1156,51 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_MOV(p, offset(m0, 2), ndc); brw_MOV(p, offset(m0, 3), pos); + eot = (c->first_overflow_output == 0); + brw_urb_WRITE(p, brw_null_reg(), /* dest */ 0, /* starting mrf reg nr */ c->r0, /* src */ 0, /* allocate */ 1, /* used */ - c->nr_outputs + 3, /* msg len */ + MIN2(c->nr_outputs + 3, (BRW_MAX_MRF-1)), /* msg len */ 0, /* response len */ - 1, /* eot */ + eot, /* eot */ 1, /* writes complete */ 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); + + if (c->first_overflow_output > 0) { + /* Not all of the vertex outputs/results fit into the MRF. + * Move the overflowed attributes from the GRF to the MRF and + * issue another brw_urb_WRITE(). + */ + /* XXX I'm not 100% sure about which MRF regs to use here. Starting + * at mrf[4] atm... + */ + GLuint i, mrf = 0; + for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) { + if (c->prog_data.outputs_written & (1 << i)) { + /* move from GRF to MRF */ + brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); + mrf++; + } + } + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 4, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + mrf+1, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + BRW_MAX_MRF-1, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + } } @@ -1177,15 +1229,15 @@ post_vs_emit( struct brw_vs_compile *c, */ void brw_vs_emit(struct brw_vs_compile *c ) { -#define MAX_IFSN 32 +#define MAX_IF_DEPTH 32 +#define MAX_LOOP_DEPTH 32 struct brw_compile *p = &c->func; - GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn, if_insn = 0; + const GLuint nr_insns = c->vp->program.Base.NumInstructions; + GLuint insn, if_depth = 0, loop_depth = 0; GLuint end_offset = 0; struct brw_instruction *end_inst, *last_inst; - struct brw_instruction *if_inst[MAX_IFSN]; - struct brw_indirect stack_index = brw_indirect(0, 0); - + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + const struct brw_indirect stack_index = brw_indirect(0, 0); GLuint index; GLuint file; @@ -1219,7 +1271,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) for (insn = 0; insn < nr_insns; insn++) { - struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; struct brw_reg args[3], dst; GLuint i; @@ -1232,7 +1284,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) */ if (inst->Opcode != OPCODE_SWZ) for (i = 0; i < 3; i++) { - struct prog_src_register *src = &inst->SrcReg[i]; + const struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) @@ -1376,16 +1428,51 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_xpd(p, dst, args[0], args[1]); break; case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); break; +#if 0 + case OPCODE_BGNLOOP: + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + break; + case OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_ENDLOOP: + { + struct brw_instruction *inst0, *inst1; + loop_depth--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + inst0->bits3.if_else.pop_count = 0; + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = inst1 - inst0; + inst0->bits3.if_else.pop_count = 0; + } + } + } + break; +#else + (void) loop_inst; + (void) loop_depth; +#endif case OPCODE_BRA: brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c new file mode 100644 index 0000000000..89f47522a1 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -0,0 +1,226 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "main/mtypes.h" +#include "main/texformat.h" +#include "main/texstore.h" +#include "shader/prog_parameter.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/* Creates a new VS constant buffer reflecting the current VS program's + * constants, if needed by the VS program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +static drm_intel_bo * +brw_vs_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_VERTEX_PROGRAM */ + if (!vp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} + +/** + * Update the surface state for a VS constant buffer. + * + * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer. + */ +static void +brw_update_vs_constant_surface( GLcontext *ctx, + GLuint surf) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + + assert(surf == 0); + + /* If we're in this state update atom, we need to update VS constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(vp->const_buffer); + vp->const_buffer = brw_vs_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (vp->const_buffer == 0) { + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = NULL; + return; + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = vp->const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->vs.surf_bo[surf] == NULL) { + brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } +} + + +/** + * Constructs the binding table for the VS surface state. + */ +static dri_bo * +brw_vs_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < BRW_VS_MAX_SURF; i++) + if (brw->vs.surf_bo[i]) + data[i] = brw->vs.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + /* The presumed offsets were set in the data values for + * brw_upload_cache. + */ + drm_intel_bo_emit_reloc(bind_bo, i * 4, + brw->vs.surf_bo[i], 0, + I915_GEM_DOMAIN_INSTRUCTION, 0); + } + } + + free(data); + } + + return bind_bo; +} + +/** + * Vertex shader surfaces (constant buffer). + * + * This consumes the state updates for the constant buffer needing + * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and + * CACHE_NEW_SURF_BIND for the binding table upload. + */ +static void prepare_vs_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + int i; + int nr_surfaces = 0; + + brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); + + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + nr_surfaces = i + 1; + } + } + + if (brw->vs.nr_surfaces != nr_surfaces) { + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; + brw->vs.nr_surfaces = nr_surfaces; + } + + /* Note that we don't end up updating the bind_bo if we don't have a + * surface to be pointing at. This should be relatively harmless, as it + * just slightly increases our working set size. + */ + if (brw->vs.nr_surfaces != 0) { + dri_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = brw_vs_get_binding_table(brw); + } +} + +const struct brw_tracked_state brw_vs_surfaces = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_VERTEX_PROGRAM), + .cache = 0 + }, + .prepare = prepare_vs_surfaces, +}; + + + diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index ba03afd6c1..ac11790151 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -177,14 +177,6 @@ static void brw_note_fence( struct intel_context *intel, GLuint fence ) brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; } - -static void brw_note_unlock( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - brw_state_cache_check_size(brw); -} - - /* called from intelWaitForIdle() and intelFlush() * * For now, just flush everything. Could be smarter later. @@ -194,7 +186,7 @@ static GLuint brw_flush_cmd( void ) struct brw_mi_flush flush; flush.opcode = CMD_MI_FLUSH; flush.pad = 0; - flush.flags = BRW_FLUSH_READ_CACHE | BRW_FLUSH_STATE_CACHE; + flush.flags = BRW_FLUSH_STATE_CACHE; return *(GLuint *)&flush; } @@ -215,7 +207,6 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.invalidate_state = brw_invalidate_state; brw->intel.vtbl.note_fence = brw_note_fence; - brw->intel.vtbl.note_unlock = brw_note_unlock; brw->intel.vtbl.new_batch = brw_new_batch; brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 8a3b7df9c7..14e05be4f6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -272,6 +272,9 @@ static void brw_wm_populate_key( struct brw_context *brw, /* _NEW_LIGHT */ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + /* _NEW_HINT */ + key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* _NEW_TEXTURE */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; @@ -351,6 +354,7 @@ const struct brw_tracked_state brw_wm_prog = { .dirty = { .mesa = (_NEW_COLOR | _NEW_DEPTH | + _NEW_HINT | _NEW_STENCIL | _NEW_POLYGON | _NEW_LINE | diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 295fed851b..ba497432c6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -63,6 +63,7 @@ struct brw_wm_prog_key { GLuint computes_depth:1; /* could be derived from program string */ GLuint source_depth_to_render_target:1; GLuint flat_shade:1; + GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint runtime_check_aads_emit:1; GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ @@ -241,15 +242,20 @@ struct brw_wm_compile { GLuint max_wm_grf; GLuint last_scratch; + GLuint cur_inst; /**< index of current instruction */ + + GLboolean out_of_regs; /**< ran out of GRF registers? */ + /** Mapping from Mesa registers to hardware registers */ struct { GLboolean inited; struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + GLboolean used_grf[BRW_WM_MAX_GRF]; + GLuint first_free_grf; struct brw_reg stack; struct brw_reg emit_mask_reg; - GLuint reg_index; /**< Index of next free GRF register */ GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; GLuint tmp_max; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 72fc21d2eb..4a8b9f99b7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -353,6 +353,19 @@ static void emit_mad( struct brw_compile *p, } } +static void emit_trunc( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_RNDZ(p, dst[i], arg0[i]); + } + } +} static void emit_lrp( struct brw_compile *p, const struct brw_reg *dst, @@ -1009,7 +1022,7 @@ static void emit_fb_write( struct brw_wm_compile *c, get_element_ud(brw_vec8_grf(1,0), 6), brw_imm_ud(1<<26)); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { emit_aa(c, arg1, 2); fire_fb_write(c, 0, nr, target, eot); @@ -1044,7 +1057,6 @@ static void emit_spill( struct brw_wm_compile *c, */ brw_dp_WRITE_16(p, retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW), - 1, slot); } @@ -1072,7 +1084,6 @@ static void emit_unspill( struct brw_wm_compile *c, brw_dp_READ_16(p, retype(vec16(reg), BRW_REGISTER_TYPE_UW), - 1, slot); } @@ -1224,6 +1235,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_dph(p, dst, dst_flags, args[0], args[1]); break; + case OPCODE_TRUNC: + emit_trunc(p, dst, dst_flags, args[0]); + break; + case OPCODE_LRP: emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 49aad281d7..b9e8dd2e96 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -354,13 +354,25 @@ static void emit_interp( struct brw_wm_compile *c, src_undef()); } else { - emit_op(c, - WM_LINTERP, - dst, - 0, - interp, - deltas, - src_undef()); + if (c->key.linear_color) { + emit_op(c, + WM_LINTERP, + dst, + 0, + interp, + deltas, + src_undef()); + } + else { + /* perspective-corrected color interpolation */ + emit_op(c, + WM_PINTERP, + dst, + 0, + interp, + deltas, + get_pixel_w(c)); + } } break; case FRAG_ATTRIB_FOGC: diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 094c1af2fe..6e6f2bac7b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1,5 +1,7 @@ #include "main/macros.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_optimize.h" #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" @@ -21,7 +23,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { case OPCODE_IF: - case OPCODE_TRUNC: case OPCODE_ENDIF: case OPCODE_CAL: case OPCODE_BRK: @@ -42,6 +43,83 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) } + +static void +reclaim_temps(struct brw_wm_compile *c); + + +/** Mark GRF register as used. */ +static void +prealloc_grf(struct brw_wm_compile *c, int r) +{ + c->used_grf[r] = GL_TRUE; +} + + +/** Mark given GRF register as not in use. */ +static void +release_grf(struct brw_wm_compile *c, int r) +{ + /*assert(c->used_grf[r]);*/ + c->used_grf[r] = GL_FALSE; + c->first_free_grf = MIN2(c->first_free_grf, r); +} + + +/** Return index of a free GRF, mark it as used. */ +static int +alloc_grf(struct brw_wm_compile *c) +{ + GLuint r; + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + /* no free temps, try to reclaim some */ + reclaim_temps(c); + c->first_free_grf = 0; + + /* try alloc again */ + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + for (r = 0; r < BRW_WM_MAX_GRF; r++) { + assert(c->used_grf[r]); + } + + /* really, no free GRF regs found */ + if (!c->out_of_regs) { + /* print warning once per compilation */ + _mesa_warning(NULL, "i965: ran out of registers for fragment program"); + c->out_of_regs = GL_TRUE; + } + + return -1; +} + + +/** Return number of GRF registers used */ +static int +num_grf_used(const struct brw_wm_compile *c) +{ + int r; + for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) + if (c->used_grf[r]) + return r + 1; + return 0; +} + + + /** * Record the mapping of a Mesa register to a hardware register. */ @@ -68,11 +146,23 @@ static int get_scalar_dst_index(const struct prog_instruction *inst) static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; - if(c->tmp_index == c->tmp_max) - c->tmp_regs[ c->tmp_max++ ] = c->reg_index++; - + + /* if we need to allocate another temp, grow the tmp_regs[] array */ + if (c->tmp_index == c->tmp_max) { + int r = alloc_grf(c); + if (r < 0) { + /*printf("Out of temps in %s\n", __FUNCTION__);*/ + r = 50; /* XXX random register! */ + } + c->tmp_regs[ c->tmp_max++ ] = r; + } + + /* form the GRF register */ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); + /*printf("alloc_temp %d\n", reg.nr);*/ + assert(reg.nr < BRW_WM_MAX_GRF); return reg; + } /** @@ -130,35 +220,29 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, return brw_null_reg(); } + assert(index < 256); + assert(component < 4); + /* see if we've already allocated a HW register for this Mesa register */ if (c->wm_regs[file][index][component].inited) { - /* yes, re-use */ - reg = c->wm_regs[file][index][component].reg; + /* yes, re-use */ + reg = c->wm_regs[file][index][component].reg; } else { /* no, allocate new register */ - reg = brw_vec8_grf(c->reg_index, 0); - } + int grf = alloc_grf(c); + /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/ + if (grf < 0) { + /* totally out of temps */ + grf = 51; /* XXX random register! */ + } - /* if this is a new register allocation, record it in the table */ - if (!c->wm_regs[file][index][component].inited) { - set_reg(c, file, index, component, reg); - c->reg_index++; - } + reg = brw_vec8_grf(grf, 0); + /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ - if (c->reg_index >= BRW_WM_MAX_GRF - 12) { - /* ran out of temporary registers! */ -#if 1 - /* This is a big hack for now. - * Return bad register index, just don't hang the GPU. - */ - _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); - c->reg_index = BRW_WM_MAX_GRF - 13; -#else - return brw_null_reg(); -#endif + set_reg(c, file, index, component, reg); } - + if (neg & (1 << component)) { reg = negate(reg); } @@ -168,6 +252,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, } + +/** + * This is called if we run out of GRF registers. Examine the live intervals + * of temp regs in the program and free those which won't be used again. + */ +static void +reclaim_temps(struct brw_wm_compile *c) +{ + GLint intBegin[MAX_PROGRAM_TEMPS]; + GLint intEnd[MAX_PROGRAM_TEMPS]; + int index; + + /*printf("Reclaim temps:\n");*/ + + _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, + intBegin, intEnd); + + for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { + if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { + /* program temp[i] can be freed */ + int component; + /*printf(" temp[%d] is dead\n", index);*/ + for (component = 0; component < 4; component++) { + if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { + int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; + release_grf(c, r); + /* + printf(" Reclaim temp %d, reg %d at inst %d\n", + index, r, c->cur_inst); + */ + c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; + } + } + } + } +} + + + + /** * Preallocate registers. This sets up the Mesa to hardware register * mapping for certain registers, such as constants (uniforms/state vars) @@ -179,6 +303,10 @@ static void prealloc_reg(struct brw_wm_compile *c) struct brw_reg reg; int urb_read_length = 0; GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + GLuint reg_index = 0; + + memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); + c->first_free_grf = 0; for (i = 0; i < 4; i++) { if (i < c->key.nr_depth_regs) @@ -187,14 +315,20 @@ static void prealloc_reg(struct brw_wm_compile *c) reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2 * c->key.nr_depth_regs; + reg_index += 2 * c->key.nr_depth_regs; /* constants */ { - const int nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_temps = c->fp->program.Base.NumTemporaries; /* use a real constant buffer, or just use a section of the GRF? */ - c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/ + /* XXX this heuristic may need adjustment... */ + if ((nr_params + nr_temps) * 4 + reg_index > 80) + c->fp->use_const_buffer = GL_TRUE; + else + c->fp->use_const_buffer = GL_FALSE; + /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ if (c->fp->use_const_buffer) { /* We'll use a real constant buffer and fetch constants from @@ -216,7 +350,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (i = 0; i < nr_params; i++) { /* loop over XYZW channels */ for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + reg = brw_vec1_grf(reg_index + index / 8, index % 8); /* Save pointer to parameter/constant value. * Constants will be copied in prepare_constant_buffer() */ @@ -226,7 +360,7 @@ static void prealloc_reg(struct brw_wm_compile *c) } /* number of constant regs used (each reg is float[8]) */ c->nr_creg = 2 * ((4 * nr_params + 15) / 16); - c->reg_index += c->nr_creg; + reg_index += c->nr_creg; } } @@ -242,23 +376,31 @@ static void prealloc_reg(struct brw_wm_compile *c) fp_input = -1; if (fp_input >= 0 && inputs & (1 << fp_input)) { - urb_read_length = c->reg_index; - reg = brw_vec8_grf(c->reg_index, 0); + urb_read_length = reg_index; + reg = brw_vec8_grf(reg_index, 0); for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); } if (c->key.vp_outputs_written & (1 << i)) { - c->reg_index += 2; + reg_index += 2; } } c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; c->prog_data.urb_read_length = urb_read_length; c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index += 2; + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index++; + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index += 2; + + /* mark GRF regs [0..reg_index-1] as in-use */ + for (i = 0; i < reg_index; i++) + prealloc_grf(c, i); + + /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ + prealloc_grf(c, 126); + prealloc_grf(c, 127); /* An instruction may reference up to three constants. * They'll be found in these registers. @@ -267,12 +409,12 @@ static void prealloc_reg(struct brw_wm_compile *c) if (c->fp->use_const_buffer) { for (i = 0; i < 3; i++) { c->current_const[i].index = -1; - c->current_const[i].reg = alloc_tmp(c); + c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); } } #if 0 printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); - printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); + printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); #endif } @@ -294,23 +436,20 @@ static void fetch_constants(struct brw_wm_compile *c, if (src->File == PROGRAM_STATE_VAR || src->File == PROGRAM_CONSTANT || src->File == PROGRAM_UNIFORM) { - if (c->current_const[i].index != src->Index) { - c->current_const[i].index = src->Index; + c->current_const[i].index = src->Index; #if 0 - printf(" fetch const[%d] for arg %d into reg %d\n", - src->Index, i, c->current_const[i].reg.nr); + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, i, c->current_const[i].reg.nr); #endif - /* need to fetch the constant now */ - brw_dp_READ_4(p, - c->current_const[i].reg, /* writeback dest */ - 1, /* msg_reg */ - src->RelAddr, /* relative indexing? */ - 16 * src->Index, /* byte offset */ - SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ - ); - } + /* need to fetch the constant now */ + brw_dp_READ_4(p, + c->current_const[i].reg, /* writeback dest */ + src->RelAddr, /* relative indexing? */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ + ); } } } @@ -379,6 +518,14 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, const GLuint nr = 1; const GLuint component = GET_SWZ(src->Swizzle, channel); + /* Extended swizzle terms */ + if (component == SWIZZLE_ZERO) { + return brw_imm_f(0.0F); + } + else if (component == SWIZZLE_ONE) { + return brw_imm_f(1.0F); + } + if (c->fp->use_const_buffer && (src->File == PROGRAM_STATE_VAR || src->File == PROGRAM_CONSTANT || @@ -676,27 +823,26 @@ static void emit_fb_write(struct brw_wm_compile *c, } if (c->key.dest_depth_reg) { - GLuint comp = c->key.dest_depth_reg / 2; - GLuint off = c->key.dest_depth_reg % 2; + const GLuint comp = c->key.dest_depth_reg / 2; + const GLuint off = c->key.dest_depth_reg % 2; - assert(comp == 1); - assert(off == 0); -#if 0 - /* XXX do we need this code? comp always 1, off always 0, it seems */ if (off != 0) { + /* XXX this code needs review/testing */ + struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); + struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); + brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); /* 2nd half? */ - brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_MOV(p, brw_message_reg(nr+1), arg1_1); brw_pop_insn_state(p); } else -#endif { - struct brw_reg src = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src); + struct brw_reg src = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src); } nr += 2; } @@ -2598,15 +2744,15 @@ static void post_wm_emit( struct brw_wm_compile *c ) static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) { -#define MAX_IFSN 32 +#define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 - struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH]; - struct brw_instruction *inst0, *inst1; - int i, if_insn = 0, loop_insn = 0; + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + GLuint i, if_depth = 0, loop_depth = 0; struct brw_compile *p = &c->func; struct brw_indirect stack_index = brw_indirect(0, 0); - c->reg_index = 0; + c->out_of_regs = GL_FALSE; + prealloc_reg(c); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); @@ -2614,6 +2760,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) for (i = 0; i < c->nr_fp_insns; i++) { const struct prog_instruction *inst = &c->prog_instructions[i]; + c->cur_inst = i; + #if 0 _mesa_printf("Inst %d: ", i); _mesa_print_instruction(inst); @@ -2773,15 +2921,15 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_kil(c); break; case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); break; case OPCODE_BGNSUB: brw_save_label(p, inst->Comment, p->nr_insn); @@ -2815,7 +2963,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) break; case OPCODE_BGNLOOP: /* XXX may need to invalidate the current_constant regs */ - loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: brw_BREAK(p); @@ -2826,36 +2974,35 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_ENDLOOP: - loop_insn--; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]); - /* patch all the BREAK instructions from - last BEGINLOOP */ - while (inst0 > loop_inst[loop_insn]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK) { + { + struct brw_instruction *inst0, *inst1; + loop_depth--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; inst0->bits3.if_else.pop_count = 0; - } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { inst0->bits3.if_else.jump_count = inst1 - inst0; inst0->bits3.if_else.pop_count = 0; - } - } - break; + } + } + } + break; default: _mesa_printf("unsupported IR in fragment shader %d\n", inst->Opcode); } + if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); - - if (c->reg_index >= BRW_WM_MAX_GRF) { - _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()"); - /* XXX we need to do some proper error recovery here */ - } } @@ -2879,6 +3026,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_print_program(c, "brw_wm_glsl_emit done"); } - c->prog_data.total_grf = c->reg_index; + c->prog_data.total_grf = num_grf_used(c); c->prog_data.total_scratch = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index bd60ac9b31..8fd067abe7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -116,6 +116,10 @@ const struct { { C, 0, 1, 1, 1 } }; +/** + * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES + * \param lookup bitmask of IZ_* flags + */ void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, struct brw_wm_prog_key *key ) diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index ab9aa2f10d..3436a24717 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -159,6 +159,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_FRC: case OPCODE_MOV: case OPCODE_SWZ: + case OPCODE_TRUNC: read0 = writemask; break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 805df8a4af..096f74394e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -176,22 +176,6 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, } } - -/** - * Use same key for WM and VS surfaces. - */ -struct brw_surface_key { - GLenum target, depthmode; - dri_bo *bo; - GLint format, internal_format; - GLint first_level, last_level; - GLint width, height, depth; - GLint pitch, cpp; - uint32_t tiling; - GLuint offset; -}; - - static void brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) { @@ -268,7 +252,7 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.cube_neg_z = 1; } - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -321,10 +305,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.tiling = intelObj->mt->region->tiling; dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); } @@ -336,7 +321,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ -static dri_bo * +dri_bo * brw_create_constant_surface( struct brw_context *brw, struct brw_surface_key *key ) { @@ -362,7 +347,7 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -380,39 +365,70 @@ brw_create_constant_surface( struct brw_context *brw, return bo; } +/* Creates a new WM constant buffer reflecting the current fragment program's + * constants, if needed by the fragment program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +static drm_intel_bo * +brw_wm_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = fp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (!fp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} /** * Update the surface state for a WM constant buffer. * The constant buffer will be (re)allocated here if needed. */ -static dri_bo * +static void brw_update_wm_constant_surface( GLcontext *ctx, - GLuint surf, - dri_bo *const_buffer, - const struct gl_program_parameter_list *params) + GLuint surf) { struct brw_context *brw = brw_context(ctx); struct brw_surface_key key; - struct intel_context *intel = &brw->intel; - const int size = params->NumParameters * 4 * sizeof(GLfloat); + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = + fp->program.Base.Parameters; - /* free old const buffer if too small */ - if (const_buffer && const_buffer->size < size) { - dri_bo_unreference(const_buffer); - const_buffer = NULL; - } + /* If we're in this state update atom, we need to update WM constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); - /* alloc new buffer if needed */ - if (!const_buffer) { - const_buffer = - drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64); + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + return; } memset(&key, 0, sizeof(key)); key.format = MESA_FORMAT_RGBA_FLOAT32; key.internal_format = GL_RGBA; - key.bo = const_buffer; + key.bo = fp->const_buffer; key.depthmode = GL_NONE; key.pitch = params->NumParameters; key.width = params->NumParameters; @@ -427,77 +443,59 @@ brw_update_wm_constant_surface( GLcontext *ctx, */ dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), &key.bo, key.bo ? 1 : 0, NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); } - - return const_buffer; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } - /** - * Update the surface state for a VS constant buffer. - * The constant buffer will be (re)allocated here if needed. + * Updates surface / buffer for fragment shader constant buffer, if + * one is required. + * + * This consumes the state updates for the constant buffer, and produces + * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for + * inclusion in the binding table. */ -static dri_bo * -brw_update_vs_constant_surface( GLcontext *ctx, - GLuint surf, - dri_bo *const_buffer, - const struct gl_program_parameter_list *params) +static void prepare_wm_constant_surface(struct brw_context *brw ) { - struct brw_context *brw = brw_context(ctx); - struct brw_surface_key key; - struct intel_context *intel = &brw->intel; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - - assert(surf == 0); - - /* free old const buffer if too small */ - if (const_buffer && const_buffer->size < size) { - dri_bo_unreference(const_buffer); - const_buffer = NULL; - } - - /* alloc new buffer if needed */ - if (!const_buffer) { - const_buffer = - drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); - } - - memset(&key, 0, sizeof(key)); - - key.format = MESA_FORMAT_RGBA_FLOAT32; - key.internal_format = GL_RGBA; - key.bo = const_buffer; - key.depthmode = GL_NONE; - key.pitch = params->NumParameters; - key.width = params->NumParameters; - key.height = 1; - key.depth = 1; - key.cpp = 16; + GLcontext *ctx = &brw->intel.ctx; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; - /* - printf("%s:\n", __FUNCTION__); - printf(" width %d height %d depth %d cpp %d pitch %d\n", - key.width, key.height, key.depth, key.cpp, key.pitch); - */ + drm_intel_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); - dri_bo_unreference(brw->vs.surf_bo[surf]); - brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->vs.surf_bo[surf] == NULL) { - brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + if (brw->wm.surf_bo[surf] != NULL) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + } + return; } - return const_buffer; + brw_update_wm_constant_surface(ctx, surf); } +const struct brw_tracked_state brw_wm_constant_surface = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .cache = 0 + }, + .prepare = prepare_wm_constant_surface, +}; + /** * Sets up a surface state structure to point at the given region. @@ -507,7 +505,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, static void brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, - unsigned int unit, GLboolean cached) + unsigned int unit) { GLcontext *ctx = &brw->intel.ctx; dri_bo *region_bo = NULL; @@ -567,12 +565,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw, ctx->Color.BlendEnabled); dri_bo_unreference(brw->wm.surf_bo[unit]); - brw->wm.surf_bo[unit] = NULL; - if (cached) - brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - ®ion_bo, 1, - NULL); + brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + ®ion_bo, 1, + NULL); if (brw->wm.surf_bo[unit] == NULL) { struct brw_surface_state surf; @@ -581,7 +578,27 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss0.surface_format = key.surface_format; surf.ss0.surface_type = key.surface_type; - surf.ss1.base_addr = key.draw_offset; + if (key.tiling == I915_TILING_NONE) { + surf.ss1.base_addr = key.draw_offset; + } else { + uint32_t tile_offset = key.draw_offset % 4096; + + surf.ss1.base_addr = key.draw_offset - tile_offset; + + assert(BRW_IS_G4X(brw) || tile_offset == 0); + if (BRW_IS_G4X(brw)) { + if (key.tiling == I915_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. + */ + surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 512 / 2; + } else { + surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 128 / 2; + } + } + } if (region_bo != NULL) surf.ss1.base_addr += region_bo->offset; /* reloc */ @@ -598,7 +615,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss0.writedisable_alpha = !key.color_mask[3]; /* Key size will never match key size for textures, so we're safe. */ - brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), ®ion_bo, 1, &surf, sizeof(surf), @@ -611,7 +629,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], offsetof(struct brw_surface_state, ss1), region_bo, - key.draw_offset, + surf.ss1.base_addr, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } @@ -630,7 +648,7 @@ brw_wm_get_binding_table(struct brw_context *brw) assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, NULL); @@ -646,7 +664,7 @@ brw_wm_get_binding_table(struct brw_context *brw) else data[i] = 0; - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, data, data_size, @@ -682,27 +700,17 @@ static void prepare_wm_surfaces(struct brw_context *brw ) for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { brw_update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i], - i, - GL_FALSE); + i); } } else { - brw_update_renderbuffer_surface(brw, NULL, 0, GL_TRUE); + brw_update_renderbuffer_surface(brw, NULL, 0); } old_nr_surfaces = brw->wm.nr_surfaces; brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; - /* Update surface / buffer for fragment shader constant buffer */ - { - const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - fp->const_buffer = - brw_update_wm_constant_surface(ctx, surf, fp->const_buffer, - fp->program.Base.Parameters); - - brw->wm.nr_surfaces = surf + 1; - } + if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) + brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { @@ -735,100 +743,16 @@ static void prepare_wm_surfaces(struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; } - -/** - * Constructs the binding table for the VS surface state. - */ -static dri_bo * -brw_vs_get_binding_table(struct brw_context *brw) -{ - dri_bo *bind_bo; - - assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF); - - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, brw->vs.nr_surfaces, - NULL); - - if (bind_bo == NULL) { - GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint); - uint32_t *data = malloc(data_size); - int i; - - for (i = 0; i < brw->vs.nr_surfaces; i++) - if (brw->vs.surf_bo[i]) - data[i] = brw->vs.surf_bo[i]->offset; - else - data[i] = 0; - - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, brw->vs.nr_surfaces, - data, data_size, - NULL, NULL); - - /* Emit binding table relocations to surface state */ - for (i = 0; i < BRW_VS_MAX_SURF; i++) { - if (brw->vs.surf_bo[i] != NULL) { - dri_bo_emit_reloc(bind_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0, - i * sizeof(GLuint), - brw->vs.surf_bo[i]); - } - } - - free(data); - } - - return bind_bo; -} - - -/** - * Vertex shader surfaces. Just constant buffer for now. Could add vertex - * shader textures in the future. - */ -static void prepare_vs_surfaces(struct brw_context *brw ) -{ - GLcontext *ctx = &brw->intel.ctx; - - /* Update surface / buffer for vertex shader constant buffer */ - { - const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER; - struct brw_vertex_program *vp = - (struct brw_vertex_program *) brw->vertex_program; - vp->const_buffer = - brw_update_vs_constant_surface(ctx, surf, vp->const_buffer, - vp->program.Base.Parameters); - - brw->vs.nr_surfaces = 1; - } - - dri_bo_unreference(brw->vs.bind_bo); - brw->vs.bind_bo = brw_vs_get_binding_table(brw); - - if (1) - brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; -} - - -static void -prepare_surfaces(struct brw_context *brw) -{ - prepare_wm_surfaces(brw); - prepare_vs_surfaces(brw); -} - - const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM, - .brw = BRW_NEW_CONTEXT, + .mesa = (_NEW_COLOR | + _NEW_TEXTURE | + _NEW_BUFFERS), + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_WM_SURFACES), .cache = 0 }, - .prepare = prepare_surfaces, + .prepare = prepare_wm_surfaces, }; diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c new file mode 120000 index 0000000000..4c6b37ada0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_generatemipmap.c @@ -0,0 +1 @@ +../intel/intel_generatemipmap.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c new file mode 120000 index 0000000000..cc4589f4d4 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -0,0 +1 @@ +../intel/intel_pixel_read.c
\ No newline at end of file |