diff options
author | Alan Hourihane <alanh@tungstengraphics.com> | 2008-11-01 22:57:26 +0000 |
---|---|---|
committer | Alan Hourihane <alanh@tungstengraphics.com> | 2008-11-01 22:57:26 +0000 |
commit | 81c862205e32b163a9f5ecf3f59e4cdcccee36c6 (patch) | |
tree | 50a5f45f1a0823758e72968d1f1dfd6e0d1098e0 /src/mesa/drivers/dri/i965 | |
parent | 14e1505cce24ee294cb98683504cc4537c20f34a (diff) | |
parent | bbffed0857634912c7a1f13882eba303ae2bf4e1 (diff) |
Merge commit 'origin/master' into gallium-0.2
Conflicts:
src/mesa/shader/slang/library/slang_vertex_builtin_gc.h
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 22 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_curbe.c | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_draw.c | 55 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_draw_upload.c | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_misc_state.c | 86 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_queryobj.c | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 19 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_batch.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 47 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vtbl.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.h | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_glsl.c | 412 |
15 files changed, 589 insertions, 108 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 474158b484..e2bc08a6cb 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -39,6 +39,7 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_draw.h" +#include "brw_state.h" #include "brw_vs.h" #include "intel_tex.h" #include "intel_blit.h" diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1c6a0dede0..e3904be977 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -433,7 +433,6 @@ struct brw_context GLuint primitive; GLboolean emit_state_always; - GLboolean wrap; GLboolean tmp_fallback; GLboolean no_batch_wrap; @@ -445,6 +444,19 @@ struct brw_context GLuint nr_draw_regions; struct intel_region *draw_regions[MAX_DRAW_BUFFERS]; struct intel_region *depth_region; + + /** + * List of buffers accumulated in brw_validate_state to receive + * dri_bo_check_aperture treatment before exec, so we can know if we + * should flush the batch and try again before emitting primitives. + * + * This can be a fixed number as we only have a limited number of + * objects referenced from the batchbuffer in a primitive emit, + * consisting of the vertex buffers, pipelined state pointers, + * the CURBE, the depth buffer, and a query BO. 
+ */ + dri_bo *validated_bos[VERT_ATTRIB_MAX + 16]; + int validated_bo_count; } state; struct brw_state_pointers attribs; @@ -680,14 +692,6 @@ void brw_emit_query_begin(struct brw_context *brw); void brw_emit_query_end(struct brw_context *brw); /*====================================================================== - * brw_state.c - */ -void brw_validate_state( struct brw_context *brw ); -void brw_init_state( struct brw_context *brw ); -void brw_destroy_state( struct brw_context *brw ); - - -/*====================================================================== * brw_state_dump.c */ void brw_debug_batch(struct intel_context *intel); diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 6ffa221d66..c7bac7b0c5 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -307,6 +307,7 @@ static void prepare_constant_buffer(struct brw_context *brw) dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf); } + brw_add_validated_bo(brw, brw->curbe.curbe_bo); /* Because this provokes an action (ie copy the constants into the * URB), it shouldn't be shortcircuited if identical to the @@ -328,15 +329,6 @@ static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - dri_bo *aper_array[] = { - brw->intel.batch->buf, - brw->curbe.curbe_bo, - }; - - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) { - intel_batchbuffer_flush(intel->batch); - return; - } BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 6c71b4abcf..d87b8f8a84 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -256,6 +256,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, struct intel_context *intel = intel_context(ctx); struct brw_context *brw = brw_context(ctx); 
GLboolean retval = GL_FALSE; + GLboolean warn = GL_FALSE; GLuint i; if (ctx->NewState) @@ -282,30 +283,25 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, LOCK_HARDWARE(intel); - if (brw->intel.numClipRects == 0) { + if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) { UNLOCK_HARDWARE(intel); return GL_TRUE; } + /* Flush the batch if it's approaching full, so that we don't wrap while + * we've got validated state that needs to be in the same batch as the + * primitives. This fraction is just a guess (minimal full state plus + * a primitive is around 512 bytes), and would be better if we had + * an upper bound of how much we might emit in a single + * brw_try_draw_prims(). + */ + intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, + LOOP_CLIPRECTS); { - /* Flush the batch if it's approaching full, so that we don't wrap while - * we've got validated state that needs to be in the same batch as the - * primitives. This fraction is just a guess (minimal full state plus - * a primitive is around 512 bytes), and would be better if we had - * an upper bound of how much we might emit in a single - * brw_try_draw_prims(). - */ - if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4 - /* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */ - || intel->batch->cliprect_mode != LOOP_CLIPRECTS) - intel_batchbuffer_flush(intel->batch); - /* Set the first primitive early, ahead of validate_state: */ brw_set_prim(brw, prim[0].mode); - /* XXX: Need to separate validate and upload of state. - */ brw_validate_state( brw ); /* Various fallback checks: @@ -316,6 +312,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (check_fallbacks( brw, prim, nr_prims )) goto out; + /* Check that we can fit our state in with our existing batchbuffer, or + * flush otherwise. 
+ */ + if (dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count)) { + static GLboolean warned; + intel_batchbuffer_flush(intel->batch); + + /* Validate the state after we flushed the batch (which would have + * changed the set of dirty state). If we still fail to + * check_aperture, warn of what's happening, but attempt to continue + * on since it may succeed anyway, and the user would probably rather + * see a failure and a warning than a fallback. + */ + brw_validate_state(brw); + if (!warned && + dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count)) { + warn = GL_TRUE; + warned = GL_TRUE; + } + } + + brw_upload_state(brw); + for (i = 0; i < nr_prims; i++) { brw_emit_prim(brw, &prim[i]); } @@ -326,6 +347,10 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, out: UNLOCK_HARDWARE(intel); + if (warn) + fprintf(stderr, "i965: Single primitive emit potentially exceeded " + "available aperture space\n"); + if (!retval) DBG("%s failed\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 7b88b5eaa1..4080c5e322 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -250,10 +250,10 @@ static void get_space( struct brw_context *brw, wrap_buffers(brw, size); } + assert(*bo_return == NULL); dri_bo_reference(brw->vb.upload.bo); *bo_return = brw->vb.upload.bo; *offset_return = brw->vb.upload.offset; - brw->vb.upload.offset += size; } @@ -359,6 +359,14 @@ static void brw_prepare_vertices(struct brw_context *brw) input->offset = (unsigned long)input->glarray->Ptr; input->stride = input->glarray->StrideB; } else { + if (input->bo != NULL) { + /* Already-uploaded vertex data is present from a previous + * prepare_vertices, but we had to re-validate state due to + * check_aperture failing and a new batch being produced. 
+ */ + continue; + } + /* Queue the buffer object up to be uploaded in the next pass, * when we've decided if we're doing interleaved or not. */ @@ -417,6 +425,12 @@ static void brw_prepare_vertices(struct brw_context *brw) } brw_prepare_query_begin(brw); + + for (i = 0; i < nr_enabled; i++) { + struct brw_vertex_element *input = enabled[i]; + + brw_add_validated_bo(brw, input->bo); + } } static void brw_emit_vertices(struct brw_context *brw) @@ -512,7 +526,7 @@ static void brw_prepare_indices(struct brw_context *brw) struct intel_context *intel = &brw->intel; const struct _mesa_index_buffer *index_buffer = brw->ib.ib; GLuint ib_size; - dri_bo *bo; + dri_bo *bo = NULL; struct gl_buffer_object *bufferobj; GLuint offset; @@ -561,6 +575,8 @@ static void brw_prepare_indices(struct brw_context *brw) dri_bo_unreference(brw->ib.bo); brw->ib.bo = bo; brw->ib.offset = offset; + + brw_add_validated_bo(brw, brw->ib.bo); } static void brw_emit_indices(struct brw_context *brw) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 207b8b7ca3..8cbe4215fb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -65,7 +65,7 @@ struct brw_reg GLuint abs:1; /* source only */ GLuint vstride:4; /* source only */ GLuint width:3; /* src only, align1 only */ - GLuint hstride:2; /* src only, align1 only */ + GLuint hstride:2; /* align1 only */ GLuint address_mode:1; /* relative addressing, hopefully! 
*/ GLuint pad0:1; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 0bfbec9d14..58d97465d1 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -64,7 +64,9 @@ static void brw_set_dest( struct brw_instruction *insn, if (insn->header.access_mode == BRW_ALIGN_1) { insn->bits1.da1.dest_subreg_nr = dest.subnr; - insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; } else { insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; @@ -78,7 +80,9 @@ static void brw_set_dest( struct brw_instruction *insn, */ if (insn->header.access_mode == BRW_ALIGN_1) { insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; - insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; } else { insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index afa8694ebb..5bba8c84ec 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -71,6 +71,38 @@ const struct brw_tracked_state brw_blend_constant_color = { .emit = upload_blend_constant_color }; +/* Constant single cliprect for framebuffer object or DRI2 drawing */ +static void upload_drawing_rect(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + + if (!intel->constant_cliprect) + return; + + BEGIN_BATCH(4, NO_LOOP_CLIPRECTS); + OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965); + OUT_BATCH(0); /* xmin, ymin */ + OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | + ((ctx->DrawBuffer->Height - 1) << 16)); + OUT_BATCH(0); + 
ADVANCE_BATCH(); +} + +const struct brw_tracked_state brw_drawing_rect = { + .dirty = { + .mesa = _NEW_BUFFERS, + .brw = 0, + .cache = 0 + }, + .emit = upload_drawing_rect +}; + +static void prepare_binding_table_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->wm.bind_bo); +} + /** * Upload the binding table pointers, which point each stage's array of surface * state pointers. @@ -81,15 +113,6 @@ const struct brw_tracked_state brw_blend_constant_color = { static void upload_binding_table_pointers(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - dri_bo *aper_array[] = { - intel->batch->buf, - brw->wm.bind_bo, - }; - - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) { - intel_batchbuffer_flush(intel->batch); - return; - } BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); @@ -109,6 +132,7 @@ const struct brw_tracked_state brw_binding_table_pointers = { .brw = BRW_NEW_BATCH, .cache = CACHE_NEW_SURF_BIND, }, + .prepare = prepare_binding_table_pointers, .emit = upload_binding_table_pointers, }; @@ -142,23 +166,18 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_PSP; } -static void upload_psp_urb_cbs(struct brw_context *brw ) + +static void prepare_psp_urb_cbs(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; - dri_bo *aper_array[] = { - intel->batch->buf, - brw->vs.state_bo, - brw->gs.state_bo, - brw->clip.state_bo, - brw->wm.state_bo, - brw->cc.state_bo, - }; - - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) { - intel_batchbuffer_flush(intel->batch); - return; - } + brw_add_validated_bo(brw, brw->vs.state_bo); + brw_add_validated_bo(brw, brw->gs.state_bo); + brw_add_validated_bo(brw, brw->clip.state_bo); + brw_add_validated_bo(brw, brw->wm.state_bo); + brw_add_validated_bo(brw, brw->cc.state_bo); +} +static void upload_psp_urb_cbs(struct brw_context *brw ) +{ 
upload_pipelined_state_pointers(brw); brw_upload_urb_fence(brw); brw_upload_constant_buffer_state(brw); @@ -176,9 +195,18 @@ const struct brw_tracked_state brw_psp_urb_cbs = { CACHE_NEW_WM_UNIT | CACHE_NEW_CC_UNIT) }, + .prepare = prepare_psp_urb_cbs, .emit = upload_psp_urb_cbs, }; +static void prepare_depthbuffer(struct brw_context *brw) +{ + struct intel_region *region = brw->state.depth_region; + + if (region != NULL) + brw_add_validated_bo(brw, region->buffer); +} + static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; @@ -200,10 +228,6 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } else { unsigned int format; - dri_bo *aper_array[] = { - intel->batch->buf, - region->buffer - }; switch (region->cpp) { case 2: @@ -220,11 +244,6 @@ static void emit_depthbuffer(struct brw_context *brw) return; } - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) { - intel_batchbuffer_flush(intel->batch); - return; - } - BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | @@ -253,6 +272,7 @@ const struct brw_tracked_state brw_depthbuffer = { .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH, .cache = 0, }, + .prepare = prepare_depthbuffer, .emit = emit_depthbuffer, }; diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index a1a1353dee..cb9169e2ee 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -42,6 +42,7 @@ #include "main/imports.h" #include "brw_context.h" +#include "brw_state.h" #include "intel_batchbuffer.h" #include "intel_reg.h" @@ -163,10 +164,6 @@ void brw_prepare_query_begin(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - dri_bo *aper_array[] = { - intel->batch->buf, - brw->query.bo, - }; /* Skip if we're not doing any queries. 
*/ if (is_empty_list(&brw->query.active_head)) @@ -182,8 +179,7 @@ brw_prepare_query_begin(struct brw_context *brw) brw->query.index = 0; } - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) - intel_batchbuffer_flush(intel->batch); + brw_add_validated_bo(brw, brw->query.bo); } /** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */ diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 4c04036ef0..bb22c03eeb 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -35,6 +35,16 @@ #include "brw_context.h" +static inline void +brw_add_validated_bo(struct brw_context *brw, dri_bo *bo) +{ + assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos)); + + if (bo != NULL) { + dri_bo_reference(bo); + brw->state.validated_bos[brw->state.validated_bo_count++] = bo; + } +}; const struct brw_tracked_state brw_blend_constant_color; const struct brw_tracked_state brw_cc_unit; @@ -79,10 +89,19 @@ const struct brw_tracked_state brw_pipe_control; const struct brw_tracked_state brw_clear_surface_cache; const struct brw_tracked_state brw_clear_batch_cache; +const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; /*********************************************************************** + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_upload_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + +/*********************************************************************** * brw_state_cache.c */ dri_bo *brw_cache_data(struct brw_cache *cache, diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 94ef924868..dc87859f3f 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ 
b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -97,8 +97,6 @@ void brw_clear_batch_cache_flush( struct brw_context *brw ) { clear_batch_cache(brw); - brw->wrap = 0; - /* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ brw->state.dirty.mesa |= ~0; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index b6a52843a8..16b0496f47 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -99,6 +99,7 @@ const struct brw_tracked_state *atoms[] = &brw_psp_urb_cbs, #endif + &brw_drawing_rect, &brw_indices, &brw_vertices, @@ -168,6 +169,18 @@ static void xor_states( struct brw_state_flags *result, result->cache = a->cache ^ b->cache; } +static void +brw_clear_validated_bos(struct brw_context *brw) +{ + int i; + + /* Clear the last round of validated bos */ + for (i = 0; i < brw->state.validated_bo_count; i++) { + dri_bo_unreference(brw->state.validated_bos[i]); + brw->state.validated_bos[i] = NULL; + } + brw->state.validated_bo_count = 0; +} /*********************************************************************** * Emit all state: @@ -176,14 +189,14 @@ void brw_validate_state( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; - GLuint i, count, pass = 0; - dri_bo *last_batch_bo = NULL; + GLuint i; + + brw_clear_validated_bos(brw); state->mesa |= brw->intel.NewGLState; brw->intel.NewGLState = 0; - if (brw->wrap) - state->brw |= BRW_NEW_CONTEXT; + brw_add_validated_bo(brw, intel->batch->buf); if (brw->emit_state_always) { state->mesa |= ~0; @@ -210,8 +223,6 @@ void brw_validate_state( struct brw_context *brw ) brw->intel.Fallback = 0; - count = 0; - /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { const struct brw_tracked_state *atom = brw->state.atoms[i]; @@ -225,19 +236,15 @@ void brw_validate_state( struct brw_context *brw ) } } } +} - if 
(brw->intel.Fallback) - return; - /* We're about to try to set up a coherent state in the batchbuffer for - * the emission of primitives. If we exceed the aperture size in any of the - * emit() calls, we need to go back to square 1 and try setting up again. - */ -got_flushed: - dri_bo_unreference(last_batch_bo); - last_batch_bo = intel->batch->buf; - dri_bo_reference(last_batch_bo); - assert(pass++ <= 2); +void brw_upload_state(struct brw_context *brw) +{ + struct brw_state_flags *state = &brw->state.dirty; + int i; + + brw_clear_validated_bos(brw); if (INTEL_DEBUG) { /* Debug version which enforces various sanity checks on the @@ -262,8 +269,6 @@ got_flushed: if (check_state(state, &atom->dirty)) { if (atom->emit) { atom->emit( brw ); - if (intel->batch->buf != last_batch_bo) - goto got_flushed; } } @@ -288,15 +293,11 @@ got_flushed: if (check_state(state, &atom->dirty)) { if (atom->emit) { atom->emit( brw ); - if (intel->batch->buf != last_batch_bo) - goto got_flushed; } } } } - dri_bo_unreference(last_batch_bo); - if (!brw->intel.Fallback) memset(state, 0, sizeof(*state)); } diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index a64e437860..2d4c81274e 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -62,7 +62,6 @@ dri_bo_release(dri_bo **bo) */ static void brw_destroy_context( struct intel_context *intel ) { - GLcontext *ctx = &intel->ctx; struct brw_context *brw = brw_context(&intel->ctx); int i; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 297617ee2d..896390c17b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -157,6 +157,7 @@ struct brw_wm_instruction { #define BRW_WM_MAX_PARAM 256 #define BRW_WM_MAX_CONST 256 #define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS +#define BRW_WM_MAX_SUBROUTINE 16 @@ -246,7 +247,10 @@ struct brw_wm_compile { struct brw_reg stack; struct brw_reg 
emit_mask_reg; GLuint reg_index; + GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; + GLuint tmp_max; + GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 4d5e11f4b6..0ea8c3d50e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -4,6 +4,10 @@ #include "brw_eu.h" #include "brw_wm.h" +enum _subroutine { + SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 +}; + /* Only guess, need a flag in gl_fragment_program later */ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { @@ -19,6 +23,10 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) case OPCODE_RET: case OPCODE_DDX: case OPCODE_DDY: + case OPCODE_NOISE1: + case OPCODE_NOISE2: + case OPCODE_NOISE3: + case OPCODE_NOISE4: case OPCODE_BGNLOOP: return GL_TRUE; default: @@ -47,13 +55,26 @@ static int get_scalar_dst_index(struct prog_instruction *inst) static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; - reg = brw_vec8_grf(c->tmp_index--, 0); + if(c->tmp_index == c->tmp_max) + c->tmp_regs[ c->tmp_max++ ] = c->reg_index++; + + reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); return reg; } -static void release_tmps(struct brw_wm_compile *c) +static int mark_tmps(struct brw_wm_compile *c) +{ + return c->tmp_index; +} + +static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index ) +{ + return brw_vec8_grf( c->tmp_regs[ index ], 0 ); +} + +static void release_tmps(struct brw_wm_compile *c, int mark) { - c->tmp_index = 127; + c->tmp_index = mark; } static struct brw_reg @@ -155,6 +176,68 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, src->NegateBase, src->Abs); } +/* Subroutines are minimal support for reusable instruction sequences. 
+ They are implemented as simply as possible to minimise overhead: there + is no explicit support for communication between the caller and callee + other than saving the return address in a temporary register, nor is + there any automatic local storage. This implies that great care is + required before attempting reentrancy or any kind of nested + subroutine invocations. */ +static void invoke_subroutine( struct brw_wm_compile *c, + enum _subroutine subroutine, + void (*emit)( struct brw_wm_compile * ) ) +{ + struct brw_compile *p = &c->func; + + assert( subroutine < BRW_WM_MAX_SUBROUTINE ); + + if( c->subroutines[ subroutine ] ) { + /* subroutine previously emitted: reuse existing instructions */ + + int mark = mark_tmps( c ); + struct brw_reg return_address = retype( alloc_tmp( c ), + BRW_REGISTER_TYPE_UD ); + int here = p->nr_insn; + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) ); + + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), + brw_imm_d( ( c->subroutines[ subroutine ] - + here - 1 ) << 4 ) ); + brw_pop_insn_state(p); + + release_tmps( c, mark ); + } else { + /* previously unused subroutine: emit, and mark for later reuse */ + + int mark = mark_tmps( c ); + struct brw_reg return_address = retype( alloc_tmp( c ), + BRW_REGISTER_TYPE_UD ); + struct brw_instruction *calc; + int base = p->nr_insn; + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) ); + brw_pop_insn_state(p); + + c->subroutines[ subroutine ] = p->nr_insn; + + emit( c ); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV( p, brw_ip_reg(), return_address ); + brw_pop_insn_state(p); + + brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) ); + + release_tmps( c, mark ); + } +} + static void emit_abs( struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -778,6 +861,7 @@ static void 
emit_lrp(struct brw_wm_compile *c, GLuint mask = inst->DstReg.WriteMask; struct brw_reg dst, tmp1, tmp2, src0, src1, src2; int i; + int mark = mark_tmps(c); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { dst = get_dst_reg(c, inst, i, 1); @@ -804,7 +888,7 @@ static void emit_lrp(struct brw_wm_compile *c, brw_MAC(p, dst, src0, tmp1); brw_set_saturate(p, 0); } - release_tmps(c); + release_tmps(c, mark); } } @@ -957,6 +1041,316 @@ static void emit_ddy(struct brw_wm_compile *c, brw_set_saturate(p, 0); } +static __inline struct brw_reg high_words( struct brw_reg reg ) +{ + return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), + 0, 8, 2 ); +} + +static __inline struct brw_reg low_words( struct brw_reg reg ) +{ + return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); +} + +/* One- and two-dimensional Perlin noise, similar to the description in + _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */ +static void noise1_sub( struct brw_wm_compile *c ) { + + struct brw_compile *p = &c->func; + struct brw_reg param, + x0, x1, /* gradients at each end */ + t, tmp[ 2 ], /* float temporaries */ + itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */ + int i; + int mark = mark_tmps( c ); + + x0 = alloc_tmp( c ); + x1 = alloc_tmp( c ); + t = alloc_tmp( c ); + tmp[ 0 ] = alloc_tmp( c ); + tmp[ 1 ] = alloc_tmp( c ); + itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD ); + itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD ); + itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD ); + itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD ); + itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD ); + + param = lookup_tmp( c, mark - 2 ); + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ + + /* Arrange the two end coordinates into scalars (itmp0/itmp1) to + be hashed. Also compute the remainder (offset within the unit + length), interleaved to reduce register dependency penalties. 
*/ + brw_RNDD( p, itmp[ 0 ], param ); + brw_FRC( p, param, param ); + brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) ); + brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ + brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ + + /* We're now ready to perform the hashing. The two hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 32x16 + bit multiplication, and 16-bit swizzles (which we get for + free). We can't use immediate operands in the multiplies, + because immediates are permitted only in src1 and the 16-bit + factor is permitted only in src0. */ + for( i = 0; i < 2; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] ); + for( i = 0; i < 2; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 2; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] ); + for( i = 0; i < 2; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 2; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); + for( i = 0; i < 2; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + + /* Now we want to initialise the two gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 31 ), but + we correct for that right at the end. */ + brw_ADD( p, t, param, brw_imm_f( -1.0 ) ); + brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) ); + brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) ); + + brw_MUL( p, x0, x0, param ); + brw_MUL( p, x1, x1, t ); + + /* We interpolate between the gradients using the polynomial + 6t^5 - 15t^4 + 10t^3 (Perlin). 
*/ + brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); + brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the + pipeline */ + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); + brw_MUL( p, param, tmp[ 0 ], param ); + brw_MUL( p, x1, x1, param ); + brw_ADD( p, x0, x0, x1 ); + /* scale by pow( 2, -30 ), to compensate for the format conversion + above and an extra factor of 2 so that a single gradient covers + the [-1,1] range */ + brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) ); + + release_tmps( c, mark ); +} + +static void emit_noise1( struct brw_wm_compile *c, + struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src, param, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src = get_src_reg( c, inst->SrcReg, 0, 1 ); + + param = alloc_tmp( c ); + + brw_MOV( p, param, src ); + + invoke_subroutine( c, SUB_NOISE1, noise1_sub ); + + /* Fill in the result: */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV( p, dst, param ); + } + } + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + +static void noise2_sub( struct brw_wm_compile *c ) { + + struct brw_compile *p = &c->func; + struct brw_reg param0, param1, + x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */ + t, tmp[ 4 ], /* float temporaries */ + itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */ + int i; + int mark = mark_tmps( c ); + + x0y0 = alloc_tmp( c ); + x0y1 = alloc_tmp( c ); + x1y0 = alloc_tmp( c ); + x1y1 = alloc_tmp( c ); + t = alloc_tmp( c ); + for( i = 0; i < 4; i++ ) { + tmp[ i ] = alloc_tmp( 
c ); + itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); + } + itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD ); + itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD ); + itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD ); + + param0 = lookup_tmp( c, mark - 3 ); + param1 = lookup_tmp( c, mark - 2 ); + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to + be hashed. Also compute the remainders (offsets within the unit + square), interleaved to reduce register dependency penalties. */ + brw_RNDD( p, itmp[ 0 ], param0 ); + brw_RNDD( p, itmp[ 1 ], param1 ); + brw_FRC( p, param0, param0 ); + brw_FRC( p, param1, param1 ); + brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ + brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ), + low_words( itmp[ 1 ] ) ); + brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ + brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ + brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) ); + brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) ); + brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) ); + + /* We're now ready to perform the hashing. The four hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 32x16 + bit multiplication, and 16-bit swizzles (which we get for + free). We can't use immediate operands in the multiplies, + because immediates are permitted only in src1 and the 16-bit + factor is permitted only in src0. 
*/ + for( i = 0; i < 4; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + + /* Now we want to initialise the four gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 15 ), but + we correct for that right at the end. */ + brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); + brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); + brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) ); + brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) ); + + brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param0 ); + brw_MUL( p, x0y1, x0y1, param0 ); + + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 ); + brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t ); + brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t ); + + brw_ADD( p, x0y0, x0y0, tmp[ 0 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 2 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 1 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 3 ] ); + + /* We interpolate between the gradients using the polynomial + 6t^5 - 15t^4 + 10t^3 (Perlin). 
*/ + brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) ); + brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the + pipeline */ + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the + pipeline */ + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); + brw_MUL( p, param0, tmp[ 0 ], param0 ); + brw_MUL( p, param1, tmp[ 1 ], param1 ); + + /* Here we interpolate in the y dimension... */ + brw_MUL( p, x0y1, x0y1, param1 ); + brw_MUL( p, x1y1, x1y1, param1 ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. There are horrible register dependencies here, + but we have nothing else to do. 
*/ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, param0 ); + brw_ADD( p, x0y0, x0y0, x1y0 ); + + /* scale by pow( 2, -15 ), as described above */ + brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) ); + + release_tmps( c, mark ); +} + +static void emit_noise2( struct brw_wm_compile *c, + struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, param0, param1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); + src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + + param0 = alloc_tmp( c ); + param1 = alloc_tmp( c ); + + brw_MOV( p, param0, src0 ); + brw_MOV( p, param1, src1 ); + + invoke_subroutine( c, SUB_NOISE2, noise2_sub ); + + /* Fill in the result: */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV( p, dst, param0 ); + } + } + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + static void emit_wpos_xy(struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -1276,6 +1670,15 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_MAD: emit_mad(c, inst); break; + case OPCODE_NOISE1: + emit_noise1(c, inst); + break; + case OPCODE_NOISE2: + emit_noise2(c, inst); + break; + /* case OPCODE_NOISE3: */ + /* case OPCODE_NOISE4: */ + /* not yet implemented */ case OPCODE_TEX: emit_tex(c, inst); break; @@ -1368,7 +1771,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { brw_wm_pass_fp(c); - c->tmp_index = 127; brw_wm_emit_glsl(brw, c); c->prog_data.total_grf = c->reg_index; c->prog_data.total_scratch = 0; |