diff options
author | Chia-I Wu <olvaffe@gmail.com> | 2010-01-12 11:25:02 +0800 |
---|---|---|
committer | Chia-I Wu <olvaffe@gmail.com> | 2010-01-12 11:25:02 +0800 |
commit | 562c127693200822f04a145db50add1be2425d7b (patch) | |
tree | 9441774fb212b17ddf2a364f06abc43f166cc00b /src/gallium/drivers/i965/brw_wm_state.c | |
parent | e5d351dcfde58777162552cf5cd2a9cd8299f4cd (diff) | |
parent | 077d6dd7508af88509dd0499c5dfbdaa186b4015 (diff) |
Merge branch 'master' into opengl-es-v2
Conflicts:
src/mesa/main/dd.h
Diffstat (limited to 'src/gallium/drivers/i965/brw_wm_state.c')
-rw-r--r-- | src/gallium/drivers/i965/brw_wm_state.c | 339 |
1 files changed, 339 insertions, 0 deletions
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c new file mode 100644 index 0000000000..ee970ac75b --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -0,0 +1,339 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_math.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_wm.h" +#include "brw_debug.h" +#include "brw_pipe_rast.h" + +/*********************************************************************** + * WM unit - fragment programs and rasterization + */ + +struct brw_wm_unit_key { + unsigned int total_grf, total_scratch; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + unsigned int dispatch_grf_start_reg; + + unsigned int curbe_offset; + unsigned int urb_size; + + unsigned int max_threads; + + unsigned int nr_surfaces, sampler_count; + GLboolean uses_depth, computes_depth, uses_kill, has_flow_control; + GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; + GLfloat offset_units, offset_factor; +}; + +static void +wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) +{ + const struct brw_fragment_shader *fp = brw->curr.fragment_shader; + + memset(key, 0, sizeof(*key)); + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + key->max_threads = 1; + else { + /* WM maximum threads is number of EUs times number of threads per EU. */ + if (BRW_IS_IGDNG(brw)) + key->max_threads = 12 * 6; + else if (BRW_IS_G4X(brw)) + key->max_threads = 10 * 5; + else + key->max_threads = 8 * 4; + } + + /* CACHE_NEW_WM_PROG */ + key->total_grf = brw->wm.prog_data->total_grf; + key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; + key->curb_entry_read_length = brw->wm.prog_data->curb_read_length; + key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; + key->total_scratch = align(brw->wm.prog_data->total_scratch, 1024); + + /* BRW_NEW_URB_FENCE */ + key->urb_size = brw->urb.vsize; + + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.wm_start; + + /* BRW_NEW_NR_SURFACEs */ + key->nr_surfaces = brw->wm.nr_surfaces; + + /* CACHE_NEW_SAMPLER */ + key->sampler_count = brw->wm.sampler_count; + + /* PIPE_NEW_RAST */ + key->polygon_stipple = brw->curr.rast->templ.poly_stipple_enable; + + /* PIPE_NEW_FRAGMENT_PROGRAM */ + key->uses_depth = fp->uses_depth; + key->computes_depth = fp->info.writes_z; + + /* PIPE_NEW_DEPTH_BUFFER + * + * Override for NULL depthbuffer case, required by the Pixel Shader Computed + * Depth field. + */ + if (brw->curr.fb.zsbuf == NULL) + key->computes_depth = 0; + + /* PIPE_NEW_DEPTH_STENCIL_ALPHA */ + key->uses_kill = (fp->info.uses_kill || + brw->curr.zstencil->cc3.alpha_test); + + key->has_flow_control = fp->has_flow_control; + + /* temporary sanity check assertion */ + assert(fp->has_flow_control == 0); + + /* PIPE_NEW_QUERY */ + key->stats_wm = (brw->query.stats_wm != 0); + + /* PIPE_NEW_RAST */ + key->line_stipple = brw->curr.rast->templ.line_stipple_enable; + + + key->offset_enable = (brw->curr.rast->templ.offset_cw || + brw->curr.rast->templ.offset_ccw); + + key->offset_units = brw->curr.rast->templ.offset_units; + key->offset_factor = brw->curr.rast->templ.offset_scale; +} + +/** + * Setup wm hardware state. See page 225 of Volume 2 + */ +static enum pipe_error +wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, + struct brw_winsys_reloc *reloc, + unsigned nr_reloc, + struct brw_winsys_buffer **bo_out) +{ + struct brw_wm_unit_state wm; + enum pipe_error ret; + + memset(&wm, 0, sizeof(wm)); + + wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; + wm.thread0.kernel_start_pointer = 0; /* reloc */ + wm.thread1.depth_coef_urb_read_offset = 1; + wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + + if (BRW_IS_IGDNG(brw)) + wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ + else + wm.thread1.binding_table_entry_count = key->nr_surfaces; + + if (key->total_scratch != 0) { + wm.thread2.scratch_space_base_pointer = 0; /* reloc */ + wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; + } else { + wm.thread2.scratch_space_base_pointer = 0; + wm.thread2.per_thread_scratch_space = 0; + } + + wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; + wm.thread3.urb_entry_read_length = key->urb_entry_read_length; + wm.thread3.urb_entry_read_offset = 0; + wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + + if (BRW_IS_IGDNG(brw)) + wm.wm4.sampler_count = 0; /* hardware requirement */ + else + wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + + /* reloc */ + wm.wm4.sampler_state_pointer = 0; + + wm.wm5.program_uses_depth = key->uses_depth; + wm.wm5.program_computes_depth = key->computes_depth; + wm.wm5.program_uses_killpixel = key->uses_kill; + + if (key->has_flow_control) + wm.wm5.enable_8_pix = 1; + else + wm.wm5.enable_16_pix = 1; + + wm.wm5.max_threads = key->max_threads - 1; + wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ + wm.wm5.legacy_line_rast = 0; + wm.wm5.legacy_global_depth_bias = 0; + wm.wm5.early_depth_test = 1; /* never need to disable */ + wm.wm5.line_aa_region_width = 0; + wm.wm5.line_endcap_aa_region_width = 1; + + wm.wm5.polygon_stipple = key->polygon_stipple; + + if (key->offset_enable) { + wm.wm5.depth_offset = 1; + /* Something wierd going on with legacy_global_depth_bias, + * offset_constant, scaling and MRD. This value passes glean + * but gives some odd results elsewere (eg. the + * quad-offset-units test). + */ + wm.global_depth_offset_constant = key->offset_units * 2; + + /* This is the only value that passes glean: + */ + wm.global_depth_offset_scale = key->offset_factor; + } + + wm.wm5.line_stipple = key->line_stipple; + + if ((BRW_DEBUG & DEBUG_STATS) || key->stats_wm) + wm.wm4.stats_enable = 1; + + ret = brw_upload_cache(&brw->cache, BRW_WM_UNIT, + key, sizeof(*key), + reloc, nr_reloc, + &wm, sizeof(wm), + NULL, NULL, + bo_out); + if (ret) + return ret; + + return PIPE_OK; +} + + +static enum pipe_error upload_wm_unit( struct brw_context *brw ) +{ + struct brw_wm_unit_key key; + struct brw_winsys_reloc reloc[3]; + unsigned nr_reloc = 0; + enum pipe_error ret; + unsigned grf_reg_count; + unsigned per_thread_scratch_space; + unsigned stats_enable; + unsigned sampler_count; + + wm_unit_populate_key(brw, &key); + + + /* Allocate the necessary scratch space if we haven't already. Don't + * bother reducing the allocation later, since we use scratch so + * rarely. + */ + assert(key.total_scratch <= 12 * 1024); + if (key.total_scratch) { + GLuint total = key.total_scratch * key.max_threads; + + /* Do we need a new buffer: + */ + if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) + bo_reference(&brw->wm.scratch_bo, NULL); + + if (brw->wm.scratch_bo == NULL) { + ret = brw->sws->bo_alloc(brw->sws, + BRW_BUFFER_TYPE_SHADER_SCRATCH, + total, + 4096, + &brw->wm.scratch_bo); + if (ret) + return ret; + } + } + + + /* XXX: temporary: + */ + grf_reg_count = (align(key.total_grf, 16) / 16 - 1); + per_thread_scratch_space = key.total_scratch / 1024 - 1; + stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm; + sampler_count = BRW_IS_IGDNG(brw) ? 0 :(key.sampler_count + 1) / 4; + + /* Emit WM program relocation */ + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_STATE, + grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); + + /* Emit scratch space relocation */ + if (key.total_scratch != 0) { + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_SCRATCH, + per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_bo); + } + + /* Emit sampler state relocation */ + if (key.sampler_count != 0) { + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_STATE, + stats_enable | (sampler_count << 2), + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); + } + + + if (brw_search_cache(&brw->cache, BRW_WM_UNIT, + &key, sizeof(key), + reloc, nr_reloc, + NULL, + &brw->wm.state_bo)) + return PIPE_OK; + + ret = wm_unit_create_from_key(brw, &key, + reloc, nr_reloc, + &brw->wm.state_bo); + if (ret) + return ret; + + return PIPE_OK; +} + +const struct brw_tracked_state brw_wm_unit = { + .dirty = { + .mesa = (PIPE_NEW_FRAGMENT_SHADER | + PIPE_NEW_DEPTH_BUFFER | + PIPE_NEW_RAST | + PIPE_NEW_DEPTH_STENCIL_ALPHA | + PIPE_NEW_QUERY), + + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_WM_SURFACES), + + .cache = (CACHE_NEW_WM_PROG | + CACHE_NEW_SAMPLER) + }, + .prepare = upload_wm_unit, +}; + |