diff options
Diffstat (limited to 'src/gallium/drivers')
63 files changed, 1639 insertions, 2322 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 5a889a6119..58a8b5d0b0 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1834,9 +1834,9 @@ emit_instruction(struct codegen *gen, case TGSI_OPCODE_ENDIF: return emit_ENDIF(gen, inst); - case TGSI_OPCODE_BGNLOOP2: + case TGSI_OPCODE_BGNLOOP: return emit_BGNLOOP(gen, inst); - case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_ENDLOOP: return emit_ENDLOOP(gen, inst); case TGSI_OPCODE_BRK: return emit_BRK(gen, inst); @@ -1875,9 +1875,9 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) assert(gen->num_imm < MAX_TEMPS); for (ch = 0; ch < 4; ch++) { - float val = immed->u.ImmediateFloat32[ch].Float; + float val = immed->u[ch].Float; - if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) { + if (ch > 0 && val == immed->u[ch - 1].Float) { /* re-use previous register */ gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1]; } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index e26594448f..6a63a0e6ce 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -44,13 +44,6 @@ -static unsigned -minify(unsigned d) -{ - return MAX2(1, d>>1); -} - - static void cell_texture_layout(struct cell_texture *ct) { @@ -424,7 +417,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) if (!ctrans->map) return NULL; /* out of memory */ - if (transfer->usage & PIPE_TRANSFER_READ) { + if (transfer->usage == PIPE_TRANSFER_READ || + transfer->usage == PIPE_TRANSFER_READ_WRITE) { /* need to untwiddle the texture to make a linear version */ const uint bpp = pf_get_size(ct->base.format); if (bpp == 4) { @@ -465,7 +459,8 @@ cell_transfer_unmap(struct pipe_screen *screen, PIPE_BUFFER_USAGE_CPU_READ); } - if (transfer->usage & PIPE_TRANSFER_WRITE) { + if 
(transfer->usage == PIPE_TRANSFER_WRITE || + transfer->usage == PIPE_TRANSFER_READ_WRITE) { /* The user wrote new texture data into the mapped buffer. * We need to convert the new linear data into the twiddled/tiled format. */ diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index e27df2dfb3..6db9501128 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -952,7 +952,6 @@ exec_instruction( break; case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ FETCH( &r[0], 0, CHAN_X ); r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { @@ -961,7 +960,6 @@ exec_instruction( break; case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ FETCH( &r[0], 0, CHAN_X ); r[0].q = micro_sqrt(r[0].q); r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); @@ -1115,7 +1113,6 @@ exec_instruction( break; case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); @@ -1136,8 +1133,7 @@ exec_instruction( } break; - case TGSI_OPCODE_LERP: - /* TGSI_OPCODE_LRP */ + case TGSI_OPCODE_LRP: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); @@ -1158,21 +1154,11 @@ exec_instruction( ASSERT (0); break; - case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ + case TGSI_OPCODE_DP2A: ASSERT (0); break; - case TGSI_OPCODE_INDEX: - ASSERT (0); - break; - - case TGSI_OPCODE_NEGATE: - ASSERT (0); - break; - - case TGSI_OPCODE_FRAC: - /* TGSI_OPCODE_FRC */ + case TGSI_OPCODE_FRC: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); r[0].q = micro_frc(r[0].q); @@ -1184,8 +1170,7 @@ exec_instruction( ASSERT (0); break; - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ + case TGSI_OPCODE_FLR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); r[0].q 
= micro_flr(r[0].q); @@ -1201,8 +1186,7 @@ exec_instruction( } break; - case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ + case TGSI_OPCODE_EX2: FETCH(&r[0], 0, CHAN_X); r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); @@ -1212,8 +1196,7 @@ exec_instruction( } break; - case TGSI_OPCODE_LOGBASE2: - /* TGSI_OPCODE_LG2 */ + case TGSI_OPCODE_LG2: FETCH( &r[0], 0, CHAN_X ); r[0].q = micro_lg2(r[0].q); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { @@ -1221,8 +1204,7 @@ exec_instruction( } break; - case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ + case TGSI_OPCODE_POW: FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 1, CHAN_X); @@ -1233,7 +1215,7 @@ exec_instruction( } break; - case TGSI_OPCODE_CROSSPRODUCT: + case TGSI_OPCODE_XPD: /* TGSI_OPCODE_XPD */ FETCH(&r[0], 0, CHAN_Y); FETCH(&r[1], 1, CHAN_Z); @@ -1275,10 +1257,6 @@ exec_instruction( } break; - case TGSI_OPCODE_MULTIPLYMATRIX: - ASSERT (0); - break; - case TGSI_OPCODE_ABS: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); @@ -1780,9 +1758,9 @@ exec_instruction( mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; break; - case TGSI_OPCODE_LOOP: + case TGSI_OPCODE_BGNFOR: /* fall-through (for now) */ - case TGSI_OPCODE_BGNLOOP2: + case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; @@ -1790,9 +1768,9 @@ exec_instruction( mach->ContStack[mach->ContStackTop++] = mach->ContMask; break; - case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDFOR: /* fall-through (for now at least) */ - case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ ASSERT(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h index a433cf054d..c6e68ea38a 100644 --- 
a/src/gallium/drivers/i915simple/i915_batch.h +++ b/src/gallium/drivers/i915simple/i915_batch.h @@ -50,8 +50,8 @@ i915_batchbuffer_check( struct i915_batchbuffer *batch, size_t dwords, size_t relocs ) { - /** TODO JB: Check relocs */ - return dwords * 4 <= batch->size - (batch->ptr - batch->map); + return dwords * 4 <= batch->size - (batch->ptr - batch->map) && + relocs <= (batch->max_relocs - batch->relocs); } static INLINE size_t diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index ccf9bb31fb..bf69c8e9f5 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -142,10 +142,14 @@ i915_is_texture_referenced( struct pipe_context *pipe, unsigned face, unsigned level) { /** - * FIXME: Optimize. + * FIXME: Return the correct result. We can't always return referenced + * since it causes a double flush within the vbo module. */ - +#if 0 return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +#else + return 0; +#endif } static unsigned int @@ -153,10 +157,14 @@ i915_is_buffer_referenced( struct pipe_context *pipe, struct pipe_buffer *buf) { /** - * FIXME: Optimize. + * FIXME: Return the correct result. We can't always return referenced + * since it causes a double flush within the vbo module. 
*/ - +#if 0 return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +#else + return 0; +#endif } diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 961c1bf213..89504ced27 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -975,8 +975,9 @@ i915_translate_instructions(struct i915_fp_compile *p, = &parse.FullToken.FullImmediate; const uint pos = p->num_immediates++; uint j; + assert( imm->Immediate.NrTokens <= 4 + 1 ); for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { - p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float; + p->immediates[pos][j] = imm->u[j].Float; } } break; diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index f4aa8e60d8..a3de38d586 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -232,6 +232,8 @@ i915_get_tex_transfer(struct pipe_screen *screen, if (trans) { pipe_texture_reference(&trans->base.texture, texture); trans->base.format = trans->base.format; + trans->base.x = x; + trans->base.y = y; trans->base.width = w; trans->base.height = h; trans->base.block = texture->block; diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index ca8e87af8d..03f0e14e7c 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -72,11 +72,6 @@ static const int step_offsets[6][2] = { {-1, 1} }; -static unsigned minify( unsigned d ) -{ - return MAX2(1, d>>1); -} - static unsigned power_of_two(unsigned x) { @@ -160,10 +155,10 @@ i915_miptree_set_image_offset(struct i915_texture *tex, /** - * Special case to deal with display targets. + * Special case to deal with scanout textures. 
*/ static boolean -i915_displaytarget_layout(struct i915_texture *tex) +i915_scanout_layout(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; @@ -177,9 +172,13 @@ i915_displaytarget_layout(struct i915_texture *tex) i915_miptree_set_image_offset( tex, 0, 0, 0, 0 ); if (tex->base.width[0] >= 128) { +#if 0 tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); +#else + tex->stride = 2048 * 4; /* TODO fix when backend is smarter */ +#endif tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); -#if 0 /* used for tiled display targets */ +#if 0 /* used for tiled textures */ tex->tiled = 1; #endif } else { @@ -209,9 +208,9 @@ i945_miptree_layout_2d( struct i915_texture *tex ) unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; - /* used for tiled display targets */ - if (0) - if (i915_displaytarget_layout(tex)) + /* used for scanouts that need special layouts */ + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + if (i915_scanout_layout(tex)) return; tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); @@ -584,6 +583,7 @@ i915_texture_create(struct pipe_screen *screen, struct i915_screen *i915screen = i915_screen(screen); struct i915_texture *tex = CALLOC_STRUCT(i915_texture); size_t tex_size; + unsigned buf_usage = 0; if (!tex) return NULL; @@ -605,9 +605,11 @@ i915_texture_create(struct pipe_screen *screen, tex_size = tex->stride * tex->total_nblocksy; - tex->buffer = screen->buffer_create(screen, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex_size); + buf_usage = PIPE_BUFFER_USAGE_PIXEL; + if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + buf_usage |= I915_BUFFER_USAGE_SCANOUT; + + tex->buffer = screen->buffer_create(screen, 64, buf_usage, tex_size); if (!tex->buffer) goto fail; diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h index ff5b34f193..711db91c36 100644 --- a/src/gallium/drivers/i915simple/i915_winsys.h +++ 
b/src/gallium/drivers/i915simple/i915_winsys.h @@ -109,6 +109,7 @@ struct i915_winsys { #define I915_BUFFER_ACCESS_READ 0x2 #define I915_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) +#define I915_BUFFER_USAGE_SCANOUT (PIPE_BUFFER_USAGE_CUSTOM << 1) /** diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 8aea8c0558..998ffaeac4 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -65,11 +65,6 @@ unsigned intel_compressed_alignment(unsigned internalFormat) } #endif -static unsigned minify( unsigned d ) -{ - return MAX2(1, d>>1); -} - static void intel_miptree_set_image_offset(struct brw_texture *tex, unsigned level, diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index e03d653482..3ee82d95b3 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -1294,10 +1294,10 @@ void brw_vs_emit(struct brw_vs_compile *c) case TGSI_TOKEN_TYPE_IMMEDIATE: { struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate; assert(imm->Immediate.NrTokens == 4 + 1); - c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float; - c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float; - c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float; - c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u[0].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u[1].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u[2].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u[3].Float; c->prog_data.num_imm++; } break; diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index ab6410aa60..db75963932 100644 --- 
a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -947,7 +947,7 @@ static void brw_wm_emit_instruction( struct brw_wm_compile *c, #endif break; - case TGSI_OPCODE_LOOP: + case TGSI_OPCODE_BGNFOR: c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8); break; case TGSI_OPCODE_BRK: @@ -958,11 +958,11 @@ static void brw_wm_emit_instruction( struct brw_wm_compile *c, brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDFOR: c->loop_insn--; c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]); /* patch all the BREAK instructions from - last BEGINLOOP */ + last BGNFOR */ while (c->inst0 > c->loop_inst[c->loop_insn]) { c->inst0--; if (c->inst0->header.opcode == BRW_OPCODE_BREAK) { diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index a500ec6045..4e700089e3 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -501,7 +501,7 @@ identity_set_sampler_textures(struct pipe_context *_pipe, pipe->set_sampler_textures(pipe, num_textures, - _textures); + textures); } static void diff --git a/src/gallium/drivers/identity/id_drm.c b/src/gallium/drivers/identity/id_drm.c index 555220f853..e5342ac06e 100644 --- a/src/gallium/drivers/identity/id_drm.c +++ b/src/gallium/drivers/identity/id_drm.c @@ -60,7 +60,7 @@ identity_drm_create_screen(struct drm_api *_api, int fd, screen = api->create_screen(api, fd, arg); return identity_screen_create(screen); -}; +} static struct pipe_context * identity_drm_create_context(struct drm_api *_api, @@ -77,7 +77,7 @@ identity_drm_create_context(struct drm_api *_api, pipe = identity_context_create(_screen, pipe); return pipe; -}; +} static boolean identity_drm_buffer_from_texture(struct drm_api *_api, diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c index 259f1be36e..26439637d0 
100644 --- a/src/gallium/drivers/identity/id_screen.c +++ b/src/gallium/drivers/identity/id_screen.c @@ -289,6 +289,7 @@ identity_screen_surface_buffer_create(struct pipe_screen *_screen, unsigned height, enum pipe_format format, unsigned usage, + unsigned tex_usage, unsigned *stride) { struct identity_screen *id_screen = identity_screen(_screen); @@ -300,6 +301,7 @@ identity_screen_surface_buffer_create(struct pipe_screen *_screen, height, format, usage, + tex_usage, stride); if (result) diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 832366e646..e4cf91c005 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -4,6 +4,8 @@ #include <util/u_memory.h> +#include <errno.h> + #include "nouveau/nouveau_bo.h" #include "nouveau_winsys.h" #include "nouveau_screen.h" @@ -141,12 +143,13 @@ nouveau_screen_bo_map_range(struct pipe_screen *pscreen, struct pipe_buffer *pb, unsigned offset, unsigned length, unsigned usage) { struct nouveau_bo *bo = nouveau_bo(pb); + uint32_t flags = nouveau_screen_map_flags(usage); int ret; - ret = nouveau_bo_map_range(bo, offset, length, - nouveau_screen_map_flags(usage)); + ret = nouveau_bo_map_range(bo, offset, length, flags); if (ret) { - debug_printf("map_range failed: %d\n", ret); + if (!(flags & NOUVEAU_BO_NOWAIT) || ret != -EBUSY) + debug_printf("map_range failed: %d\n", ret); return NULL; } diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c index f315cf54f0..bbbcb54c46 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -110,10 +110,10 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, unsigned cx; unsigned cy; - /* POT or GTFO */ - assert(!(w & (w - 1)) && !(h & (h - 1))); +#if 0 /* That's the way she likes it */ assert(src_pitch == ((struct nv04_surface *)dst)->pitch); +#endif BEGIN_RING(chan, swzsurf, 
NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); OUT_RELOCo(chan, dst_bo, @@ -133,7 +133,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, for (cy = 0; cy < h; cy += sub_h) { for (cx = 0; cx < w; cx += sub_w) { BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); - OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(cx, cy) * + OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(cx+dx, cy+dy) * dst->texture->block.size, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); @@ -153,8 +153,8 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, OUT_RING (chan, src_pitch | NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); - OUT_RELOCl(chan, src_bo, src->offset + cy * src_pitch + - cx * src->texture->block.size, NOUVEAU_BO_GART | + OUT_RELOCl(chan, src_bo, src->offset + (cy+sy) * src_pitch + + (cx+sx) * src->texture->block.size, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); OUT_RING (chan, 0); } @@ -210,6 +210,43 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, } static int +nv04_surface_copy_m2mf_swizzle(struct nv04_surface_2d *ctx, + struct pipe_surface *dst, int dx, int dy, + struct pipe_surface *src, int sx, int sy) +{ + struct nouveau_channel *chan = ctx->m2mf->channel; + struct nouveau_grobj *m2mf = ctx->m2mf; + struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); + struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); + unsigned src_pitch = ((struct nv04_surface *)src)->pitch; + unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; + unsigned dst_offset = dst->offset + nv04_swizzle_bits(dx, dy) * + dst->texture->block.size; + unsigned src_offset = src->offset + sy * src_pitch + + sx * src->texture->block.size; + + BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); + OUT_RELOCo(chan, src_bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(chan, dst_bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, 
m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); + OUT_RELOCl(chan, src_bo, src_offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, dst_bo, dst_offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RING (chan, src_pitch); + OUT_RING (chan, dst_pitch); + OUT_RING (chan, 1 * src->texture->block.size); + OUT_RING (chan, 1); + OUT_RING (chan, 0x0101); + OUT_RING (chan, 0); + + return 0; +} + +static int nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst, int dx, int dy, struct pipe_surface *src, int sx, int sy, int w, int h) @@ -258,8 +295,59 @@ nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst, assert(src->format == dst->format); /* Setup transfer to swizzle the texture to vram if needed */ - if (src_linear && !dst_linear && w > 1 && h > 1) { - nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h); + if (src_linear && !dst_linear) { + int x,y; + + if ((w>1) && (h>1)) { + int potWidth = 1<<log2i(w); + int potHeight = 1<<log2i(h); + int remainWidth = w-potWidth; + int remainHeight = h-potHeight; + int squareDim = (potWidth>potHeight ? 
potHeight : potWidth); + + /* top left is always POT, but we can only swizzle squares */ + for (y=0; y<potHeight; y+=squareDim) { + for (x=0; x<potWidth; x+= squareDim) { + nv04_surface_copy_swizzle(ctx, dst, dx+x, dy+y, + src, sx+x, sy+y, + squareDim, squareDim); + } + } + + /* top right */ + if (remainWidth>0) { + nv04_surface_copy(ctx, dst, dx+potWidth, dy, + src, sx+potWidth, sy, + remainWidth, potHeight); + } + + /* bottom left */ + if (remainHeight>0) { + nv04_surface_copy(ctx, dst, dx, dy+potHeight, + src, sx, sy+potHeight, + potWidth, remainHeight); + } + + /* bottom right */ + if ((remainWidth>0) && (remainHeight>0)) { + nv04_surface_copy(ctx, dst, dx+potWidth, dy+potHeight, + src, sx+potWidth, sy+potHeight, + remainWidth, remainHeight); + } + } else if (w==1) { + /* We have a column to copy to a swizzled texture */ + for (y=0; y<h; y++) { + nv04_surface_copy_m2mf_swizzle(ctx, dst, dx, dy+y, + src, sx, sy+y); + } + } else if (h==1) { + /* We have a row to copy to a swizzled texture */ + for (x=0; x<w; x++) { + nv04_surface_copy_m2mf_swizzle(ctx, dst, dx+x, dy, + src, sx+x, sy); + } + } + return; } diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c index c1e588902b..388245ecb0 100644 --- a/src/gallium/drivers/nv20/nv20_vertprog.c +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -617,10 +617,10 @@ nv20_vertprog_translate(struct nv20_context *nv20, assert(imm->Immediate.NrTokens == 4 + 1); vpc->imm[vpc->nr_imm++] = constant(vpc, -1, - imm->u.ImmediateFloat32[0].Float, - imm->u.ImmediateFloat32[1].Float, - imm->u.ImmediateFloat32[2].Float, - imm->u.ImmediateFloat32[3].Float); + imm->u[0].Float, + imm->u[1].Float, + imm->u[2].Float, + imm->u[3].Float); } break; case TGSI_TOKEN_TYPE_INSTRUCTION: diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 1d1c556fb1..a48ba9782b 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ 
b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -704,10 +704,10 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc) assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); assert(fpc->nr_imm < MAX_IMM); - vals[0] = imm->u.ImmediateFloat32[0].Float; - vals[1] = imm->u.ImmediateFloat32[1].Float; - vals[2] = imm->u.ImmediateFloat32[2].Float; - vals[3] = imm->u.ImmediateFloat32[3].Float; + vals[0] = imm->u[0].Float; + vals[1] = imm->u[1].Float; + vals[2] = imm->u[2].Float; + vals[3] = imm->u[3].Float; fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); } break; diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index c8b40784b0..f8285e4455 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -95,7 +95,7 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, } } else if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { - switch (tex_usage) { + switch (format) { case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z16_UNORM: diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index c7514efcfe..14a5c0260d 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -617,10 +617,10 @@ nv30_vertprog_translate(struct nv30_context *nv30, assert(imm->Immediate.NrTokens == 4 + 1); vpc->imm[vpc->nr_imm++] = constant(vpc, -1, - imm->u.ImmediateFloat32[0].Float, - imm->u.ImmediateFloat32[1].Float, - imm->u.ImmediateFloat32[2].Float, - imm->u.ImmediateFloat32[3].Float); + imm->u[0].Float, + imm->u[1].Float, + imm->u[2].Float, + imm->u[3].Float); } break; case TGSI_TOKEN_TYPE_INSTRUCTION: diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 680976da56..32d9ed1a7f 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -790,10 +790,10 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) 
assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); assert(fpc->nr_imm < MAX_IMM); - vals[0] = imm->u.ImmediateFloat32[0].Float; - vals[1] = imm->u.ImmediateFloat32[1].Float; - vals[2] = imm->u.ImmediateFloat32[2].Float; - vals[3] = imm->u.ImmediateFloat32[3].Float; + vals[0] = imm->u[0].Float; + vals[1] = imm->u[1].Float; + vals[2] = imm->u[2].Float; + vals[3] = imm->u[3].Float; fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); } break; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index e75e8d3f42..0382dbba8f 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -788,10 +788,10 @@ nv40_vertprog_translate(struct nv40_context *nv40, assert(imm->Immediate.NrTokens == 4 + 1); vpc->imm[vpc->nr_imm++] = constant(vpc, -1, - imm->u.ImmediateFloat32[0].Float, - imm->u.ImmediateFloat32[1].Float, - imm->u.ImmediateFloat32[2].Float, - imm->u.ImmediateFloat32[3].Float); + imm->u[0].Float, + imm->u[1].Float, + imm->u[2].Float, + imm->u[3].Float); } break; case TGSI_TOKEN_TYPE_INSTRUCTION: diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index e02afc4be9..6e8f4f9750 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -31,15 +31,23 @@ static void nv50_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { - struct nv50_context *nv50 = (struct nv50_context *)pipe; - - FIRE_RING(nv50->screen->base.channel); + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *eng2d = nv50->screen->eng2d; + + /* We need this in the ddx for reliable composite, not sure what we're + * actually flushing. We generate all our own flushes with flags = 0. 
*/ + WAIT_RING(chan, 3); + BEGIN_RING(chan, eng2d, 0x0110, 1); + OUT_RING (chan, 0); + + FIRE_RING(chan); } static void nv50_destroy(struct pipe_context *pipe) { - struct nv50_context *nv50 = (struct nv50_context *)pipe; + struct nv50_context *nv50 = nv50_context(pipe); draw_destroy(nv50->draw); FREE(nv50); @@ -112,5 +120,3 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) return &nv50->pipe; } - - diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 9b8cc4d37d..5cbc2c8f82 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -71,6 +71,7 @@ struct nv50_sampler_stateobj { struct nv50_miptree_level { int *image_offset; unsigned pitch; + unsigned tile_mode; }; struct nv50_miptree { diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 22465e0227..dd1b0303bd 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -42,9 +42,14 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) mt->base.screen = pscreen; switch (pt->format) { - case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + tile_flags = 0x4800; + break; case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z16_UNORM: + tile_flags = 0x1800; + break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: tile_flags = 0x2800; break; default: @@ -82,20 +87,27 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) lvl->image_offset = CALLOC(mt->image_nr, sizeof(int)); lvl->pitch = align(pt->width[l] * pt->block.size, 64); + lvl->tile_mode = tile_mode; width = MAX2(1, width >> 1); height = MAX2(1, height >> 1); depth = MAX2(1, depth >> 1); + + if (tile_mode && height <= (tile_h >> 1)) { + tile_mode--; + tile_h >>= 1; + } } for (i = 0; i < mt->image_nr; i++) { for (l = 0; l <= pt->last_level; l++) { struct nv50_miptree_level *lvl = &mt->level[l]; int 
size; + tile_h = 1 << (lvl->tile_mode + 2); size = align(pt->width[l], 8) * pt->block.size; size = align(size, 64); - size *= align(pt->height[l], tile_h) * pt->block.size; + size *= align(pt->height[l], tile_h); lvl->image_offset[i] = mt->total_size; @@ -104,12 +116,12 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) } ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, mt->total_size, - tile_mode, tile_flags, &mt->bo); + mt->level[0].tile_mode, tile_flags, &mt->bo); if (ret) { FREE(mt); return NULL; } - + return &mt->base; } @@ -146,7 +158,7 @@ nv50_miptree_destroy(struct pipe_texture *pt) struct nv50_miptree *mt = nv50_miptree(pt); nouveau_bo_ref(NULL, &mt->bo); - FREE(mt); + FREE(mt); } static struct pipe_surface * @@ -189,8 +201,8 @@ nv50_miptree_surface_del(struct pipe_surface *ps) { struct nv50_surface *s = nv50_surface(ps); - pipe_texture_reference(&ps->texture, NULL); - FREE(s); + pipe_texture_reference(&ps->texture, NULL); + FREE(s); } void diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 5f7d06dbec..289c3485e0 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -251,7 +251,7 @@ alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx) if (pc->r_temp[idx] || pc->r_temp[idx + 1] || pc->r_temp[idx + 2] || pc->r_temp[idx + 3]) - return alloc_temp4(pc, dst, idx + 1); + return alloc_temp4(pc, dst, idx + 4); for (i = 0; i < 4; i++) { dst[i] = CALLOC_STRUCT(nv50_reg); @@ -296,7 +296,7 @@ kill_temp_temp(struct nv50_pc *pc) static int ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) { - pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)), + pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(float)), (pc->immd_nr + 1) * 4 * sizeof(float)); pc->immd_buf[(pc->immd_nr * 4) + 0] = x; pc->immd_buf[(pc->immd_nr * 4) + 1] = y; @@ -1014,6 +1014,7 @@ emit_tex(struct nv50_pc *pc, 
struct nv50_reg **dst, unsigned mask, break; } + /* some cards need t[0]'s hw index to be a multiple of 4 */ alloc_temp4(pc, t, 0); if (proj) { @@ -1809,10 +1810,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) const struct tgsi_full_immediate *imm = &p.FullToken.FullImmediate; - ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, - imm->u.ImmediateFloat32[1].Float, - imm->u.ImmediateFloat32[2].Float, - imm->u.ImmediateFloat32[3].Float); + ctor_immd(pc, imm->u[0].Float, + imm->u[1].Float, + imm->u[2].Float, + imm->u[3].Float); } break; case TGSI_TOKEN_TYPE_DECLARATION: @@ -2221,9 +2222,9 @@ nv50_program_upload_data(struct nv50_context *nv50, float *map, while (count) { unsigned nr = count > 2047 ? 2047 : count; - BEGIN_RING(chan, tesla, 0x00000f00, 1); + BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); OUT_RING (chan, (cbuf << 0) | (start << 8)); - BEGIN_RING(chan, tesla, 0x40000f04, nr); + BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr); OUT_RINGp (chan, map, nr); map += nr; @@ -2345,7 +2346,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) } so = so_new(4,2); - so_method(so, nv50->screen->tesla, 0x1280, 3); + so_method(so, nv50->screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); @@ -2364,9 +2365,9 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) continue; } - BEGIN_RING(chan, tesla, 0x0f00, 1); + BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD); - BEGIN_RING(chan, tesla, 0x40000f04, nr); + BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr); OUT_RINGp (chan, up + start, nr); start += nr; @@ -2399,15 +2400,15 @@ nv50_vertprog_validate(struct nv50_context *nv50) NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - 
so_method(so, tesla, 0x1650, 2); + so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2); so_data (so, p->cfg.vp.attr[0]); so_data (so, p->cfg.vp.attr[1]); - so_method(so, tesla, 0x16b8, 1); + so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, 0x16ac, 2); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2); so_data (so, p->cfg.high_result); //8); so_data (so, p->cfg.high_temp); - so_method(so, tesla, 0x140c, 1); + so_method(so, tesla, NV50TCL_VP_START_ID, 1); so_data (so, 0); /* program start offset */ so_ref(so, &nv50->state.vertprog); so_ref(NULL, &so); @@ -2436,24 +2437,24 @@ nv50_fragprog_validate(struct nv50_context *nv50) NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, 0x1904, 4); + so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */ so_data (so, 0x00000004); so_data (so, 0x00000000); so_data (so, 0x00000000); - so_method(so, tesla, 0x16bc, p->cfg.fp.high_map); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), p->cfg.fp.high_map); for (i = 0; i < p->cfg.fp.high_map; i++) so_data(so, p->cfg.fp.map[i]); - so_method(so, tesla, 0x1988, 2); + so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 2); so_data (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */ so_data (so, p->cfg.high_temp); - so_method(so, tesla, 0x1298, 1); + so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, 0x19a8, 1); + so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1); so_data (so, p->cfg.fp.regs[2]); - so_method(so, tesla, 0x196c, 1); + so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); so_data (so, p->cfg.fp.regs[3]); - so_method(so, tesla, 0x1414, 1); + so_method(so, tesla, NV50TCL_FP_START_ID, 1); so_data (so, 0); /* program start offset */ so_ref(so, &nv50->state.fragprog); so_ref(NULL, &so); @@ -2478,4 +2479,3 @@ nv50_program_destroy(struct 
nv50_context *nv50, struct nv50_program *p) p->translated = 0; } - diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 940e04365f..5305c93d59 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -94,7 +94,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) struct nv50_query *q = nv50_query(pq); WAIT_RING (chan, 5); - BEGIN_RING(chan, tesla, 0x1b00, 4); + BEGIN_RING(chan, tesla, NV50TCL_QUERY_ADDRESS_HIGH, 4); OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); OUT_RING (chan, 0x00000000); @@ -107,13 +107,13 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, boolean wait, uint64_t *result) { struct nv50_query *q = nv50_query(pq); - - /*XXX: Want to be able to return FALSE here instead of blocking - * until the result is available.. - */ + int ret; if (!q->ready) { - nouveau_bo_map(q->bo, NOUVEAU_BO_RD); + ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD | + (wait ? 
0 : NOUVEAU_BO_NOWAIT)); + if (ret) + return false; q->result = ((uint32_t *)q->bo->map)[1]; q->ready = TRUE; nouveau_bo_unmap(q->bo); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index ce8f906b15..c7f80a2203 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -44,9 +44,10 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, } else if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { switch (format) { + case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: return TRUE; default: break; @@ -188,7 +189,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_transfer_init_screen_functions(pscreen); /* DMA engine object */ - ret = nouveau_grobj_alloc(chan, 0xbeef5039, 0x5039, &screen->m2mf); + ret = nouveau_grobj_alloc(chan, 0xbeef5039, + NV50_MEMORY_TO_MEMORY_FORMAT, &screen->m2mf); if (ret) { NOUVEAU_ERR("Error creating M2MF object: %d\n", ret); nv50_screen_destroy(pscreen); @@ -197,7 +199,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BIND_RING(chan, screen->m2mf, 1); /* 2D object */ - ret = nouveau_grobj_alloc(chan, 0xbeef502d, 0x502d, &screen->eng2d); + ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d); if (ret) { NOUVEAU_ERR("Error creating 2D object: %d\n", ret); nv50_screen_destroy(pscreen); @@ -208,14 +210,15 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* 3D object */ switch (chipset & 0xf0) { case 0x50: - tesla_class = 0x5097; + tesla_class = NV50TCL; break; case 0x80: case 0x90: - tesla_class = 0x8297; + /* this stupid name should be corrected. 
*/ + tesla_class = NV54TCL; break; case 0xa0: - tesla_class = 0x8397; + tesla_class = NVA0TCL; break; default: NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", chipset); @@ -229,7 +232,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class, &screen->tesla); + ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class, + &screen->tesla); if (ret) { NOUVEAU_ERR("Error creating 3D object: %d\n", ret); nv50_screen_destroy(pscreen); @@ -247,7 +251,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Static M2MF init */ so = so_new(32, 0); - so_method(so, screen->m2mf, 0x0180, 3); + so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3); so_data (so, screen->sync->handle); so_data (so, chan->vram->handle); so_data (so, chan->vram->handle); @@ -290,9 +294,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, 0x13bc, 1); so_data (so, 0x54); + /* origin is top left (set to 1 for bottom left) */ so_method(so, screen->tesla, 0x13ac, 1); - so_data (so, 1); - so_method(so, screen->tesla, 0x16b8, 1); + so_data (so, 0); + so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, 8); /* constant buffers for immediates and VP/FP parameters */ @@ -330,33 +335,33 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, 0x000BBNP1); */ - so_method(so, screen->tesla, 0x1280, 3); + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PMISC << 16) | 0x00000800); - so_method(so, screen->tesla, 0x1694, 1); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000001 | (NV50_CB_PMISC << 12)); - so_method(so, 
screen->tesla, 0x1694, 1); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000031 | (NV50_CB_PMISC << 12)); - so_method(so, screen->tesla, 0x1280, 3); + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PVP << 16) | 0x00000800); - so_method(so, screen->tesla, 0x1694, 1); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000101 | (NV50_CB_PVP << 12)); - so_method(so, screen->tesla, 0x1280, 3); + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PFP << 16) | 0x00000800); - so_method(so, screen->tesla, 0x1694, 1); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000131 | (NV50_CB_PFP << 12)); /* Texture sampler/image unit setup - we abuse the constant buffer @@ -370,13 +375,13 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - so_method(so, screen->tesla, 0x1280, 3); + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_TIC << 16) | 0x0800); - so_method(so, screen->tesla, 0x1574, 3); + so_method(so, screen->tesla, NV50TCL_TIC_ADDRESS_HIGH, 3); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | @@ -389,13 +394,13 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - 
so_method(so, screen->tesla, 0x1280, 3); + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_TSC << 16) | 0x0800); - so_method(so, screen->tesla, 0x155c, 3); + so_method(so, screen->tesla, NV50TCL_TSC_ADDRESS_HIGH, 3); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | @@ -405,7 +410,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Vertex array limits - max them out */ for (i = 0; i < 16; i++) { - so_method(so, screen->tesla, 0x1080 + (i * 8), 2); + so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2); so_data (so, 0x000000ff); so_data (so, 0xffffffff); } @@ -417,6 +422,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, 0x1234, 1); so_data (so, 1); + /* activate first scissor rectangle */ + so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1); + so_data (so, 1); + so_emit(chan, so); so_ref (so, &screen->static_init); so_ref (NULL, &so); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 116866a8e7..ef4154d303 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -205,11 +205,16 @@ nv50_sampler_state_create(struct pipe_context *pipe, } limit = CLAMP(cso->lod_bias, -16.0, 15.0); - tsc[1] |= ((int)(limit * 256.0) & 0x1fff) << 11; + tsc[1] |= ((int)(limit * 256.0) & 0x1fff) << 12; tsc[2] |= ((int)CLAMP(cso->max_lod, 0.0, 15.0) << 20) | ((int)CLAMP(cso->min_lod, 0.0, 15.0) << 8); + tsc[4] = fui(cso->border_color[0]); + tsc[5] = fui(cso->border_color[1]); + tsc[6] = fui(cso->border_color[2]); + tsc[7] = fui(cso->border_color[3]); + sso->normalized = cso->normalized_coords; return (void *)sso; } @@ 
-404,35 +409,35 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, 0); } - /*XXX: yes, I know they're backwards.. header needs fixing */ + /* XXX: keep hex values until header is updated (names reversed) */ if (cso->stencil[0].enabled) { - so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); + so_method(so, tesla, 0x1380, 8); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); so_data (so, nvgl_comparison_op(cso->stencil[0].func)); - so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3); so_data (so, cso->stencil[0].ref_value); so_data (so, cso->stencil[0].writemask); so_data (so, cso->stencil[0].valuemask); } else { - so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); + so_method(so, tesla, 0x1380, 1); so_data (so, 0); } if (cso->stencil[1].enabled) { - so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8); + so_method(so, tesla, 0x1594, 5); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_method(so, tesla, 0x0f54, 3); so_data (so, cso->stencil[1].ref_value); so_data (so, cso->stencil[1].writemask); so_data (so, cso->stencil[1].valuemask); } else { - so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); + so_method(so, tesla, 0x1594, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index d313e9de4f..a879df2e6e 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -55,15 +55,15 @@ nv50_state_validate_fb(struct nv50_context *nv50) NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); switch (fb->cbufs[i]->format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - so_data(so, 0xcf); + 
so_data(so, NV50TCL_RT_FORMAT_A8R8G8B8_UNORM); break; case PIPE_FORMAT_R5G6B5_UNORM: - so_data(so, 0xe8); + so_data(so, NV50TCL_RT_FORMAT_R5G6B5_UNORM); break; default: NOUVEAU_ERR("AIIII unknown format %s\n", pf_name(fb->cbufs[i]->format)); - so_data(so, 0xe6); + so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM); break; } so_data(so, bo->tile_mode << 4); @@ -92,17 +92,22 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); switch (fb->zsbuf->format) { + case PIPE_FORMAT_Z32_FLOAT: + so_data(so, NV50TCL_ZETA_FORMAT_Z32_FLOAT); + break; case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - so_data(so, 0x16); + so_data(so, NV50TCL_ZETA_FORMAT_Z24S8_UNORM); + break; + case PIPE_FORMAT_X8Z24_UNORM: + so_data(so, NV50TCL_ZETA_FORMAT_X8Z24_UNORM); break; - case PIPE_FORMAT_Z16_UNORM: - so_data(so, 0x15); + case PIPE_FORMAT_S8Z24_UNORM: + so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM); break; default: NOUVEAU_ERR("AIIII unknown format %s\n", pf_name(fb->zsbuf->format)); - so_data(so, 0x16); + so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM); break; } so_data(so, bo->tile_mode << 4); @@ -110,7 +115,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_method(so, tesla, 0x1538, 1); so_data (so, 1); - so_method(so, tesla, 0x1228, 3); + so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3); so_data (so, fb->zsbuf->width); so_data (so, fb->zsbuf->height); so_data (so, 0x00010001); @@ -119,12 +124,18 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); so_data (so, w << 16); so_data (so, h << 16); - so_method(so, tesla, 0x0e04, 2); + /* set window lower left corner */ + so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2); + so_data (so, 0); + so_data (so, 0); + /* set screen scissor rectangle */ + so_method(so, tesla, NV50TCL_SCREEN_SCISSOR_HORIZ, 2); so_data (so, w << 16); so_data (so, h << 16); - so_method(so, tesla, 0xdf8, 2); - so_data (so, 0); 
- so_data (so, h); + + /* we set scissors to framebuffer size when they're 'turned off' */ + nv50->dirty |= NV50_NEW_SCISSOR; + so_ref(NULL, &nv50->state.scissor); so_ref(so, &nv50->state.fb); so_ref(NULL, &so); @@ -137,7 +148,32 @@ nv50_state_emit(struct nv50_context *nv50) struct nouveau_channel *chan = screen->base.channel; if (nv50->pctx_id != screen->cur_pctx) { - nv50->state.dirty |= 0xffffffff; + if (nv50->state.fb) + nv50->state.dirty |= NV50_NEW_FRAMEBUFFER; + if (nv50->state.blend) + nv50->state.dirty |= NV50_NEW_BLEND; + if (nv50->state.zsa) + nv50->state.dirty |= NV50_NEW_ZSA; + if (nv50->state.vertprog) + nv50->state.dirty |= NV50_NEW_VERTPROG; + if (nv50->state.fragprog) + nv50->state.dirty |= NV50_NEW_FRAGPROG; + if (nv50->state.rast) + nv50->state.dirty |= NV50_NEW_RASTERIZER; + if (nv50->state.blend_colour) + nv50->state.dirty |= NV50_NEW_BLEND_COLOUR; + if (nv50->state.stipple) + nv50->state.dirty |= NV50_NEW_STIPPLE; + if (nv50->state.scissor) + nv50->state.dirty |= NV50_NEW_SCISSOR; + if (nv50->state.viewport) + nv50->state.dirty |= NV50_NEW_VIEWPORT; + if (nv50->state.tsc_upload) + nv50->state.dirty |= NV50_NEW_SAMPLER; + if (nv50->state.tic_upload) + nv50->state.dirty |= NV50_NEW_TEXTURE; + if (nv50->state.vtxfmt && nv50->state.vtxbuf) + nv50->state.dirty |= NV50_NEW_ARRAYS; screen->cur_pctx = nv50->pctx_id; } @@ -233,13 +269,13 @@ nv50_state_validate(struct nv50_context *nv50) nv50->state.scissor_enabled = rast->scissor; so = so_new(3, 0); - so_method(so, tesla, 0x0ff4, 2); + so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2); if (nv50->state.scissor_enabled) { - so_data(so, ((s->maxx - s->minx) << 16) | s->minx); - so_data(so, ((s->maxy - s->miny) << 16) | s->miny); + so_data(so, (s->maxx << 16) | s->minx); + so_data(so, (s->maxy << 16) | s->miny); } else { - so_data(so, (8192 << 16)); - so_data(so, (8192 << 16)); + so_data(so, (nv50->framebuffer.width << 16)); + so_data(so, (nv50->framebuffer.height << 16)); } so_ref(so, &nv50->state.scissor); 
so_ref(NULL, &so); @@ -263,20 +299,22 @@ scissor_uptodate: so = so_new(12, 0); if (!bypass) { - so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3); so_data (so, fui(nv50->viewport.translate[0])); so_data (so, fui(nv50->viewport.translate[1])); so_data (so, fui(nv50->viewport.translate[2])); - so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3); so_data (so, fui(nv50->viewport.scale[0])); - so_data (so, fui(-nv50->viewport.scale[1])); + so_data (so, fui(nv50->viewport.scale[1])); so_data (so, fui(nv50->viewport.scale[2])); - so_method(so, tesla, 0x192c, 1); + + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); so_data (so, 1); + /* no idea what 0f90 does */ so_method(so, tesla, 0x0f90, 1); so_data (so, 0); } else { - so_method(so, tesla, 0x192c, 1); + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); so_data (so, 0); so_method(so, tesla, 0x0f90, 1); so_data (so, 1); @@ -292,9 +330,10 @@ viewport_uptodate: int i; so = so_new(nv50->sampler_nr * 8 + 3, 0); - so_method(so, tesla, 0x0f00, 1); + so_method(so, tesla, NV50TCL_CB_ADDR, 1); so_data (so, NV50_CB_TSC); - so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8); + so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000, + nv50->sampler_nr * 8); for (i = 0; i < nv50->sampler_nr; i++) so_datap (so, nv50->sampler[i]->tsc, 8); so_ref(so, &nv50->state.tsc_upload); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 3da9d6e728..edaf4b055a 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -35,13 +35,13 @@ nv50_format(enum pipe_format format) { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - return NV50_2D_DST_FORMAT_32BPP; + return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM; case PIPE_FORMAT_X8R8G8B8_UNORM: - return NV50_2D_DST_FORMAT_24BPP; + return NV50_2D_DST_FORMAT_X8R8G8B8_UNORM; case 
PIPE_FORMAT_R5G6B5_UNORM: - return NV50_2D_DST_FORMAT_16BPP; + return NV50_2D_DST_FORMAT_R5G6B5_UNORM; case PIPE_FORMAT_A8_UNORM: - return NV50_2D_DST_FORMAT_8BPP; + return NV50_2D_DST_FORMAT_R8_UNORM; default: return -1; } @@ -144,7 +144,7 @@ nv50_surface_copy(struct pipe_context *pipe, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - struct nv50_context *nv50 = (struct nv50_context *)pipe; + struct nv50_context *nv50 = nv50_context(pipe); struct nv50_screen *screen = nv50->screen; assert(src->format == dest->format); @@ -158,7 +158,7 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, unsigned destx, unsigned desty, unsigned width, unsigned height, unsigned value) { - struct nv50_context *nv50 = (struct nv50_context *)pipe; + struct nv50_context *nv50 = nv50_context(pipe); struct nv50_screen *screen = nv50->screen; struct nouveau_channel *chan = screen->eng2d->channel; struct nouveau_grobj *eng2d = screen->eng2d; diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index ff40c2ad81..14c68b96e1 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -32,30 +32,30 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, switch (mt->base.format) { case PIPE_FORMAT_A8R8G8B8_UNORM: so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | NV50TIC_0_0_FMT_8_8_8_8); break; case PIPE_FORMAT_A1R5G5B5_UNORM: so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + 
NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | NV50TIC_0_0_FMT_1_5_5_5); break; case PIPE_FORMAT_A4R4G4B4_UNORM: so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | NV50TIC_0_0_FMT_4_4_4_4); break; case PIPE_FORMAT_R5G6B5_UNORM: so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | NV50TIC_0_0_FMT_5_6_5); break; case PIPE_FORMAT_L8_UNORM: @@ -145,25 +145,28 @@ nv50_tex_validate(struct nv50_context *nv50) push += MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2; so = so_new(push, nv50->miptree_nr * 2); - so_method(so, tesla, 0x0f00, 1); + so_method(so, tesla, NV50TCL_CB_ADDR, 1); so_data (so, NV50_CB_TIC); for (unit = 0; unit < nv50->miptree_nr; unit++) { struct nv50_miptree *mt = nv50->miptree[unit]; - so_method(so, tesla, 0x40000f04, 8); + so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000, 8); if (nv50_tex_construct(nv50, so, mt, unit)) { NOUVEAU_ERR("failed tex validate\n"); so_ref(NULL, &so); return; } - so_method(so, tesla, 0x1458, 1); - so_data (so, (unit << 9) | (unit << 1) | 1); + so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1); + so_data (so, (unit << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) | + (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | + NV50TCL_SET_SAMPLER_TEX_VALID); } for (; unit < nv50->state.miptree_nr; unit++) { - so_method(so, tesla, 0x1458, 1); - so_data (so, (unit << 1) | 0); + so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1); + so_data (so, + (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0); } so_ref(so, &nv50->state.tic_upload); 
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h index aca622c73b..207fb039f7 100644 --- a/src/gallium/drivers/nv50/nv50_texture.h +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -14,13 +14,13 @@ #define NV50TIC_0_0_MAPA_C2 0x20000000 #define NV50TIC_0_0_MAPA_C3 0x28000000 #define NV50TIC_0_0_MAPA_ONE 0x38000000 -#define NV50TIC_0_0_MAPR_MASK 0x07000000 -#define NV50TIC_0_0_MAPR_ZERO 0x00000000 -#define NV50TIC_0_0_MAPR_C0 0x02000000 -#define NV50TIC_0_0_MAPR_C1 0x03000000 -#define NV50TIC_0_0_MAPR_C2 0x04000000 -#define NV50TIC_0_0_MAPR_C3 0x05000000 -#define NV50TIC_0_0_MAPR_ONE 0x07000000 +#define NV50TIC_0_0_MAPB_MASK 0x07000000 +#define NV50TIC_0_0_MAPB_ZERO 0x00000000 +#define NV50TIC_0_0_MAPB_C0 0x02000000 +#define NV50TIC_0_0_MAPB_C1 0x03000000 +#define NV50TIC_0_0_MAPB_C2 0x04000000 +#define NV50TIC_0_0_MAPB_C3 0x05000000 +#define NV50TIC_0_0_MAPB_ONE 0x07000000 #define NV50TIC_0_0_MAPG_MASK 0x00e00000 #define NV50TIC_0_0_MAPG_ZERO 0x00000000 #define NV50TIC_0_0_MAPG_C0 0x00400000 @@ -28,31 +28,49 @@ #define NV50TIC_0_0_MAPG_C2 0x00800000 #define NV50TIC_0_0_MAPG_C3 0x00a00000 #define NV50TIC_0_0_MAPG_ONE 0x00e00000 -#define NV50TIC_0_0_MAPB_MASK 0x001c0000 -#define NV50TIC_0_0_MAPB_ZERO 0x00000000 -#define NV50TIC_0_0_MAPB_C0 0x00080000 -#define NV50TIC_0_0_MAPB_C1 0x000c0000 -#define NV50TIC_0_0_MAPB_C2 0x00100000 -#define NV50TIC_0_0_MAPB_C3 0x00140000 -#define NV50TIC_0_0_MAPB_ONE 0x001c0000 +#define NV50TIC_0_0_MAPR_MASK 0x001c0000 +#define NV50TIC_0_0_MAPR_ZERO 0x00000000 +#define NV50TIC_0_0_MAPR_C0 0x00080000 +#define NV50TIC_0_0_MAPR_C1 0x000c0000 +#define NV50TIC_0_0_MAPR_C2 0x00100000 +#define NV50TIC_0_0_MAPR_C3 0x00140000 +#define NV50TIC_0_0_MAPR_ONE 0x001c0000 #define NV50TIC_0_0_TYPEA_MASK 0x00038000 #define NV50TIC_0_0_TYPEA_UNORM 0x00010000 -#define NV50TIC_0_0_TYPER_MASK 0x00007000 -#define NV50TIC_0_0_TYPER_UNORM 0x00002000 +#define NV50TIC_0_0_TYPEA_SNORM 0x00008000 +#define 
NV50TIC_0_0_TYPEA_FLOAT 0x00038000 +#define NV50TIC_0_0_TYPEB_MASK 0x00007000 +#define NV50TIC_0_0_TYPEB_UNORM 0x00002000 +#define NV50TIC_0_0_TYPEB_SNORM 0x00001000 +#define NV50TIC_0_0_TYPEB_FLOAT 0x00007000 #define NV50TIC_0_0_TYPEG_MASK 0x00000e00 #define NV50TIC_0_0_TYPEG_UNORM 0x00000400 -#define NV50TIC_0_0_TYPEB_MASK 0x000001c0 -#define NV50TIC_0_0_TYPEB_UNORM 0x00000080 -#define NV50TIC_0_0_FMT_MASK 0x0000003c +#define NV50TIC_0_0_TYPEG_SNORM 0x00000200 +#define NV50TIC_0_0_TYPEG_FLOAT 0x00000e00 +#define NV50TIC_0_0_TYPER_MASK 0x000001c0 +#define NV50TIC_0_0_TYPER_UNORM 0x00000080 +#define NV50TIC_0_0_TYPER_SNORM 0x00000040 +#define NV50TIC_0_0_TYPER_FLOAT 0x000001c0 +#define NV50TIC_0_0_FMT_MASK 0x0000003f +#define NV50TIC_0_0_FMT_32_32_32_32 0x00000001 +#define NV50TIC_0_0_FMT_16_16_16_16 0x00000003 +#define NV50TIC_0_0_FMT_32_32 0x00000004 #define NV50TIC_0_0_FMT_8_8_8_8 0x00000008 +#define NV50TIC_0_0_FMT_2_10_10_10 0x00000009 +#define NV50TIC_0_0_FMT_32 0x0000000f #define NV50TIC_0_0_FMT_4_4_4_4 0x00000012 -#define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 +/* #define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 */ +#define NV50TIC_0_0_FMT_1_5_5_5 0x00000014 #define NV50TIC_0_0_FMT_5_6_5 0x00000015 #define NV50TIC_0_0_FMT_8_8 0x00000018 +#define NV50TIC_0_0_FMT_16 0x0000001b #define NV50TIC_0_0_FMT_8 0x0000001d +#define NV50TIC_0_0_FMT_10_11_11 0x00000021 #define NV50TIC_0_0_FMT_DXT1 0x00000024 #define NV50TIC_0_0_FMT_DXT3 0x00000025 #define NV50TIC_0_0_FMT_DXT5 0x00000026 +#define NV50TIC_0_0_FMT_RGTC1 0x00000027 +#define NV50TIC_0_0_FMT_RGTC2 0x00000028 #define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff #define NV50TIC_0_1_OFFSET_LOW_SHIFT 0 @@ -102,6 +120,7 @@ #define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE 0x00000140 #define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER 0x00000180 #define NV50TSC_1_0_WRAPR_MIRROR_CLAMP 0x000001c0 +#define NV50TSC_1_0_MAX_ANISOTROPY_MASK 0x00700000 #define NV50TSC_1_1_MAGF_MASK 0x00000003 #define NV50TSC_1_1_MAGF_NEAREST 0x00000001 @@ 
-113,17 +132,19 @@ #define NV50TSC_1_1_MIPF_NONE 0x00000040 #define NV50TSC_1_1_MIPF_NEAREST 0x00000080 #define NV50TSC_1_1_MIPF_LINEAR 0x000000c0 +#define NV50TSC_1_1_LOD_BIAS_MASK 0x01fff000 -#define NV50TSC_1_2_UNKNOWN_MASK 0xffffffff +#define NV50TSC_1_2_MIN_LOD_MASK 0x00000f00 +#define NV50TSC_1_2_MAX_LOD_MASK 0x00f00000 #define NV50TSC_1_3_UNKNOWN_MASK 0xffffffff -#define NV50TSC_1_4_UNKNOWN_MASK 0xffffffff +#define NV50TSC_1_4_BORDER_COLOR_RED_MASK 0xffffffff -#define NV50TSC_1_5_UNKNOWN_MASK 0xffffffff +#define NV50TSC_1_5_BORDER_COLOR_GREEN_MASK 0xffffffff -#define NV50TSC_1_6_UNKNOWN_MASK 0xffffffff +#define NV50TSC_1_6_BORDER_COLOR_BLUE_MASK 0xffffffff -#define NV50TSC_1_7_UNKNOWN_MASK 0xffffffff +#define NV50TSC_1_7_BORDER_COLOR_ALPHA_MASK 0xffffffff #endif diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index d0b7f0bef4..d2b5e4d75d 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -8,6 +8,7 @@ struct nv50_transfer { struct pipe_transfer base; struct nouveau_bo *bo; unsigned level_offset; + unsigned level_tiling; int level_pitch; int level_width; int level_height; @@ -16,11 +17,14 @@ struct nv50_transfer { }; static void -nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo, - unsigned src_offset, int src_pitch, int sx, int sy, - int sw, int sh, struct nouveau_bo *dst_bo, - unsigned dst_offset, int dst_pitch, int dx, int dy, - int dw, int dh, int cpp, int width, int height, +nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, + struct nouveau_bo *src_bo, unsigned src_offset, + int src_pitch, unsigned src_tile_mode, + int sx, int sy, int sw, int sh, + struct nouveau_bo *dst_bo, unsigned dst_offset, + int dst_pitch, unsigned dst_tile_mode, + int dx, int dy, int dw, int dh, + int cpp, int width, int height, unsigned src_reloc, unsigned dst_reloc) { struct nv50_screen *screen = nv50_screen(pscreen); @@ -33,15 +37,18 @@ 
nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo, WAIT_RING (chan, 14); if (!src_bo->tile_flags) { - BEGIN_RING(chan, m2mf, 0x0200, 1); + BEGIN_RING(chan, m2mf, + NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); OUT_RING (chan, 1); - BEGIN_RING(chan, m2mf, 0x0314, 1); + BEGIN_RING(chan, m2mf, + NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); OUT_RING (chan, src_pitch); src_offset += (sy * src_pitch) + (sx * cpp); } else { - BEGIN_RING(chan, m2mf, 0x0200, 6); + BEGIN_RING(chan, m2mf, + NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 6); OUT_RING (chan, 0); - OUT_RING (chan, src_bo->tile_mode << 4); + OUT_RING (chan, src_tile_mode << 4); OUT_RING (chan, sw * cpp); OUT_RING (chan, sh); OUT_RING (chan, 1); @@ -49,15 +56,18 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo, } if (!dst_bo->tile_flags) { - BEGIN_RING(chan, m2mf, 0x021c, 1); + BEGIN_RING(chan, m2mf, + NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); OUT_RING (chan, 1); - BEGIN_RING(chan, m2mf, 0x0318, 1); + BEGIN_RING(chan, m2mf, + NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); OUT_RING (chan, dst_pitch); dst_offset += (dy * dst_pitch) + (dx * cpp); } else { - BEGIN_RING(chan, m2mf, 0x021c, 6); + BEGIN_RING(chan, m2mf, + NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 6); OUT_RING (chan, 0); - OUT_RING (chan, dst_bo->tile_mode << 4); + OUT_RING (chan, dst_tile_mode << 4); OUT_RING (chan, dw * cpp); OUT_RING (chan, dh); OUT_RING (chan, 1); @@ -68,25 +78,30 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo, int line_count = height > 2047 ? 
2047 : height; WAIT_RING (chan, 15); - BEGIN_RING(chan, m2mf, 0x0238, 2); + BEGIN_RING(chan, m2mf, + NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2); OUT_RELOCh(chan, src_bo, src_offset, src_reloc); OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc); - BEGIN_RING(chan, m2mf, 0x030c, 2); + BEGIN_RING(chan, m2mf, + NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); OUT_RELOCl(chan, src_bo, src_offset, src_reloc); OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); if (src_bo->tile_flags) { - BEGIN_RING(chan, m2mf, 0x0218, 1); - OUT_RING (chan, (dy << 16) | sx); + BEGIN_RING(chan, m2mf, + NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1); + OUT_RING (chan, (sy << 16) | sx); } else { src_offset += (line_count * src_pitch); } if (dst_bo->tile_flags) { - BEGIN_RING(chan, m2mf, 0x0234, 1); - OUT_RING (chan, (sy << 16) | dx); + BEGIN_RING(chan, m2mf, + NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1); + OUT_RING (chan, (dy << 16) | dx); } else { dst_offset += (line_count * dst_pitch); } - BEGIN_RING(chan, m2mf, 0x031c, 4); + BEGIN_RING(chan, m2mf, + NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); OUT_RING (chan, width * cpp); OUT_RING (chan, line_count); OUT_RING (chan, 0x00000101); @@ -136,6 +151,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->level_width = mt->base.width[level]; tx->level_height = mt->base.height[level]; tx->level_offset = lvl->image_offset[image]; + tx->level_tiling = lvl->tile_mode; tx->level_x = x; tx->level_y = y; ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, @@ -147,9 +163,11 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (usage != PIPE_TRANSFER_WRITE) { nv50_transfer_rect_m2mf(pscreen, mt->bo, tx->level_offset, - tx->level_pitch, x, y, tx->level_width, - tx->level_height, tx->bo, 0, - tx->base.stride, 0, 0, + tx->level_pitch, tx->level_tiling, + x, y, + tx->level_width, tx->level_height, + tx->bo, 0, tx->base.stride, + tx->bo->tile_mode, 0, 0, tx->base.width, 
tx->base.height, tx->base.block.size, w, h, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART, @@ -168,12 +186,14 @@ nv50_transfer_del(struct pipe_transfer *ptx) if (ptx->usage != PIPE_TRANSFER_READ) { struct pipe_screen *pscreen = ptx->texture->screen; nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride, - 0, 0, tx->base.width, tx->base.height, - mt->bo, tx->level_offset, - tx->level_pitch, tx->level_x, - tx->level_y, tx->level_width, - tx->level_height, tx->base.block.size, + tx->bo->tile_mode, 0, 0, tx->base.width, tx->base.height, + mt->bo, tx->level_offset, + tx->level_pitch, tx->level_tiling, + tx->level_x, tx->level_y, + tx->level_width, tx->level_height, + tx->base.block.size, tx->base.width, + tx->base.height, NOUVEAU_BO_GART, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f81929f238..17283f3f41 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -49,6 +49,57 @@ nv50_prim(unsigned mode) return NV50TCL_VERTEX_BEGIN_POINTS; } +static INLINE unsigned +nv50_vtxeltfmt(unsigned pf) +{ + static const uint8_t vtxelt_32[4] = { 0x90, 0x20, 0x10, 0x08 }; + static const uint8_t vtxelt_16[4] = { 0xd8, 0x78, 0x28, 0x18 }; + static const uint8_t vtxelt_08[4] = { 0xe8, 0xc0, 0x98, 0x50 }; + + unsigned nf, c = 0; + + switch (pf_type(pf)) { + case PIPE_FORMAT_TYPE_FLOAT: + nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT; break; + case PIPE_FORMAT_TYPE_UNORM: + nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM; break; + case PIPE_FORMAT_TYPE_SNORM: + nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM; break; + case PIPE_FORMAT_TYPE_USCALED: + nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED; break; + case PIPE_FORMAT_TYPE_SSCALED: + nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED; break; + default: + NOUVEAU_ERR("invalid vbo type %d\n",pf_type(pf)); + assert(0); + nf = NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT; + break; + } + + if (pf_size_y(pf)) c++; + if (pf_size_z(pf)) c++; + if 
(pf_size_w(pf)) c++; + + if (pf_exp2(pf) == 3) { + switch (pf_size_x(pf)) { + case 1: return (nf | (vtxelt_08[c] << 16)); + case 2: return (nf | (vtxelt_16[c] << 16)); + case 4: return (nf | (vtxelt_32[c] << 16)); + default: + break; + } + } else + if (pf_exp2(pf) == 6 && pf_size_x(pf) == 1) { + NOUVEAU_ERR("unsupported vbo component size 64\n"); + assert(0); + return (nf | 0x08000000); + } + + NOUVEAU_ERR("invalid vbo format %s\n",pf_name(pf)); + assert(0); + return (nf | 0x08000000); +} + boolean nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) @@ -139,7 +190,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, } static INLINE void -nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint8_t *map, +nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, unsigned start, unsigned count) { struct nouveau_channel *chan = nv50->screen->tesla->channel; @@ -208,9 +259,14 @@ nv50_vbo_validate(struct nv50_context *nv50) struct nouveau_stateobj *vtxbuf, *vtxfmt; int i; + /* don't validate if Gallium took away our buffers */ + if (nv50->vtxbuf_nr == 0) + return; + vtxbuf = so_new(nv50->vtxelt_nr * 4, nv50->vtxelt_nr * 2); vtxfmt = so_new(nv50->vtxelt_nr + 1, 0); - so_method(vtxfmt, tesla, 0x1ac0, nv50->vtxelt_nr); + so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), + nv50->vtxelt_nr); for (i = 0; i < nv50->vtxelt_nr; i++) { struct pipe_vertex_element *ve = &nv50->vtxelt[i]; @@ -218,32 +274,9 @@ nv50_vbo_validate(struct nv50_context *nv50) &nv50->vtxbuf[ve->vertex_buffer_index]; struct nouveau_bo *bo = nouveau_bo(vb->buffer); - switch (ve->src_format) { - case PIPE_FORMAT_R32G32B32A32_FLOAT: - so_data(vtxfmt, 0x7e080000 | i); - break; - case PIPE_FORMAT_R32G32B32_FLOAT: - so_data(vtxfmt, 0x7e100000 | i); - break; - case PIPE_FORMAT_R32G32_FLOAT: - so_data(vtxfmt, 0x7e200000 | i); - break; - case PIPE_FORMAT_R32_FLOAT: - so_data(vtxfmt, 0x7e900000 | i); - break; - case 
PIPE_FORMAT_R8G8B8A8_UNORM: - so_data(vtxfmt, 0x24500000 | i); - break; - default: - { - NOUVEAU_ERR("invalid vbo format %s\n", - pf_name(ve->src_format)); - assert(0); - return; - } - } + so_data(vtxfmt, nv50_vtxeltfmt(ve->src_format) | i); - so_method(vtxbuf, tesla, 0x900 + (i * 16), 3); + so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3); so_data (vtxbuf, 0x20000000 | vb->stride); so_reloc (vtxbuf, bo, vb->buffer_offset + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index faceec9842..d7a2c8c462 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -9,7 +9,6 @@ C_SOURCES = \ r300_chipset.c \ r300_clear.c \ r300_context.c \ - r300_debug.c \ r300_emit.c \ r300_flush.c \ r300_fs.c \ @@ -21,6 +20,22 @@ C_SOURCES = \ r300_state_invariant.c \ r300_vs.c \ r300_surface.c \ - r300_texture.c + r300_texture.c \ + r300_tgsi_to_rc.c + +LIBRARY_INCLUDES = \ + -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ + -I$(TOP)/src/mesa \ + -I$(TOP)/include + +COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a + +EXTRA_OBJECTS = \ + $(COMPILER_ARCHIVE) include ../../Makefile.template + +.PHONY : $(COMPILER_ARCHIVE) + +$(COMPILER_ARCHIVE): + cd $(TOP)/src/mesa/drivers/dri/r300/compiler; make diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 233a32b53c..c8510bc63e 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -52,7 +52,7 @@ static boolean r300_draw_range_elements(struct pipe_context* pipe, draw_set_mapped_constant_buffer(r300->draw, r300->shader_constants[PIPE_SHADER_VERTEX].constants, - r300->shader_constants[PIPE_SHADER_VERTEX].user_count * + r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); draw_arrays(r300->draw, mode, start, count); diff --git a/src/gallium/drivers/r300/r300_context.h 
b/src/gallium/drivers/r300/r300_context.h index d891fd6265..fc8a449893 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -34,6 +34,9 @@ #include "r300_screen.h" #include "r300_winsys.h" +struct r300_fragment_shader; +struct r300_vertex_shader; + struct r300_blend_state { uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ @@ -143,71 +146,10 @@ struct r300_constant_buffer { /* Buffer of constants */ /* XXX first number should be raised */ float constants[32][4]; - /* Number of user-defined constants */ - unsigned user_count; /* Total number of constants */ unsigned count; }; -struct r300_fragment_shader { - /* Parent class */ - struct pipe_shader_state state; - struct tgsi_shader_info info; - - /* Has this shader been translated yet? */ - boolean translated; - - /* Pixel stack size */ - int stack_size; - - /* Are there immediates in this shader? - * If not, we can heavily optimize recompilation. 
*/ - boolean uses_imms; -}; - -struct r3xx_fragment_shader { - /* Parent class */ - struct r300_fragment_shader shader; - - /* Number of ALU instructions */ - int alu_instruction_count; - - /* Number of texture instructions */ - int tex_instruction_count; - - /* Number of texture indirections */ - int indirections; - - /* Indirection node offsets */ - int alu_offset[4]; - - /* Machine instructions */ - struct { - uint32_t alu_rgb_inst; - uint32_t alu_rgb_addr; - uint32_t alu_alpha_inst; - uint32_t alu_alpha_addr; - } instructions[64]; /* XXX magic num */ -}; - -struct r5xx_fragment_shader { - /* Parent class */ - struct r300_fragment_shader shader; - - /* Number of used instructions */ - int instruction_count; - - /* Machine instructions */ - struct { - uint32_t inst0; - uint32_t inst1; - uint32_t inst2; - uint32_t inst3; - uint32_t inst4; - uint32_t inst5; - } instructions[256]; /*< XXX magic number */ -}; - struct r300_texture { /* Parent class */ struct pipe_texture tex; @@ -242,33 +184,6 @@ struct r300_vertex_format { int fs_tab[16]; }; -struct r300_vertex_shader { - /* Parent class */ - struct pipe_shader_state state; - struct tgsi_shader_info info; - - /* Fallback shader, because Draw has issues */ - struct draw_vertex_shader* draw; - - /* Has this shader been translated yet? */ - boolean translated; - - /* Are there immediates in this shader? - * If not, we can heavily optimize recompilation. 
*/ - boolean uses_imms; - - /* Number of used instructions */ - int instruction_count; - - /* Machine instructions */ - struct { - uint32_t inst0; - uint32_t inst1; - uint32_t inst2; - uint32_t inst3; - } instructions[128]; /*< XXX magic number */ -}; - static struct pipe_viewport_state r300_viewport_identity = { .scale = {1.0, 1.0, 1.0, 1.0}, .translate = {0.0, 0.0, 0.0, 0.0}, diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c deleted file mode 100644 index c83e8526cf..0000000000 --- a/src/gallium/drivers/r300/r300_debug.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#include "r300_debug.h" - -void r3xx_dump_fs(struct r3xx_fragment_shader* fs) -{ - int i; - - for (i = 0; i < fs->alu_instruction_count; i++) { - } -} - -void r5xx_fs_dump(struct r5xx_fragment_shader* fs) -{ - int i; - uint32_t inst; - - for (i = 0; i < fs->instruction_count; i++) { - inst = fs->instructions[i].inst0; - debug_printf("%d: 0: CMN_INST 0x%08x:", i, inst); - switch (inst & 0x3) { - case R500_INST_TYPE_ALU: - debug_printf("ALU "); - break; - case R500_INST_TYPE_OUT: - debug_printf("OUT "); - break; - case R500_INST_TYPE_FC: - debug_printf("FC "); - break; - case R500_INST_TYPE_TEX: - debug_printf("TEX "); - break; - } - debug_printf("%s %s %s %s ", - inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", - inst & R500_INST_LAST ? "LAST" : "", - inst & R500_INST_NOP ? "NOP" : "", - inst & R500_INST_ALU_WAIT ? "ALU_WAIT" : ""); - debug_printf("wmask: %s omask: %s\n", - r5xx_fs_mask[(inst >> 11) & 0xf], - r5xx_fs_mask[(inst >> 15) & 0xf]); - switch (inst & 0x3) { - case R500_INST_TYPE_ALU: - case R500_INST_TYPE_OUT: - inst = fs->instructions[i].inst1; - debug_printf(" 1: RGB_ADDR 0x%08x:", inst); - debug_printf("Addr0: %d%c, Addr1: %d%c, " - "Addr2: %d%c, srcp:%d\n", - inst & 0xff, (inst & (1 << 8)) ? 'c' : 't', - (inst >> 10) & 0xff, (inst & (1 << 18)) ? 'c' : 't', - (inst >> 20) & 0xff, (inst & (1 << 28)) ? 'c' : 't', - (inst >> 30)); - - inst = fs->instructions[i].inst2; - debug_printf(" 2: ALPHA_ADDR 0x%08x:", inst); - debug_printf("Addr0: %d%c, Addr1: %d%c, " - "Addr2: %d%c, srcp:%d\n", - inst & 0xff, (inst & (1 << 8)) ? 'c' : 't', - (inst >> 10) & 0xff, (inst & (1 << 18)) ? 'c' : 't', - (inst >> 20) & 0xff, (inst & (1 << 28)) ? 
'c' : 't', - (inst >> 30)); - - inst = fs->instructions[i].inst3; - debug_printf(" 3: RGB_INST 0x%08x:", inst); - debug_printf("rgb_A_src:%d %s/%s/%s %d " - "rgb_B_src:%d %s/%s/%s %d\n", - inst & 0x3, r5xx_fs_swiz[(inst >> 2) & 0x7], - r5xx_fs_swiz[(inst >> 5) & 0x7], - r5xx_fs_swiz[(inst >> 8) & 0x7], - (inst >> 11) & 0x3, (inst >> 13) & 0x3, - r5xx_fs_swiz[(inst >> 15) & 0x7], - r5xx_fs_swiz[(inst >> 18) & 0x7], - r5xx_fs_swiz[(inst >> 21) & 0x7], - (inst >> 24) & 0x3); - - inst = fs->instructions[i].inst4; - debug_printf(" 4: ALPHA_INST 0x%08x:", inst); - debug_printf("%s dest:%d%s alp_A_src:%d %s %d " - "alp_B_src:%d %s %d w:%d\n", - r5xx_fs_op_alpha[inst & 0xf], (inst >> 4) & 0x7f, - inst & (1<<11) ? "(rel)":"", (inst >> 12) & 0x3, - r5xx_fs_swiz[(inst >> 14) & 0x7], (inst >> 17) & 0x3, - (inst >> 19) & 0x3, r5xx_fs_swiz[(inst >> 21) & 0x7], - (inst >> 24) & 0x3, (inst >> 31) & 0x1); - - inst = fs->instructions[i].inst5; - debug_printf(" 5: RGBA_INST 0x%08x:", inst); - debug_printf("%s dest:%d%s rgb_C_src:%d %s/%s/%s %d " - "alp_C_src:%d %s %d\n", - r5xx_fs_op_rgb[inst & 0xf], (inst >> 4) & 0x7f, - inst & (1 << 11) ? "(rel)":"", (inst >> 12) & 0x3, - r5xx_fs_swiz[(inst >> 14) & 0x7], - r5xx_fs_swiz[(inst >> 17) & 0x7], - r5xx_fs_swiz[(inst >> 20) & 0x7], - (inst >> 23) & 0x3, (inst >> 25) & 0x3, - r5xx_fs_swiz[(inst >> 27) & 0x7], (inst >> 30) & 0x3); - break; - case R500_INST_TYPE_FC: - /* XXX don't even bother yet */ - break; - case R500_INST_TYPE_TEX: - inst = fs->instructions[i].inst1; - debug_printf(" 1: TEX_INST 0x%08x: id: %d " - "op:%s, %s, %s %s\n", - inst, (inst >> 16) & 0xf, - r5xx_fs_tex[(inst >> 22) & 0x7], - (inst & (1 << 25)) ? "ACQ" : "", - (inst & (1 << 26)) ? "IGNUNC" : "", - (inst & (1 << 27)) ? "UNSCALED" : "SCALED"); - - inst = fs->instructions[i].inst2; - debug_printf(" 2: TEX_ADDR 0x%08x: " - "src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", - inst, inst & 0x7f, inst & (1 << 7) ? 
"(rel)" : "", - r5xx_fs_swiz[(inst >> 8) & 0x3], - r5xx_fs_swiz[(inst >> 10) & 0x3], - r5xx_fs_swiz[(inst >> 12) & 0x3], - r5xx_fs_swiz[(inst >> 14) & 0x3], - (inst >> 16) & 0x7f, inst & (1 << 23) ? "(rel)" : "", - r5xx_fs_swiz[(inst >> 24) & 0x3], - r5xx_fs_swiz[(inst >> 26) & 0x3], - r5xx_fs_swiz[(inst >> 28) & 0x3], - r5xx_fs_swiz[(inst >> 30) & 0x3]); - - inst = fs->instructions[i].inst3; - debug_printf(" 3: TEX_DXDY 0x%08x\n", inst); - break; - } - } -} - -static void r300_vs_op_dump(uint32_t op) -{ - debug_printf(" dst: %d%s op: ", - (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); - if (op & 0x80) { - if (op & 0x1) { - debug_printf("PVS_MACRO_OP_2CLK_M2X_ADD\n"); - } else { - debug_printf(" PVS_MACRO_OP_2CLK_MADD\n"); - } - } else if (op & 0x40) { - debug_printf("%s\n", r300_vs_me_ops[op & 0x1f]); - } else { - debug_printf("%s\n", r300_vs_ve_ops[op & 0x1f]); - } -} - -void r300_vs_src_dump(uint32_t src) -{ - debug_printf(" reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", - (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3], - src & (1 << 25) ? "-" : " ", - r300_vs_swiz_debug[(src >> 13) & 0x7], - src & (1 << 26) ? "-" : " ", - r300_vs_swiz_debug[(src >> 16) & 0x7], - src & (1 << 27) ? "-" : " ", - r300_vs_swiz_debug[(src >> 19) & 0x7], - src & (1 << 28) ? 
"-" : " ", - r300_vs_swiz_debug[(src >> 22) & 0x7]); -} - -void r300_vs_dump(struct r300_vertex_shader* vs) -{ - int i; - - for (i = 0; i < vs->instruction_count; i++) { - debug_printf("%d: op: 0x%08x", i, vs->instructions[i].inst0); - r300_vs_op_dump(vs->instructions[i].inst0); - debug_printf(" src0: 0x%08x", vs->instructions[i].inst1); - r300_vs_src_dump(vs->instructions[i].inst1); - debug_printf(" src1: 0x%08x", vs->instructions[i].inst2); - r300_vs_src_dump(vs->instructions[i].inst2); - debug_printf(" src2: 0x%08x", vs->instructions[i].inst3); - r300_vs_src_dump(vs->instructions[i].inst3); - } -} diff --git a/src/gallium/drivers/r300/r300_debug.h b/src/gallium/drivers/r300/r300_debug.h deleted file mode 100644 index 6b58c1e250..0000000000 --- a/src/gallium/drivers/r300/r300_debug.h +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_DEBUG_H -#define R300_DEBUG_H - -#include "r300_reg.h" -#include "r300_fs.h" -#include "r300_vs.h" - -static char* r5xx_fs_swiz[] = { - " R", - " G", - " B", - " A", - " 0", - ".5", - " 1", - " U", -}; - -static char* r5xx_fs_op_rgb[] = { - "MAD", - "DP3", - "DP4", - "D2A", - "MIN", - "MAX", - "---", - "CND", - "CMP", - "FRC", - "SOP", - "MDH", - "MDV", -}; - -static char* r5xx_fs_op_alpha[] = { - "MAD", - " DP", - "MIN", - "MAX", - "---", - "CND", - "CMP", - "FRC", - "EX2", - "LN2", - "RCP", - "RSQ", - "SIN", - "COS", - "MDH", - "MDV", -}; - -static char* r5xx_fs_mask[] = { - "NONE", - "R ", - " G ", - "RG ", - " B ", - "R B ", - " GB ", - "RGB ", - " A", - "R A", - " G A", - "RG A", - " BA", - "R BA", - " GBA", - "RGBA", -}; - -static char* r5xx_fs_tex[] = { - " NOP", - " LD", - "TEXKILL", - " PROJ", - "LODBIAS", - " LOD", - " DXDY", -}; - -static char* r300_vs_ve_ops[] = { - /* R300 vector ops */ - " VE_NO_OP", - " VE_DOT_PRODUCT", - " VE_MULTIPLY", - " VE_ADD", - " VE_MULTIPLY_ADD", - " VE_DISTANCE_FACTOR", - " VE_FRACTION", - " VE_MAXIMUM", - " VE_MINIMUM", - "VE_SET_GREATER_THAN_EQUAL", - " VE_SET_LESS_THAN", - " VE_MULTIPLYX2_ADD", - " VE_MULTIPLY_CLAMP", - " VE_FLT2FIX_DX", - " VE_FLT2FIX_DX_RND", - /* R500 vector ops */ - " VE_PRED_SET_EQ_PUSH", - " VE_PRED_SET_GT_PUSH", - " VE_PRED_SET_GTE_PUSH", - " VE_PRED_SET_NEQ_PUSH", - " VE_COND_WRITE_EQ", - " VE_COND_WRITE_GT", - " VE_COND_WRITE_GTE", - " VE_COND_WRITE_NEQ", - " VE_SET_GREATER_THAN", - " VE_SET_EQUAL", - " VE_SET_NOT_EQUAL", - " (reserved)", - " (reserved)", - " (reserved)", -}; - -static char* r300_vs_me_ops[] = { - /* R300 math ops */ - " ME_NO_OP", - " ME_EXP_BASE2_DX", - " ME_LOG_BASE2_DX", - " 
ME_EXP_BASEE_FF", - " ME_LIGHT_COEFF_DX", - " ME_POWER_FUNC_FF", - " ME_RECIP_DX", - " ME_RECIP_FF", - " ME_RECIP_SQRT_DX", - " ME_RECIP_SQRT_FF", - " ME_MULTIPLY", - " ME_EXP_BASE2_FULL_DX", - " ME_LOG_BASE2_FULL_DX", - " ME_POWER_FUNC_FF_CLAMP_B", - "ME_POWER_FUNC_FF_CLAMP_B1", - "ME_POWER_FUNC_FF_CLAMP_01", - " ME_SIN", - " ME_COS", - /* R500 math ops */ - " ME_LOG_BASE2_IEEE", - " ME_RECIP_IEEE", - " ME_RECIP_SQRT_IEEE", - " ME_PRED_SET_EQ", - " ME_PRED_SET_GT", - " ME_PRED_SET_GTE", - " ME_PRED_SET_NEQ", - " ME_PRED_SET_CLR", - " ME_PRED_SET_INV", - " ME_PRED_SET_POP", - " ME_PRED_SET_RESTORE", - " (reserved)", - " (reserved)", - " (reserved)", -}; - -/* XXX refactor to avoid clashing symbols */ -static char* r300_vs_src_debug[] = { - "t", - "i", - "c", - "a", -}; - -static char* r300_vs_dst_debug[] = { - "t", - "a0", - "o", - "ox", - "a", - "i", - "u", - "u", -}; - -static char* r300_vs_swiz_debug[] = { - "X", - "Y", - "Z", - "W", - "0", - "1", - "U", - "U", -}; - -void r5xx_fs_dump(struct r5xx_fragment_shader* fs); -void r3xx_dump_fs(struct r3xx_fragment_shader* fs); - -void r300_vs_dump(struct r300_vertex_shader* vs); - -#endif /* R300_DEBUG_H */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 7ba56cdc1d..53256fc6dd 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -24,6 +24,9 @@ #include "r300_emit.h" +#include "r300_fs.h" +#include "r300_vs.h" + void r300_emit_blend_state(struct r300_context* r300, struct r300_blend_state* blend) { @@ -109,73 +112,158 @@ void r300_emit_dsa_state(struct r300_context* r300, END_CS; } -void r300_emit_fragment_shader(struct r300_context* r300, - struct r3xx_fragment_shader* fs) +static const float * get_shader_constant( + struct r300_context * r300, + struct rc_constant * constant, + struct r300_constant_buffer * externals) +{ + static const float zero[4] = { 0.0, 0.0, 0.0, 0.0 }; + switch(constant->Type) { + case 
RC_CONSTANT_EXTERNAL: + return externals->constants[constant->u.External]; + + case RC_CONSTANT_IMMEDIATE: + return constant->u.Immediate; + + default: + debug_printf("r300: Implementation error: Unhandled constant type %i\n", + constant->Type); + return zero; + } +} + +/* Convert a normal single-precision float into the 7.16 format + * used by the R300 fragment shader. + */ +static uint32_t pack_float24(float f) { + union { + float fl; + uint32_t u; + } u; + float mantissa; + int exponent; + uint32_t float24 = 0; + + if (f == 0.0) + return 0; + + u.fl = f; + + mantissa = frexpf(f, &exponent); + + /* Handle -ve */ + if (mantissa < 0) { + float24 |= (1 << 23); + mantissa = mantissa * -1.0; + } + /* Handle exponent, bias of 63 */ + exponent += 62; + float24 |= (exponent << 16); + /* Kill 7 LSB of mantissa */ + float24 |= (u.u & 0x7FFFFF) >> 7; + + return float24; +} + +void r300_emit_fragment_program_code(struct r300_context* r300, + struct rX00_fragment_program_code* generic_code, + struct r300_constant_buffer* externals) +{ + struct r300_fragment_program_code * code = &generic_code->code.r300; + struct rc_constant_list * constants = &generic_code->constants; int i; CS_LOCALS(r300); - BEGIN_CS(22); - - OUT_CS_REG(R300_US_CONFIG, fs->indirections); - OUT_CS_REG(R300_US_PIXSIZE, fs->shader.stack_size); - /* XXX figure out exactly how big the sizes are on this reg */ - OUT_CS_REG(R300_US_CODE_OFFSET, 0x40); - /* XXX figure these ones out a bit better kthnx */ - OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_3, 0x40 | R300_RGBA_OUT); - - for (i = 0; i < fs->alu_instruction_count; i++) { - OUT_CS_REG(R300_US_ALU_RGB_INST_0 + (4 * i), - fs->instructions[i].alu_rgb_inst); - OUT_CS_REG(R300_US_ALU_RGB_ADDR_0 + (4 * i), - fs->instructions[i].alu_rgb_addr); - OUT_CS_REG(R300_US_ALU_ALPHA_INST_0 + (4 * i), - fs->instructions[i].alu_alpha_inst); - 
OUT_CS_REG(R300_US_ALU_ALPHA_ADDR_0 + (4 * i), - fs->instructions[i].alu_alpha_addr); + BEGIN_CS(15 + + code->alu.length * 4 + + (code->tex.length ? (1 + code->tex.length) : 0) + + (constants->Count ? (1 + constants->Count * 4) : 0)); + + OUT_CS_REG(R300_US_CONFIG, code->config); + OUT_CS_REG(R300_US_PIXSIZE, code->pixsize); + OUT_CS_REG(R300_US_CODE_OFFSET, code->code_offset); + + OUT_CS_REG_SEQ(R300_US_CODE_ADDR_0, 4); + for(i = 0; i < 4; ++i) + OUT_CS(code->code_addr[i]); + + OUT_CS_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CS(code->alu.inst[i].rgb_inst); + + OUT_CS_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CS(code->alu.inst[i].rgb_addr); + + OUT_CS_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CS(code->alu.inst[i].alpha_inst); + + OUT_CS_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CS(code->alu.inst[i].alpha_addr); + + if (code->tex.length) { + OUT_CS_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); + for(i = 0; i < code->tex.length; ++i) + OUT_CS(code->tex.inst[i]); + } + + if (constants->Count) { + OUT_CS_ONE_REG(R300_PFS_PARAM_0_X, constants->Count * 4); + for(i = 0; i < constants->Count; ++i) { + const float * data = get_shader_constant(r300, &constants->Constants[i], externals); + OUT_CS(pack_float24(data[0])); + OUT_CS(pack_float24(data[1])); + OUT_CS(pack_float24(data[2])); + OUT_CS(pack_float24(data[3])); + } } END_CS; } -void r500_emit_fragment_shader(struct r300_context* r300, - struct r5xx_fragment_shader* fs) +void r500_emit_fragment_program_code(struct r300_context* r300, + struct rX00_fragment_program_code* generic_code, + struct r300_constant_buffer* externals) { + struct r500_fragment_program_code * code = &generic_code->code.r500; + struct rc_constant_list * constants = &generic_code->constants; int i; - struct 
r300_constant_buffer* constants = - &r300->shader_constants[PIPE_SHADER_FRAGMENT]; CS_LOCALS(r300); - BEGIN_CS(9 + (fs->instruction_count * 6) + (constants->count ? 3 : 0) + - (constants->count * 4)); - OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); - OUT_CS_REG(R500_US_PIXSIZE, fs->shader.stack_size); - OUT_CS_REG(R500_US_CODE_ADDR, R500_US_CODE_START_ADDR(0) | - R500_US_CODE_END_ADDR(fs->instruction_count)); + BEGIN_CS(13 + + ((code->inst_end + 1) * 6) + + (constants->Count ? (3 + (constants->Count * 4)) : 0)); + OUT_CS_REG(R500_US_CONFIG, 0); + OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx); + OUT_CS_REG(R500_US_CODE_RANGE, + R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); + OUT_CS_REG(R500_US_CODE_OFFSET, 0); + OUT_CS_REG(R500_US_CODE_ADDR, + R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end)); OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, fs->instruction_count * 6); - for (i = 0; i < fs->instruction_count; i++) { - OUT_CS(fs->instructions[i].inst0); - OUT_CS(fs->instructions[i].inst1); - OUT_CS(fs->instructions[i].inst2); - OUT_CS(fs->instructions[i].inst3); - OUT_CS(fs->instructions[i].inst4); - OUT_CS(fs->instructions[i].inst5); - } - - if (constants->count) { - OUT_CS_REG(R500_GA_US_VECTOR_INDEX, - R500_GA_US_VECTOR_INDEX_TYPE_CONST); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->count * 4); - for (i = 0; i < constants->count; i++) { - OUT_CS_32F(constants->constants[i][0]); - OUT_CS_32F(constants->constants[i][1]); - OUT_CS_32F(constants->constants[i][2]); - OUT_CS_32F(constants->constants[i][3]); + OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6); + for (i = 0; i <= code->inst_end; i++) { + OUT_CS(code->inst[i].inst0); + OUT_CS(code->inst[i].inst1); + OUT_CS(code->inst[i].inst2); + OUT_CS(code->inst[i].inst3); + OUT_CS(code->inst[i].inst4); + OUT_CS(code->inst[i].inst5); + } + + if (constants->Count) { + 
OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); + OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4); + for (i = 0; i < constants->Count; i++) { + const float * data = get_shader_constant(r300, &constants->Constants[i], externals); + OUT_CS_32F(data[0]); + OUT_CS_32F(data[1]); + OUT_CS_32F(data[2]); + OUT_CS_32F(data[3]); } } @@ -190,7 +278,7 @@ void r300_emit_fb_state(struct r300_context* r300, int i; CS_LOCALS(r300); - BEGIN_CS((8 * fb->nr_cbufs) + (fb->zsbuf ? 8 : 0) + 4); + BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 4); for (i = 0; i < fb->nr_cbufs; i++) { tex = (struct r300_texture*)fb->cbufs[i]->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); @@ -199,8 +287,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), pixpitch | - r300_translate_colorformat(tex->tex.format)); + OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); + OUT_CS_RELOC(tex->buffer, pixpitch | + r300_translate_colorformat(tex->tex.format), 0, + RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), r300_translate_out_fmt(fb->cbufs[i]->format)); @@ -216,7 +306,8 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); - OUT_CS_REG(R300_ZB_DEPTHPITCH, pixpitch); + OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); + OUT_CS_RELOC(tex->buffer, pixpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); } OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, @@ -380,13 +471,13 @@ void r300_emit_vertex_format_state(struct r300_context* r300) END_CS; } -void r300_emit_vertex_shader(struct r300_context* r300, - struct r300_vertex_shader* vs) +void r300_emit_vertex_program_code(struct r300_context* r300, + struct r300_vertex_program_code* code, + struct r300_constant_buffer* constants) { int i; struct r300_screen* r300screen = 
r300_screen(r300->context.screen); - struct r300_constant_buffer* constants = - &r300->shader_constants[PIPE_SHADER_VERTEX]; + unsigned instruction_count = code->length / 4; CS_LOCALS(r300); if (!r300screen->caps->has_tcl) { @@ -395,10 +486,10 @@ void r300_emit_vertex_shader(struct r300_context* r300, return; } - if (constants->count) { - BEGIN_CS(14 + (vs->instruction_count * 4) + (constants->count * 4)); + if (code->constants.Count) { + BEGIN_CS(14 + code->length + (code->constants.Count * 4)); } else { - BEGIN_CS(11 + (vs->instruction_count * 4)); + BEGIN_CS(11 + code->length); } /* R300_VAP_PVS_CODE_CNTL_0 @@ -408,30 +499,27 @@ void r300_emit_vertex_shader(struct r300_context* r300, * XXX these could be optimized to select better values... */ OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3); OUT_CS(R300_PVS_FIRST_INST(0) | - R300_PVS_XYZW_VALID_INST(vs->instruction_count - 1) | - R300_PVS_LAST_INST(vs->instruction_count - 1)); - OUT_CS(R300_PVS_MAX_CONST_ADDR(constants->count - 1)); - OUT_CS(vs->instruction_count - 1); + R300_PVS_XYZW_VALID_INST(instruction_count - 1) | + R300_PVS_LAST_INST(instruction_count - 1)); + OUT_CS(R300_PVS_MAX_CONST_ADDR(code->constants.Count - 1)); + OUT_CS(instruction_count - 1); OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); - OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, vs->instruction_count * 4); - for (i = 0; i < vs->instruction_count; i++) { - OUT_CS(vs->instructions[i].inst0); - OUT_CS(vs->instructions[i].inst1); - OUT_CS(vs->instructions[i].inst2); - OUT_CS(vs->instructions[i].inst3); - } + OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length); + for (i = 0; i < code->length; i++) + OUT_CS(code->body.d[i]); - if (constants->count) { + if (code->constants.Count) { OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, (r300screen->caps->is_r500 ? 
R500_PVS_CONST_START : R300_PVS_CONST_START)); - OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, constants->count * 4); - for (i = 0; i < constants->count; i++) { - OUT_CS_32F(constants->constants[i][0]); - OUT_CS_32F(constants->constants[i][1]); - OUT_CS_32F(constants->constants[i][2]); - OUT_CS_32F(constants->constants[i][3]); + OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->constants.Count * 4); + for (i = 0; i < code->constants.Count; i++) { + const float * data = get_shader_constant(r300, &code->constants.Constants[i], constants); + OUT_CS_32F(data[0]); + OUT_CS_32F(data[1]); + OUT_CS_32F(data[2]); + OUT_CS_32F(data[3]); } } @@ -443,6 +531,12 @@ void r300_emit_vertex_shader(struct r300_context* r300, END_CS; } +void r300_emit_vertex_shader(struct r300_context* r300, + struct r300_vertex_shader* vs) +{ + r300_emit_vertex_program_code(r300, &vs->code, &r300->shader_constants[PIPE_SHADER_VERTEX]); +} + void r300_emit_viewport_state(struct r300_context* r300, struct r300_viewport_state* viewport) { @@ -531,10 +625,11 @@ validate: } else { debug_printf("No VBO while emitting dirty state!\n"); } - if (r300->winsys->validate(r300->winsys)) { + if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { /* Well, hell. 
*/ + debug_printf("r300: Stuck in validation loop, gonna quit now."); exit(1); } invalid = TRUE; @@ -563,11 +658,9 @@ validate: if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { if (r300screen->caps->is_r500) { - r500_emit_fragment_shader(r300, - (struct r5xx_fragment_shader*)r300->fs); + r500_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]); } else { - r300_emit_fragment_shader(r300, - (struct r3xx_fragment_shader*)r300->fs); + r300_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]); } r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index fda26f3948..350691d592 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -30,6 +30,9 @@ #include "r300_screen.h" #include "r300_state_inlines.h" +struct rX00_fragment_program_code; +struct r300_vertex_program_code; + void r300_emit_blend_state(struct r300_context* r300, struct r300_blend_state* blend); @@ -42,11 +45,13 @@ void r300_emit_clip_state(struct r300_context* r300, void r300_emit_dsa_state(struct r300_context* r300, struct r300_dsa_state* dsa); -void r300_emit_fragment_shader(struct r300_context* r300, - struct r3xx_fragment_shader* fs); +void r300_emit_fragment_program_code(struct r300_context* r300, + struct rX00_fragment_program_code* generic_code, + struct r300_constant_buffer* externals); -void r500_emit_fragment_shader(struct r300_context* r300, - struct r5xx_fragment_shader* fs); +void r500_emit_fragment_program_code(struct r300_context* r300, + struct rX00_fragment_program_code* generic_code, + struct r300_constant_buffer* externals); void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb); @@ -68,6 +73,10 @@ void r300_emit_vertex_buffer(struct r300_context* r300); void r300_emit_vertex_format_state(struct r300_context* r300); +void 
r300_emit_vertex_program_code(struct r300_context* r300, + struct r300_vertex_program_code* code, + struct r300_constant_buffer* constants); + void r300_emit_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 4b304306d0..36463b9a2e 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -23,87 +23,115 @@ #include "r300_fs.h" -void r300_translate_fragment_shader(struct r300_context* r300, - struct r300_fragment_shader* fs) +#include "r300_tgsi_to_rc.h" + +#include "radeon_compiler.h" + +static void find_output_registers(struct r300_fragment_program_compiler * compiler, + struct r300_fragment_shader * fs) { - struct tgsi_parse_context parser; - int i; - boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; - struct r300_constant_buffer* consts = - &r300->shader_constants[PIPE_SHADER_FRAGMENT]; + unsigned i; - struct r300_fs_asm* assembler = CALLOC_STRUCT(r300_fs_asm); - if (assembler == NULL) { - return; - } - /* Setup starting offset for immediates. */ - assembler->imm_offset = consts->user_count; - /* Enable depth writes, if needed. */ - assembler->writes_depth = fs->info.writes_z; - - /* Make sure we start at the beginning of the shader. */ - if (is_r500) { - ((struct r5xx_fragment_shader*)fs)->instruction_count = 0; - } + /* Mark the outputs as not present initially */ + compiler->OutputColor = fs->info.num_outputs; + compiler->OutputDepth = fs->info.num_outputs; - tgsi_parse_init(&parser, fs->state.tokens); + /* Now see where they really are. 
*/ + for(i = 0; i < fs->info.num_outputs; ++i) { + switch(fs->info.output_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + compiler->OutputColor = i; + break; + case TGSI_SEMANTIC_POSITION: + compiler->OutputDepth = i; + break; + } + } +} - while (!tgsi_parse_end_of_tokens(&parser)) { - tgsi_parse_token(&parser); +static void allocate_hardware_inputs( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata) +{ + struct tgsi_shader_info* info = &((struct r300_fragment_shader*)c->UserData)->info; + int total_colors = 0; + int colors = 0; + int total_generic = 0; + int generic = 0; + int i; - /* This is seriously the lamest way to create fragment programs ever. - * I blame TGSI. */ - switch (parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* Allocated registers sitting at the beginning - * of the program. */ - r300_fs_declare(assembler, &parser.FullToken.FullDeclaration); + for (i = 0; i < info->num_inputs; i++) { + switch (info->input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + total_colors++; break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - debug_printf("r300: Emitting immediate to constant buffer, " - "position %d\n", - assembler->imm_offset + assembler->imm_count); - /* I am not amused by the length of these. 
*/ - for (i = 0; i < 4; i++) { - consts->constants[assembler->imm_offset + - assembler->imm_count][i] = - parser.FullToken.FullImmediate.u.ImmediateFloat32[i] - .Float; - } - assembler->imm_count++; + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + total_generic++; break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (is_r500) { - r5xx_fs_instruction((struct r5xx_fragment_shader*)fs, - assembler, &parser.FullToken.FullInstruction); - } else { - r3xx_fs_instruction((struct r3xx_fragment_shader*)fs, - assembler, &parser.FullToken.FullInstruction); - } + } + } + + for(i = 0; i < info->num_inputs; i++) { + switch (info->input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + allocate(mydata, i, colors); + colors++; + break; + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + allocate(mydata, i, total_colors + generic); + generic++; break; } } +} + +void r300_translate_fragment_shader(struct r300_context* r300, + struct r300_fragment_shader* fs) +{ + struct r300_fragment_program_compiler compiler; + struct tgsi_to_rc ttr; + + memset(&compiler, 0, sizeof(compiler)); + rc_init(&compiler.Base); + compiler.Base.Debug = 1; + + compiler.code = &fs->code; + compiler.is_r500 = r300_screen(r300->context.screen)->caps->is_r500; + compiler.AllocateHwInputs = &allocate_hardware_inputs; + compiler.UserData = fs; + + /* TODO: Program compilation depends on texture compare modes, + * which are sampler state. Therefore, programs need to be recompiled + * depending on this state as in the classic Mesa driver. + * + * This is not yet handled correctly. 
+ */ - debug_printf("r300: fs: %d texs and %d colors, first free reg is %d\n", - assembler->tex_count, assembler->color_count, - assembler->tex_count + assembler->color_count); - - consts->count = consts->user_count + assembler->imm_count; - fs->uses_imms = assembler->imm_count; - debug_printf("r300: fs: %d total constants, " - "%d from user and %d from immediates\n", consts->count, - consts->user_count, assembler->imm_count); - r3xx_fs_finalize(fs, assembler); - if (is_r500) { - r5xx_fs_finalize((struct r5xx_fragment_shader*)fs, assembler); + find_output_registers(&compiler, fs); + + if (compiler.Base.Debug) { + debug_printf("r300: Initial fragment program\n"); + tgsi_dump(fs->state.tokens, 0); } - tgsi_dump(fs->state.tokens, 0); - /* XXX finish r300 dumper too */ - if (is_r500) { - r5xx_fs_dump((struct r5xx_fragment_shader*)fs); + /* Translate TGSI to our internal representation */ + ttr.compiler = &compiler.Base; + ttr.info = &fs->info; + + r300_tgsi_to_rc(&ttr, fs->state.tokens); + + /* Invoke the compiler */ + r3xx_compile_fragment_program(&compiler); + if (compiler.Base.Error) { + /* Todo: Fail gracefully */ + fprintf(stderr, "r300 FP: Compiler error\n"); + abort(); } - tgsi_parse_free(&parser); - FREE(assembler); + /* And, finally... */ + rc_destroy(&compiler.Base); + fs->translated = TRUE; } diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index 18deb7a05e..9fab789402 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -30,6 +30,21 @@ #include "r3xx_fs.h" #include "r5xx_fs.h" +#include "radeon_code.h" + +struct r300_fragment_shader { + /* Parent class */ + struct pipe_shader_state state; + struct tgsi_shader_info info; + + /* Has this shader been translated yet? 
*/ + boolean translated; + + /* Compiled code */ + struct rX00_fragment_program_code code; +}; + + void r300_translate_fragment_shader(struct r300_context* r300, struct r300_fragment_shader* fs); diff --git a/src/gallium/drivers/r300/r300_fs_inlines.h b/src/gallium/drivers/r300/r300_fs_inlines.h deleted file mode 100644 index be4be9465e..0000000000 --- a/src/gallium/drivers/r300/r300_fs_inlines.h +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> - * Joakim Sindholt <opensource@zhasha.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef R300_FS_INLINES_H -#define R300_FS_INLINES_H - -#include "tgsi/tgsi_parse.h" - -#include "r300_context.h" -#include "r300_debug.h" -#include "r300_reg.h" -#include "r300_screen.h" -#include "r300_shader_inlines.h" - -/* Temporary struct used to hold assembly state while putting together - * fragment programs. */ -struct r300_fs_asm { - /* Pipe context. */ - struct r300_context* r300; - /* Number of colors. */ - unsigned color_count; - /* Number of texcoords. */ - unsigned tex_count; - /* Offset for temporary registers. Inputs and temporaries have no - * distinguishing markings, so inputs start at 0 and the first usable - * temporary register is after all inputs. */ - unsigned temp_offset; - /* Number of requested temporary registers. */ - unsigned temp_count; - /* Offset for immediate constants. Neither R300 nor R500 can do four - * inline constants per source, so instead we copy immediates into the - * constant buffer. */ - unsigned imm_offset; - /* Number of immediate constants. */ - unsigned imm_count; - /* Are depth writes enabled? */ - boolean writes_depth; - /* Depth write offset. This is the TGSI output that corresponds to - * depth writes. */ - unsigned depth_output; -}; - -static INLINE void r300_fs_declare(struct r300_fs_asm* assembler, - struct tgsi_full_declaration* decl) -{ - switch (decl->Declaration.File) { - case TGSI_FILE_INPUT: - switch (decl->Semantic.SemanticName) { - case TGSI_SEMANTIC_COLOR: - assembler->color_count++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - assembler->tex_count++; - break; - default: - debug_printf("r300: fs: Bad semantic declaration %d\n", - decl->Semantic.SemanticName); - break; - } - break; - case TGSI_FILE_OUTPUT: - /* Depth write. Mark the position of the output so we can - * identify it later. 
*/ - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { - assembler->depth_output = decl->DeclarationRange.First; - } - break; - case TGSI_FILE_CONSTANT: - break; - case TGSI_FILE_TEMPORARY: - assembler->temp_count++; - break; - default: - debug_printf("r300: fs: Bad file %d\n", decl->Declaration.File); - break; - } - - assembler->temp_offset = assembler->color_count + assembler->tex_count; -} - -static INLINE unsigned r300_fs_src(struct r300_fs_asm* assembler, - struct tgsi_src_register* src) -{ - switch (src->File) { - case TGSI_FILE_NULL: - return 0; - case TGSI_FILE_INPUT: - /* XXX may be wrong */ - return src->Index; - break; - case TGSI_FILE_TEMPORARY: - return src->Index + assembler->temp_offset; - break; - case TGSI_FILE_IMMEDIATE: - return (src->Index + assembler->imm_offset) | (1 << 8); - break; - case TGSI_FILE_CONSTANT: - /* XXX magic */ - return src->Index | (1 << 8); - break; - default: - debug_printf("r300: fs: Unimplemented src %d\n", src->File); - break; - } - return 0; -} - -static INLINE unsigned r300_fs_dst(struct r300_fs_asm* assembler, - struct tgsi_dst_register* dst) -{ - switch (dst->File) { - case TGSI_FILE_NULL: - /* This happens during KIL instructions. 
*/ - return 0; - break; - case TGSI_FILE_OUTPUT: - return 0; - break; - case TGSI_FILE_TEMPORARY: - return dst->Index + assembler->temp_offset; - break; - default: - debug_printf("r300: fs: Unimplemented dst %d\n", dst->File); - break; - } - return 0; -} - -static INLINE boolean r300_fs_is_depr(struct r300_fs_asm* assembler, - struct tgsi_dst_register* dst) -{ - return (assembler->writes_depth && - (dst->File == TGSI_FILE_OUTPUT) && - (dst->Index == assembler->depth_output)); -} - -#endif /* R300_FS_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index da1d5ffe2f..96a7304621 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -147,6 +147,8 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_TGSI_CONT_SUPPORTED: /* XXX */ return 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + return 1; default: debug_printf("r300: Implementation error: Bad param %d\n", param); @@ -320,13 +322,14 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans = CALLOC_STRUCT(r300_transfer); if (trans) { pipe_texture_reference(&trans->transfer.texture, texture); - trans->transfer.format = trans->transfer.format; + trans->transfer.format = texture->format; trans->transfer.width = w; trans->transfer.height = h; trans->transfer.block = texture->block; trans->transfer.nblocksx = texture->nblocksx[level]; trans->transfer.nblocksy = texture->nblocksy[level]; - trans->transfer.stride = tex->stride; + trans->transfer.stride = align(pf_get_stride(&trans->transfer.block, + texture->width[level]), 32); trans->transfer.usage = usage; trans->offset = offset; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 68da0aa4cb..a02fb34b2a 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -32,6 +32,7 @@ #include "r300_reg.h" #include "r300_state_inlines.h" #include "r300_fs.h" 
+#include "r300_vs.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -137,7 +138,6 @@ static void const struct pipe_constant_buffer* buffer) { struct r300_context* r300 = r300_context(pipe); - int i = r300->shader_constants[shader].user_count; /* This entire chunk of code seems ever-so-slightly baked. * It's as if I've got pipe_buffer* matryoshkas... */ @@ -148,26 +148,13 @@ static void map, buffer->buffer->size); pipe->winsys->buffer_unmap(pipe->winsys, buffer->buffer); - r300->shader_constants[shader].user_count = + r300->shader_constants[shader].count = buffer->buffer->size / (sizeof(float) * 4); } else { - r300->shader_constants[shader].user_count = 0; + r300->shader_constants[shader].count = 0; } r300->dirty_state |= R300_NEW_CONSTANTS; - - /* If the number of constants have changed, invalidate the shader. */ - if (r300->shader_constants[shader].user_count != i) { - if (shader == PIPE_SHADER_FRAGMENT && r300->fs && - r300->fs->uses_imms) { - r300->fs->translated = FALSE; - r300_translate_fragment_shader(r300, r300->fs); - } else if (shader == PIPE_SHADER_VERTEX && r300->vs && - r300->vs->uses_imms) { - r300->vs->translated = FALSE; - r300_translate_vertex_shader(r300, r300->vs); - } - } } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. @@ -284,14 +271,9 @@ static void static void* r300_create_fs_state(struct pipe_context* pipe, const struct pipe_shader_state* shader) { - struct r300_context* r300 = r300_context(pipe); struct r300_fragment_shader* fs = NULL; - if (r300_screen(r300->context.screen)->caps->is_r500) { - fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r5xx_fragment_shader); - } else { - fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r3xx_fragment_shader); - } + fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r300_fragment_shader); /* Copy state directly into shader. 
*/ fs->state = *shader; @@ -315,7 +297,6 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300_translate_fragment_shader(r300, fs); } - fs->translated = TRUE; r300->fs = fs; r300->dirty_state |= R300_NEW_FRAGMENT_SHADER; @@ -325,6 +306,7 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) static void r300_delete_fs_state(struct pipe_context* pipe, void* shader) { struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; + rc_constants_destroy(&fs->code.constants); FREE(fs->state.tokens); FREE(shader); } @@ -688,6 +670,7 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) if (r300_screen(pipe->screen)->caps->has_tcl) { struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; + rc_constants_destroy(&vs->code.constants); draw_delete_vertex_shader(r300->draw, vs->draw); FREE(vs->state.tokens); FREE(shader); diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 2477b30822..ea670f41fb 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -22,6 +22,9 @@ #include "r300_state_derived.h" +#include "r300_fs.h" +#include "r300_vs.h" + /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 430129d5bd..1e92374a4e 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -34,7 +34,7 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(22 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(24 + (caps->has_tcl ? 
2: 0)); /*** Graphics Backend (GB) ***/ /* Various GB enables */ @@ -56,6 +56,7 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0); + OUT_CS_REG(R300_US_W_FMT, 0x0); /*** VAP ***/ /* Max and min vertex index clamp. */ @@ -72,7 +73,7 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(71 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); + BEGIN_CS(64 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); /* Flush PVS. */ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); @@ -132,11 +133,5 @@ void r300_emit_invariant_state(struct r300_context* r300) /* XXX */ OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa); - OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); - OUT_CS(R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A); - OUT_CS(R300_US_OUT_FMT_UNUSED); - OUT_CS(R300_US_OUT_FMT_UNUSED); - OUT_CS(R300_US_OUT_FMT_UNUSED); - OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0); END_CS; } diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index fdabe4d9cf..a093f83945 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -37,7 +37,7 @@ static void r300_surface_setup(struct r300_context* r300, r300_emit_dsa_state(r300, &dsa_clear_state); r300_emit_rs_state(r300, &rs_clear_state); - BEGIN_CS(24); + BEGIN_CS(26); /* Viewport setup */ OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); @@ -78,8 +78,10 @@ static void r300_surface_setup(struct r300_context* r300, /* Setup colorbuffer. 
*/ OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch | - r300_translate_colorformat(dest->tex.format)); + OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0, 1); + OUT_CS_RELOC(dest->buffer, pixpitch | + r300_translate_colorformat(dest->tex.format), 0, + RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf); END_CS; @@ -125,9 +127,10 @@ validate: r300->context.flush(&r300->context, 0, NULL); goto validate; } - if (r300->winsys->validate(r300->winsys)) { + if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { + debug_printf("r300: Stuck in validation loop, gonna fallback."); goto fallback; } invalid = TRUE; @@ -138,10 +141,14 @@ validate: /* Vertex shader setup */ if (caps->has_tcl) { - r300_emit_vertex_shader(r300, &r300_passthrough_vertex_shader); + r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0); } else { BEGIN_CS(4); - OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VAP_TCL_BYPASS); + OUT_CS_REG(R300_VAP_CNTL_STATUS, +#ifdef PIPE_ARCH_BIG_ENDIAN + R300_VC_32BIT_SWAP | +#endif + R300_VAP_TCL_BYPASS); OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) | R300_PVS_NUM_CNTLRS(5) | R300_PVS_NUM_FPUS(caps->num_vert_fpus) | @@ -151,10 +158,10 @@ validate: /* Fragment shader setup */ if (caps->is_r500) { - r500_emit_fragment_shader(r300, &r5xx_passthrough_fragment_shader); + r500_emit_fragment_program_code(r300, &r5xx_passthrough_fragment_shader, 0); r300_emit_rs_block_state(r300, &r5xx_rs_block_clear_state); } else { - r300_emit_fragment_shader(r300, &r3xx_passthrough_fragment_shader); + r300_emit_fragment_program_code(r300, &r3xx_passthrough_fragment_shader, 0); r300_emit_rs_block_state(r300, &r3xx_rs_block_clear_state); } @@ -256,9 +263,10 @@ validate: r300->context.flush(&r300->context, 0, NULL); goto validate; } - if (r300->winsys->validate(r300->winsys)) { + if (!r300->winsys->validate(r300->winsys)) { 
r300->context.flush(&r300->context, 0, NULL); if (invalid) { + debug_printf("r300: Stuck in validation loop, gonna fallback."); goto fallback; } invalid = TRUE; @@ -275,10 +283,14 @@ validate: /* Vertex shader setup */ if (caps->has_tcl) { - r300_emit_vertex_shader(r300, &r300_passthrough_vertex_shader); + r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0); } else { BEGIN_CS(4); - OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VAP_TCL_BYPASS); + OUT_CS_REG(R300_VAP_CNTL_STATUS, +#ifdef PIPE_ARCH_BIG_ENDIAN + R300_VC_32BIT_SWAP | +#endif + R300_VAP_TCL_BYPASS); OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) | R300_PVS_NUM_CNTLRS(5) | R300_PVS_NUM_FPUS(caps->num_vert_fpus) | @@ -288,10 +300,10 @@ validate: /* Fragment shader setup */ if (caps->is_r500) { - r500_emit_fragment_shader(r300, &r5xx_texture_fragment_shader); + r500_emit_fragment_program_code(r300, &r5xx_texture_fragment_shader, 0); r300_emit_rs_block_state(r300, &r5xx_rs_block_copy_state); } else { - r300_emit_fragment_shader(r300, &r3xx_texture_fragment_shader); + r300_emit_fragment_program_code(r300, &r3xx_texture_fragment_shader, 0); r300_emit_rs_block_state(r300, &r3xx_rs_block_copy_state); } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 11c7858d42..590052509c 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -22,13 +22,6 @@ #include "r300_texture.h" -/* XXX maths need to go to util */ - -static int minify(int i) -{ - return MAX2(1, i >> 1); -} - static void r300_setup_texture_state(struct r300_texture* tex, unsigned width, unsigned height, @@ -55,6 +48,9 @@ static void r300_setup_texture_state(struct r300_texture* tex, if (height > 2048) { state->format2 |= R500_TXHEIGHT_BIT11; } + + debug_printf("r300: Set texture state (%dx%d, pitch %d, %d levels)\n", + width, height, pitch, levels); } static void r300_setup_miptree(struct r300_texture* tex) @@ -71,19 +67,23 @@ static void 
r300_setup_miptree(struct r300_texture* tex) } base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]); - base->nblocksy[i] = pf_get_nblocksy(&base->block, base->width[i]); + base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]); /* Radeons enjoy things in multiples of 64. * * XXX * POT, uncompressed, unmippmapped textures can be aligned to 32, * instead of 64. */ - stride = align(base->nblocksx[i] * base->block.size, 64); + stride = align(pf_get_stride(&base->block, base->width[i]), 32); size = stride * base->nblocksy[i] * base->depth[i]; - tex->offset[i] = align(tex->size, 64); + tex->offset[i] = align(tex->size, 32); tex->size = tex->offset[i] + size; + debug_printf("r300: Texture miptree: Level %d " + "(%dx%dx%d px, pitch %d bytes)\n", + i, base->width[i], base->height[i], base->depth[i], + stride); /* Save stride of first level to the texture. */ if (i == 0) { tex->stride = stride; @@ -111,7 +111,7 @@ static struct pipe_texture* r300_setup_texture_state(tex, template->width[0], template->height[0], template->width[0], template->last_level); - tex->buffer = screen->buffer_create(screen, 64, + tex->buffer = screen->buffer_create(screen, 1024, PIPE_BUFFER_USAGE_PIXEL, tex->size); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c new file mode 100644 index 0000000000..3adbb715f3 --- /dev/null +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -0,0 +1,337 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_tgsi_to_rc.h" + +#include "radeon_compiler.h" +#include "radeon_program.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_util.h" + + +static unsigned translate_opcode(unsigned opcode) +{ + switch(opcode) { + case TGSI_OPCODE_ARL: return OPCODE_ARL; + case TGSI_OPCODE_MOV: return OPCODE_MOV; + case TGSI_OPCODE_LIT: return OPCODE_LIT; + case TGSI_OPCODE_RCP: return OPCODE_RCP; + case TGSI_OPCODE_RSQ: return OPCODE_RSQ; + case TGSI_OPCODE_EXP: return OPCODE_EXP; + case TGSI_OPCODE_LOG: return OPCODE_LOG; + case TGSI_OPCODE_MUL: return OPCODE_MUL; + case TGSI_OPCODE_ADD: return OPCODE_ADD; + case TGSI_OPCODE_DP3: return OPCODE_DP3; + case TGSI_OPCODE_DP4: return OPCODE_DP4; + case TGSI_OPCODE_DST: return OPCODE_DST; + case TGSI_OPCODE_MIN: return OPCODE_MIN; + case TGSI_OPCODE_MAX: return OPCODE_MAX; + case TGSI_OPCODE_SLT: return OPCODE_SLT; + case TGSI_OPCODE_SGE: return OPCODE_SGE; + case TGSI_OPCODE_MAD: return OPCODE_MAD; + case TGSI_OPCODE_SUB: return OPCODE_SUB; + case TGSI_OPCODE_LRP: return OPCODE_LRP; + /* case TGSI_OPCODE_CND: return OPCODE_CND; */ + /* case TGSI_OPCODE_CND0: return OPCODE_CND0; */ + case TGSI_OPCODE_DP2A: return OPCODE_DP2A; + /* gap */ + case TGSI_OPCODE_FRC: return OPCODE_FRC; + /* case TGSI_OPCODE_CLAMP: return OPCODE_CLAMP; */ + 
case TGSI_OPCODE_FLR: return OPCODE_FLR; + /* case TGSI_OPCODE_ROUND: return OPCODE_ROUND; */ + case TGSI_OPCODE_EX2: return OPCODE_EX2; + case TGSI_OPCODE_LG2: return OPCODE_LG2; + case TGSI_OPCODE_POW: return OPCODE_POW; + case TGSI_OPCODE_XPD: return OPCODE_XPD; + /* gap */ + case TGSI_OPCODE_ABS: return OPCODE_ABS; + case TGSI_OPCODE_RCC: return OPCODE_RCC; + case TGSI_OPCODE_DPH: return OPCODE_DPH; + case TGSI_OPCODE_COS: return OPCODE_COS; + case TGSI_OPCODE_DDX: return OPCODE_DDX; + case TGSI_OPCODE_DDY: return OPCODE_DDY; + /* case TGSI_OPCODE_KILP: return OPCODE_KILP; */ + case TGSI_OPCODE_PK2H: return OPCODE_PK2H; + case TGSI_OPCODE_PK2US: return OPCODE_PK2US; + case TGSI_OPCODE_PK4B: return OPCODE_PK4B; + case TGSI_OPCODE_PK4UB: return OPCODE_PK4UB; + case TGSI_OPCODE_RFL: return OPCODE_RFL; + case TGSI_OPCODE_SEQ: return OPCODE_SEQ; + case TGSI_OPCODE_SFL: return OPCODE_SFL; + case TGSI_OPCODE_SGT: return OPCODE_SGT; + case TGSI_OPCODE_SIN: return OPCODE_SIN; + case TGSI_OPCODE_SLE: return OPCODE_SLE; + case TGSI_OPCODE_SNE: return OPCODE_SNE; + case TGSI_OPCODE_STR: return OPCODE_STR; + case TGSI_OPCODE_TEX: return OPCODE_TEX; + case TGSI_OPCODE_TXD: return OPCODE_TXD; + case TGSI_OPCODE_TXP: return OPCODE_TXP; + case TGSI_OPCODE_UP2H: return OPCODE_UP2H; + case TGSI_OPCODE_UP2US: return OPCODE_UP2US; + case TGSI_OPCODE_UP4B: return OPCODE_UP4B; + case TGSI_OPCODE_UP4UB: return OPCODE_UP4UB; + case TGSI_OPCODE_X2D: return OPCODE_X2D; + case TGSI_OPCODE_ARA: return OPCODE_ARA; + case TGSI_OPCODE_ARR: return OPCODE_ARR; + case TGSI_OPCODE_BRA: return OPCODE_BRA; + case TGSI_OPCODE_CAL: return OPCODE_CAL; + case TGSI_OPCODE_RET: return OPCODE_RET; + case TGSI_OPCODE_SSG: return OPCODE_SSG; + case TGSI_OPCODE_CMP: return OPCODE_CMP; + case TGSI_OPCODE_SCS: return OPCODE_SCS; + case TGSI_OPCODE_TXB: return OPCODE_TXB; + /* case TGSI_OPCODE_NRM: return OPCODE_NRM; */ + /* case TGSI_OPCODE_DIV: return OPCODE_DIV; */ + case TGSI_OPCODE_DP2: return OPCODE_DP2; 
+ case TGSI_OPCODE_TXL: return OPCODE_TXL; + case TGSI_OPCODE_BRK: return OPCODE_BRK; + case TGSI_OPCODE_IF: return OPCODE_IF; + /* case TGSI_OPCODE_LOOP: return OPCODE_LOOP; */ + /* case TGSI_OPCODE_REP: return OPCODE_REP; */ + case TGSI_OPCODE_ELSE: return OPCODE_ELSE; + case TGSI_OPCODE_ENDIF: return OPCODE_ENDIF; + case TGSI_OPCODE_ENDLOOP: return OPCODE_ENDLOOP; + /* case TGSI_OPCODE_ENDREP: return OPCODE_ENDREP; */ + case TGSI_OPCODE_PUSHA: return OPCODE_PUSHA; + case TGSI_OPCODE_POPA: return OPCODE_POPA; + /* case TGSI_OPCODE_CEIL: return OPCODE_CEIL; */ + /* case TGSI_OPCODE_I2F: return OPCODE_I2F; */ + case TGSI_OPCODE_NOT: return OPCODE_NOT; + case TGSI_OPCODE_TRUNC: return OPCODE_TRUNC; + /* case TGSI_OPCODE_SHL: return OPCODE_SHL; */ + /* case TGSI_OPCODE_SHR: return OPCODE_SHR; */ + case TGSI_OPCODE_AND: return OPCODE_AND; + case TGSI_OPCODE_OR: return OPCODE_OR; + /* case TGSI_OPCODE_MOD: return OPCODE_MOD; */ + case TGSI_OPCODE_XOR: return OPCODE_XOR; + /* case TGSI_OPCODE_SAD: return OPCODE_SAD; */ + /* case TGSI_OPCODE_TXF: return OPCODE_TXF; */ + /* case TGSI_OPCODE_TXQ: return OPCODE_TXQ; */ + case TGSI_OPCODE_CONT: return OPCODE_CONT; + /* case TGSI_OPCODE_EMIT: return OPCODE_EMIT; */ + /* case TGSI_OPCODE_ENDPRIM: return OPCODE_ENDPRIM; */ + /* case TGSI_OPCODE_BGNLOOP2: return OPCODE_BGNLOOP2; */ + case TGSI_OPCODE_BGNSUB: return OPCODE_BGNSUB; + /* case TGSI_OPCODE_ENDLOOP2: return OPCODE_ENDLOOP2; */ + case TGSI_OPCODE_ENDSUB: return OPCODE_ENDSUB; + case TGSI_OPCODE_NOISE1: return OPCODE_NOISE1; + case TGSI_OPCODE_NOISE2: return OPCODE_NOISE2; + case TGSI_OPCODE_NOISE3: return OPCODE_NOISE3; + case TGSI_OPCODE_NOISE4: return OPCODE_NOISE4; + case TGSI_OPCODE_NOP: return OPCODE_NOP; + /* gap */ + case TGSI_OPCODE_NRM4: return OPCODE_NRM4; + /* case TGSI_OPCODE_CALLNZ: return OPCODE_CALLNZ; */ + /* case TGSI_OPCODE_IFC: return OPCODE_IFC; */ + /* case TGSI_OPCODE_BREAKC: return OPCODE_BREAKC; */ + case TGSI_OPCODE_KIL: return OPCODE_KIL; + 
case TGSI_OPCODE_END: return OPCODE_END; + case TGSI_OPCODE_SWZ: return OPCODE_SWZ; + } + + fprintf(stderr, "Unknown opcode: %i\n", opcode); + abort(); +} + +static unsigned translate_saturate(unsigned saturate) +{ + switch(saturate) { + case TGSI_SAT_NONE: return SATURATE_OFF; + case TGSI_SAT_ZERO_ONE: return SATURATE_ZERO_ONE; + case TGSI_SAT_MINUS_PLUS_ONE: return SATURATE_PLUS_MINUS_ONE; + } + + fprintf(stderr, "Unknown saturate mode: %i\n", saturate); + abort(); +} + +static unsigned translate_register_file(unsigned file) +{ + switch(file) { + case TGSI_FILE_CONSTANT: return PROGRAM_CONSTANT; + case TGSI_FILE_IMMEDIATE: return PROGRAM_CONSTANT; + case TGSI_FILE_INPUT: return PROGRAM_INPUT; + case TGSI_FILE_OUTPUT: return PROGRAM_OUTPUT; + case TGSI_FILE_TEMPORARY: return PROGRAM_TEMPORARY; + case TGSI_FILE_ADDRESS: return PROGRAM_ADDRESS; + } + + fprintf(stderr, "Unhandled register file: %i\n", file); + abort(); +} + +static int translate_register_index( + struct tgsi_to_rc * ttr, + unsigned file, + int index) +{ + if (file == TGSI_FILE_IMMEDIATE) + return ttr->immediate_offset + index; + + return index; +} + +static void transform_dstreg( + struct tgsi_to_rc * ttr, + struct prog_dst_register * dst, + struct tgsi_full_dst_register * src) +{ + dst->File = translate_register_file(src->DstRegister.File); + dst->Index = translate_register_index(ttr, src->DstRegister.File, src->DstRegister.Index); + dst->WriteMask = src->DstRegister.WriteMask; + dst->RelAddr = src->DstRegister.Indirect; +} + +static void transform_srcreg( + struct tgsi_to_rc * ttr, + struct prog_src_register * dst, + struct tgsi_full_src_register * src) +{ + dst->File = translate_register_file(src->SrcRegister.File); + dst->Index = translate_register_index(ttr, src->SrcRegister.File, src->SrcRegister.Index); + dst->RelAddr = src->SrcRegister.Indirect; + dst->Swizzle = tgsi_util_get_full_src_register_extswizzle(src, 0); + dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 1) << 3; + 
dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 2) << 6; + dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 3) << 9; + dst->Abs = src->SrcRegisterExtMod.Absolute; + dst->Negate = + src->SrcRegisterExtSwz.NegateX | + (src->SrcRegisterExtSwz.NegateY << 1) | + (src->SrcRegisterExtSwz.NegateZ << 2) | + (src->SrcRegisterExtSwz.NegateW << 3); + dst->Negate ^= src->SrcRegister.Negate ? NEGATE_XYZW : 0; +} + +static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src) +{ + switch(src.Texture) { + case TGSI_TEXTURE_1D: + dst->I.TexSrcTarget = TEXTURE_1D_INDEX; + break; + case TGSI_TEXTURE_2D: + dst->I.TexSrcTarget = TEXTURE_2D_INDEX; + break; + case TGSI_TEXTURE_3D: + dst->I.TexSrcTarget = TEXTURE_3D_INDEX; + break; + case TGSI_TEXTURE_CUBE: + dst->I.TexSrcTarget = TEXTURE_CUBE_INDEX; + break; + case TGSI_TEXTURE_RECT: + dst->I.TexSrcTarget = TEXTURE_RECT_INDEX; + break; + case TGSI_TEXTURE_SHADOW1D: + dst->I.TexSrcTarget = TEXTURE_1D_INDEX; + dst->I.TexShadow = 1; + break; + case TGSI_TEXTURE_SHADOW2D: + dst->I.TexSrcTarget = TEXTURE_2D_INDEX; + dst->I.TexShadow = 1; + break; + case TGSI_TEXTURE_SHADOWRECT: + dst->I.TexSrcTarget = TEXTURE_RECT_INDEX; + dst->I.TexShadow = 1; + break; + } +} + +static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src) +{ + if (src->Instruction.Opcode == TGSI_OPCODE_END) + return; + + struct rc_instruction * dst = rc_insert_new_instruction(ttr->compiler, ttr->compiler->Program.Instructions.Prev); + int i; + + dst->I.Opcode = translate_opcode(src->Instruction.Opcode); + dst->I.SaturateMode = translate_saturate(src->Instruction.Saturate); + + if (src->Instruction.NumDstRegs) + transform_dstreg(ttr, &dst->I.DstReg, &src->FullDstRegisters[0]); + + for(i = 0; i < src->Instruction.NumSrcRegs; ++i) { + if (src->FullSrcRegisters[i].SrcRegister.File == TGSI_FILE_SAMPLER) + dst->I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index; + else + 
transform_srcreg(ttr, &dst->I.SrcReg[i], &src->FullSrcRegisters[i]); + } + + /* Texturing. */ + transform_texture(dst, src->InstructionExtTexture); +} + +static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm) +{ + struct rc_constant constant; + int i; + + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + for(i = 0; i < 4; ++i) + constant.u.Immediate[i] = imm->u[i].Float; + rc_constants_add(&ttr->compiler->Program.Constants, &constant); +} + +void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) +{ + struct tgsi_parse_context parser; + int i; + + /* Allocate constants placeholders. + * + * Note: What if declared constants are not contiguous? */ + for(i = 0; i <= ttr->info->file_max[TGSI_FILE_CONSTANT]; ++i) { + struct rc_constant constant; + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_EXTERNAL; + constant.Size = 4; + constant.u.External = i; + rc_constants_add(&ttr->compiler->Program.Constants, &constant); + } + + ttr->immediate_offset = ttr->compiler->Program.Constants.Count; + + tgsi_parse_init(&parser, tokens); + + while (!tgsi_parse_end_of_tokens(&parser)) { + tgsi_parse_token(&parser); + + switch (parser.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + handle_immediate(ttr, &parser.FullToken.FullImmediate); + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + transform_instruction(ttr, &parser.FullToken.FullInstruction); + break; + } + } + + tgsi_parse_free(&parser); + + rc_calculate_inputs_outputs(ttr->compiler); +} + diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h new file mode 100644 index 0000000000..93e90ec6d2 --- /dev/null +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h @@ -0,0 +1,41 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef R300_TGSI_TO_RC_H +#define R300_TGSI_TO_RC_H + +struct radeon_compiler; + +struct tgsi_full_declaration; +struct tgsi_shader_info; +struct tgsi_token; + +struct tgsi_to_rc { + struct radeon_compiler * compiler; + const struct tgsi_shader_info * info; + + int immediate_offset; +}; + +void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens); + +#endif /* R300_TGSI_TO_RC_H */ diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index f87435f9f0..2cb903bba2 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -22,391 +22,213 @@ #include "r300_vs.h" -static void r300_vs_declare(struct r300_vs_asm* assembler, - struct tgsi_full_declaration* decl) -{ - switch (decl->Declaration.File) { - case TGSI_FILE_INPUT: - break; - case TGSI_FILE_OUTPUT: - switch (decl->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - assembler->tab[decl->DeclarationRange.First] = 0; - break; - case TGSI_SEMANTIC_COLOR: - assembler->tab[decl->DeclarationRange.First] = - (assembler->point_size ? 1 : 0) + - assembler->out_colors; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - /* XXX multiple? */ - assembler->tab[decl->DeclarationRange.First] = - (assembler->point_size ? 
1 : 0) + - assembler->out_colors + - assembler->out_texcoords; - break; - case TGSI_SEMANTIC_PSIZE: - assembler->tab[decl->DeclarationRange.First] = 1; - break; - default: - debug_printf("r300: vs: Bad semantic declaration %d\n", - decl->Semantic.SemanticName); - break; - } - break; - case TGSI_FILE_CONSTANT: - break; - case TGSI_FILE_TEMPORARY: - assembler->temp_count++; - break; - default: - debug_printf("r300: vs: Bad file %d\n", decl->Declaration.File); - break; - } -} +#include "r300_context.h" +#include "r300_tgsi_to_rc.h" -static INLINE unsigned r300_vs_src_type(struct r300_vs_asm* assembler, - struct tgsi_src_register* src) -{ - switch (src->File) { - case TGSI_FILE_NULL: - case TGSI_FILE_INPUT: - /* Probably a zero or one swizzle */ - return R300_PVS_SRC_REG_INPUT; - case TGSI_FILE_TEMPORARY: - return R300_PVS_SRC_REG_TEMPORARY; - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - return R300_PVS_SRC_REG_CONSTANT; - default: - debug_printf("r300: vs: Unimplemented src type %d\n", src->File); - break; - } - return 0; -} +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" -static INLINE unsigned r300_vs_src(struct r300_vs_asm* assembler, - struct tgsi_src_register* src) -{ - switch (src->File) { - case TGSI_FILE_NULL: - case TGSI_FILE_INPUT: - case TGSI_FILE_TEMPORARY: - case TGSI_FILE_CONSTANT: - return src->Index; - case TGSI_FILE_IMMEDIATE: - return src->Index + assembler->imm_offset; - default: - debug_printf("r300: vs: Unimplemented src type %d\n", src->File); - break; - } - return 0; -} +#include "radeon_compiler.h" -static INLINE unsigned r300_vs_dst_type(struct r300_vs_asm* assembler, - struct tgsi_dst_register* dst) -{ - switch (dst->File) { - case TGSI_FILE_TEMPORARY: - return R300_PVS_DST_REG_TEMPORARY; - case TGSI_FILE_OUTPUT: - return R300_PVS_DST_REG_OUT; - default: - debug_printf("r300: vs: Unimplemented dst type %d\n", dst->File); - break; - } - return 0; -} -static INLINE unsigned r300_vs_dst(struct r300_vs_asm* assembler, - 
struct tgsi_dst_register* dst) +static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) { - switch (dst->File) { - case TGSI_FILE_TEMPORARY: - return dst->Index; - case TGSI_FILE_OUTPUT: - return assembler->tab[dst->Index]; - default: - debug_printf("r300: vs: Unimplemented dst %d\n", dst->File); - break; - } - return 0; -} + struct r300_vertex_shader * vs = c->UserData; + struct tgsi_shader_info* info = &vs->info; + boolean pointsize = false; + int out_colors = 0; + int colors = 0; + int out_generic = 0; + int generic = 0; + int i; -static uint32_t r300_vs_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - return R300_VE_DOT_PRODUCT; - case TGSI_OPCODE_MUL: - return R300_VE_MULTIPLY; - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SUB: - case TGSI_OPCODE_SWZ: - return R300_VE_ADD; - case TGSI_OPCODE_MAX: - return R300_VE_MAXIMUM; - case TGSI_OPCODE_SLT: - return R300_VE_SET_LESS_THAN; - case TGSI_OPCODE_RSQ: - return R300_PVS_DST_MATH_INST | R300_ME_RECIP_DX; - case TGSI_OPCODE_MAD: - return R300_PVS_DST_MACRO_INST | R300_PVS_MACRO_OP_2CLK_MADD; - default: - break; - } - return 0; -} + /* Fill in the input mapping */ + for (i = 0; i < info->num_inputs; i++) + c->code->inputs[i] = i; -static uint32_t r300_vs_swiz(struct tgsi_full_src_register* reg) -{ - if (reg->SrcRegister.Extended) { - return (reg->SrcRegister.Negate ? (0xf << 12) : 0) | - reg->SrcRegisterExtSwz.ExtSwizzleX | - (reg->SrcRegisterExtSwz.ExtSwizzleY << 3) | - (reg->SrcRegisterExtSwz.ExtSwizzleZ << 6) | - (reg->SrcRegisterExtSwz.ExtSwizzleW << 9); - } else { - return (reg->SrcRegister.Negate ? 
(0xf << 12) : 0) | - reg->SrcRegister.SwizzleX | - (reg->SrcRegister.SwizzleY << 3) | - (reg->SrcRegister.SwizzleZ << 6) | - (reg->SrcRegister.SwizzleW << 9); + /* Fill in the output mapping */ + for (i = 0; i < info->num_outputs; i++) { + switch (info->output_semantic_name[i]) { + case TGSI_SEMANTIC_PSIZE: + pointsize = true; + break; + case TGSI_SEMANTIC_COLOR: + out_colors++; + break; + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + out_generic++; + break; + } } -} -/* XXX icky icky icky icky */ -static uint32_t r300_vs_scalar_swiz(struct tgsi_full_src_register* reg) -{ - if (reg->SrcRegister.Extended) { - return (reg->SrcRegister.Negate ? (0xf << 12) : 0) | - reg->SrcRegisterExtSwz.ExtSwizzleX | - (reg->SrcRegisterExtSwz.ExtSwizzleX << 3) | - (reg->SrcRegisterExtSwz.ExtSwizzleX << 6) | - (reg->SrcRegisterExtSwz.ExtSwizzleX << 9); - } else { - return (reg->SrcRegister.Negate ? (0xf << 12) : 0) | - reg->SrcRegister.SwizzleX | - (reg->SrcRegister.SwizzleX << 3) | - (reg->SrcRegister.SwizzleX << 6) | - (reg->SrcRegister.SwizzleX << 9); - } -} + struct tgsi_parse_context parser; -/* XXX scalar stupidity */ -static void r300_vs_emit_inst(struct r300_vertex_shader* vs, - struct r300_vs_asm* assembler, - struct tgsi_full_src_register* src, - struct tgsi_full_dst_register* dst, - unsigned op, - unsigned count, - boolean is_scalar) -{ - int i = vs->instruction_count; - vs->instructions[i].inst0 = R300_PVS_DST_OPCODE(r300_vs_op(op)) | - R300_PVS_DST_REG_TYPE(r300_vs_dst_type(assembler, &dst->DstRegister)) | - R300_PVS_DST_OFFSET(r300_vs_dst(assembler, &dst->DstRegister)) | - R300_PVS_DST_WE(dst->DstRegister.WriteMask); - switch (count) { - case 3: - vs->instructions[i].inst3 = - R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler, - &src[2].SrcRegister)) | - R300_PVS_SRC_OFFSET(r300_vs_src(assembler, - &src[2].SrcRegister)) | - R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[2])); - /* Fall through */ - case 2: - vs->instructions[i].inst2 = - 
R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler, - &src[1].SrcRegister)) | - R300_PVS_SRC_OFFSET(r300_vs_src(assembler, - &src[1].SrcRegister)) | - R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[1])); - /* Fall through */ - case 1: - vs->instructions[i].inst1 = - R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler, - &src[0].SrcRegister)) | - R300_PVS_SRC_OFFSET(r300_vs_src(assembler, - &src[0].SrcRegister)) | - /* XXX the icky, it burns */ - R300_PVS_SRC_SWIZZLE(is_scalar ? r300_vs_scalar_swiz(&src[0]) - : r300_vs_swiz(&src[0])); - break; - } - vs->instruction_count++; -} + tgsi_parse_init(&parser, vs->state.tokens); -static void r300_vs_instruction(struct r300_vertex_shader* vs, - struct r300_vs_asm* assembler, - struct tgsi_full_instruction* inst) -{ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_RSQ: - r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 1, TRUE); - break; - case TGSI_OPCODE_SUB: - inst->FullSrcRegisters[1].SrcRegister.Negate = - !inst->FullSrcRegisters[1].SrcRegister.Negate; - /* Fall through */ - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MUL: - case TGSI_OPCODE_MAX: - case TGSI_OPCODE_SLT: - r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 2, FALSE); - break; - case TGSI_OPCODE_DP3: - /* Set alpha swizzle to zero for src0 and src1 */ - if (!inst->FullSrcRegisters[0].SrcRegister.Extended) { - inst->FullSrcRegisters[0].SrcRegister.Extended = TRUE; - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX = - inst->FullSrcRegisters[0].SrcRegister.SwizzleX; - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleY = - inst->FullSrcRegisters[0].SrcRegister.SwizzleY; - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleZ = - inst->FullSrcRegisters[0].SrcRegister.SwizzleZ; - } - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = - TGSI_EXTSWIZZLE_ZERO; - if (!inst->FullSrcRegisters[1].SrcRegister.Extended) { - 
inst->FullSrcRegisters[1].SrcRegister.Extended = TRUE; - inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleX = - inst->FullSrcRegisters[1].SrcRegister.SwizzleX; - inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleY = - inst->FullSrcRegisters[1].SrcRegister.SwizzleY; - inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleZ = - inst->FullSrcRegisters[1].SrcRegister.SwizzleZ; - } - inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleW = - TGSI_EXTSWIZZLE_ZERO; - /* Fall through */ - case TGSI_OPCODE_DP4: - r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 2, FALSE); - break; - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - inst->FullSrcRegisters[1] = r300_constant_zero; - r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 2, FALSE); - break; - case TGSI_OPCODE_MAD: - r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 3, FALSE); - break; - case TGSI_OPCODE_END: - break; - default: - debug_printf("r300: vs: Bad opcode %d\n", - inst->Instruction.Opcode); - break; - } -} + while (!tgsi_parse_end_of_tokens(&parser)) { + tgsi_parse_token(&parser); -static void r300_vs_init(struct r300_vertex_shader* vs, - struct r300_vs_asm* assembler) -{ - struct tgsi_shader_info* info = &vs->info; - int i; + if (parser.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) + continue; - for (i = 0; i < info->num_outputs; i++) { - switch (info->output_semantic_name[i]) { + struct tgsi_full_declaration * decl = &parser.FullToken.FullDeclaration; + + if (decl->Declaration.File != TGSI_FILE_OUTPUT) + continue; + + switch (decl->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + c->code->outputs[decl->DeclarationRange.First] = 0; + break; case TGSI_SEMANTIC_PSIZE: - assembler->point_size = TRUE; + c->code->outputs[decl->DeclarationRange.First] = 1; break; case TGSI_SEMANTIC_COLOR: - 
assembler->out_colors++; + c->code->outputs[decl->DeclarationRange.First] = 1 + + (pointsize ? 1 : 0) + + colors++; break; case TGSI_SEMANTIC_FOG: case TGSI_SEMANTIC_GENERIC: - assembler->out_texcoords++; + c->code->outputs[decl->DeclarationRange.First] = 1 + + (pointsize ? 1 : 0) + + out_colors + + generic++; + break; + default: + debug_printf("r300: vs: Bad semantic declaration %d\n", + decl->Semantic.SemanticName); break; } } - vs->instruction_count = 0; + tgsi_parse_free(&parser); } + void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs) { - struct tgsi_parse_context parser; - int i; - struct r300_constant_buffer* consts = - &r300->shader_constants[PIPE_SHADER_VERTEX]; + struct r300_vertex_program_compiler compiler; + struct tgsi_to_rc ttr; - struct r300_vs_asm* assembler = CALLOC_STRUCT(r300_vs_asm); - if (assembler == NULL) { - return; - } + /* Setup the compiler */ + rc_init(&compiler.Base); - /* Init assembler. */ - r300_vs_init(vs, assembler); + compiler.Base.Debug = 1; + compiler.code = &vs->code; + compiler.UserData = vs; - /* Setup starting offset for immediates. */ - assembler->imm_offset = consts->user_count; + if (compiler.Base.Debug) { + debug_printf("r300: Initial vertex program\n"); + tgsi_dump(vs->state.tokens, 0); + } - tgsi_parse_init(&parser, vs->state.tokens); + /* Translate TGSI to our internal representation */ + ttr.compiler = &compiler.Base; + ttr.info = &vs->info; - while (!tgsi_parse_end_of_tokens(&parser)) { - tgsi_parse_token(&parser); + r300_tgsi_to_rc(&ttr, vs->state.tokens); - /* This is seriously the lamest way to create fragment programs ever. - * I blame TGSI. */ - switch (parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* Allocated registers sitting at the beginning - * of the program. 
*/ - r300_vs_declare(assembler, &parser.FullToken.FullDeclaration); - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - debug_printf("r300: Emitting immediate to constant buffer, " - "position %d\n", - assembler->imm_offset + assembler->imm_count); - /* I am not amused by the length of these. */ - for (i = 0; i < 4; i++) { - consts->constants[assembler->imm_offset + - assembler->imm_count][i] = - parser.FullToken.FullImmediate.u.ImmediateFloat32[i] - .Float; - } - assembler->imm_count++; - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - r300_vs_instruction(vs, assembler, - &parser.FullToken.FullInstruction); - break; - } - } + compiler.RequiredOutputs = ~(~0 << vs->info.num_outputs); + compiler.SetHwInputOutput = &set_vertex_inputs_outputs; - debug_printf("r300: vs: %d texs and %d colors, first free reg is %d\n", - assembler->tex_count, assembler->color_count, - assembler->tex_count + assembler->color_count); + /* Invoke the compiler */ + r3xx_compile_vertex_program(&compiler); + if (compiler.Base.Error) { + /* Todo: Fail gracefully */ + fprintf(stderr, "r300 VP: Compiler error\n"); + abort(); + } - consts->count = consts->user_count + assembler->imm_count; - vs->uses_imms = assembler->imm_count; - debug_printf("r300: vs: %d total constants, " - "%d from user and %d from immediates\n", consts->count, - consts->user_count, assembler->imm_count); + /* And, finally... 
*/ + rc_destroy(&compiler.Base); + vs->translated = TRUE; +} - debug_printf("r300: vs: tab: %d %d %d %d\n", assembler->tab[0], - assembler->tab[1], assembler->tab[2], assembler->tab[3]); - tgsi_dump(vs->state.tokens, 0); - /* XXX finish r300 vertex shader dumper */ - r300_vs_dump(vs); +/* XXX get these to r300_reg */ +#define R300_PVS_DST_OPCODE(x) ((x) << 0) +# define R300_VE_DOT_PRODUCT 1 +# define R300_VE_MULTIPLY 2 +# define R300_VE_ADD 3 +# define R300_VE_MAXIMUM 7 +# define R300_VE_SET_LESS_THAN 10 +#define R300_PVS_DST_MATH_INST (1 << 6) +# define R300_ME_RECIP_DX 6 +#define R300_PVS_DST_MACRO_INST (1 << 7) +# define R300_PVS_MACRO_OP_2CLK_MADD 0 +#define R300_PVS_DST_REG_TYPE(x) ((x) << 8) +# define R300_PVS_DST_REG_TEMPORARY 0 +# define R300_PVS_DST_REG_A0 1 +# define R300_PVS_DST_REG_OUT 2 +# define R300_PVS_DST_REG_OUT_REPL_X 3 +# define R300_PVS_DST_REG_ALT_TEMPORARY 4 +# define R300_PVS_DST_REG_INPUT 5 +#define R300_PVS_DST_OFFSET(x) ((x) << 13) +#define R300_PVS_DST_WE(x) ((x) << 20) +#define R300_PVS_DST_WE_XYZW (0xf << 20) + +#define R300_PVS_SRC_REG_TYPE(x) ((x) << 0) +# define R300_PVS_SRC_REG_TEMPORARY 0 +# define R300_PVS_SRC_REG_INPUT 1 +# define R300_PVS_SRC_REG_CONSTANT 2 +# define R300_PVS_SRC_REG_ALT_TEMPORARY 3 +#define R300_PVS_SRC_OFFSET(x) ((x) << 5) +#define R300_PVS_SRC_SWIZZLE(x) ((x) << 13) +# define R300_PVS_SRC_SELECT_X 0 +# define R300_PVS_SRC_SELECT_Y 1 +# define R300_PVS_SRC_SELECT_Z 2 +# define R300_PVS_SRC_SELECT_W 3 +# define R300_PVS_SRC_SELECT_FORCE_0 4 +# define R300_PVS_SRC_SELECT_FORCE_1 5 +# define R300_PVS_SRC_SWIZZLE_XYZW \ + ((R300_PVS_SRC_SELECT_X | (R300_PVS_SRC_SELECT_Y << 3) | \ + (R300_PVS_SRC_SELECT_Z << 6) | (R300_PVS_SRC_SELECT_W << 9)) << 13) +# define R300_PVS_SRC_SWIZZLE_ZERO \ + ((R300_PVS_SRC_SELECT_FORCE_0 | (R300_PVS_SRC_SELECT_FORCE_0 << 3) | \ + (R300_PVS_SRC_SELECT_FORCE_0 << 6) | \ + (R300_PVS_SRC_SELECT_FORCE_0 << 9)) << 13) +# define R300_PVS_SRC_SWIZZLE_ONE \ + ((R300_PVS_SRC_SELECT_FORCE_1 | 
(R300_PVS_SRC_SELECT_FORCE_1 << 3) | \ + (R300_PVS_SRC_SELECT_FORCE_1 << 6) | \ + (R300_PVS_SRC_SELECT_FORCE_1 << 9)) << 13) +#define R300_PVS_MODIFIER_X (1 << 25) +#define R300_PVS_MODIFIER_Y (1 << 26) +#define R300_PVS_MODIFIER_Z (1 << 27) +#define R300_PVS_MODIFIER_W (1 << 28) +#define R300_PVS_NEGATE_XYZW \ + (R300_PVS_MODIFIER_X | R300_PVS_MODIFIER_Y | \ + R300_PVS_MODIFIER_Z | R300_PVS_MODIFIER_W) + +struct r300_vertex_program_code r300_passthrough_vertex_shader = { + .length = 8, /* two instructions */ + + /* MOV out[0], in[0] */ + .body.d[0] = R300_PVS_DST_OPCODE(R300_VE_ADD) | + R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | + R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW, + .body.d[1] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | + R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW, + .body.d[2] = R300_PVS_SRC_SWIZZLE_ZERO, + .body.d[3] = 0x0, + + /* MOV out[1], in[1] */ + .body.d[4] = R300_PVS_DST_OPCODE(R300_VE_ADD) | + R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | + R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW, + .body.d[5] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | + R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW, + .body.d[6] = R300_PVS_SRC_SWIZZLE_ZERO, + .body.d[7] = 0x0, + + .inputs[0] = 0, + .inputs[1] = 1, + .outputs[0] = 0, + .outputs[1] = 1, + + .InputsRead = 3, + .OutputsWritten = 3 +}; - tgsi_parse_free(&parser); - FREE(assembler); -} diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 165d717812..2a4ce315e3 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -23,134 +23,31 @@ #ifndef R300_VS_H #define R300_VS_H -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_dump.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" -#include "r300_context.h" -#include "r300_debug.h" -#include "r300_reg.h" -#include "r300_screen.h" -#include "r300_shader_inlines.h" +#include "radeon_code.h" -/* XXX get these to r300_reg */ -#define R300_PVS_DST_OPCODE(x) 
((x) << 0) -# define R300_VE_DOT_PRODUCT 1 -# define R300_VE_MULTIPLY 2 -# define R300_VE_ADD 3 -# define R300_VE_MAXIMUM 7 -# define R300_VE_SET_LESS_THAN 10 -#define R300_PVS_DST_MATH_INST (1 << 6) -# define R300_ME_RECIP_DX 6 -#define R300_PVS_DST_MACRO_INST (1 << 7) -# define R300_PVS_MACRO_OP_2CLK_MADD 0 -#define R300_PVS_DST_REG_TYPE(x) ((x) << 8) -# define R300_PVS_DST_REG_TEMPORARY 0 -# define R300_PVS_DST_REG_A0 1 -# define R300_PVS_DST_REG_OUT 2 -# define R300_PVS_DST_REG_OUT_REPL_X 3 -# define R300_PVS_DST_REG_ALT_TEMPORARY 4 -# define R300_PVS_DST_REG_INPUT 5 -#define R300_PVS_DST_OFFSET(x) ((x) << 13) -#define R300_PVS_DST_WE(x) ((x) << 20) -#define R300_PVS_DST_WE_XYZW (0xf << 20) +struct r300_context; -#define R300_PVS_SRC_REG_TYPE(x) ((x) << 0) -# define R300_PVS_SRC_REG_TEMPORARY 0 -# define R300_PVS_SRC_REG_INPUT 1 -# define R300_PVS_SRC_REG_CONSTANT 2 -# define R300_PVS_SRC_REG_ALT_TEMPORARY 3 -#define R300_PVS_SRC_OFFSET(x) ((x) << 5) -#define R300_PVS_SRC_SWIZZLE(x) ((x) << 13) -# define R300_PVS_SRC_SELECT_X 0 -# define R300_PVS_SRC_SELECT_Y 1 -# define R300_PVS_SRC_SELECT_Z 2 -# define R300_PVS_SRC_SELECT_W 3 -# define R300_PVS_SRC_SELECT_FORCE_0 4 -# define R300_PVS_SRC_SELECT_FORCE_1 5 -# define R300_PVS_SRC_SWIZZLE_XYZW \ - ((R300_PVS_SRC_SELECT_X | (R300_PVS_SRC_SELECT_Y << 3) | \ - (R300_PVS_SRC_SELECT_Z << 6) | (R300_PVS_SRC_SELECT_W << 9)) << 13) -# define R300_PVS_SRC_SWIZZLE_ZERO \ - ((R300_PVS_SRC_SELECT_FORCE_0 | (R300_PVS_SRC_SELECT_FORCE_0 << 3) | \ - (R300_PVS_SRC_SELECT_FORCE_0 << 6) | \ - (R300_PVS_SRC_SELECT_FORCE_0 << 9)) << 13) -# define R300_PVS_SRC_SWIZZLE_ONE \ - ((R300_PVS_SRC_SELECT_FORCE_1 | (R300_PVS_SRC_SELECT_FORCE_1 << 3) | \ - (R300_PVS_SRC_SELECT_FORCE_1 << 6) | \ - (R300_PVS_SRC_SELECT_FORCE_1 << 9)) << 13) -#define R300_PVS_MODIFIER_X (1 << 25) -#define R300_PVS_MODIFIER_Y (1 << 26) -#define R300_PVS_MODIFIER_Z (1 << 27) -#define R300_PVS_MODIFIER_W (1 << 28) -#define R300_PVS_NEGATE_XYZW \ - 
(R300_PVS_MODIFIER_X | R300_PVS_MODIFIER_Y | \ - R300_PVS_MODIFIER_Z | R300_PVS_MODIFIER_W) +struct r300_vertex_shader { + /* Parent class */ + struct pipe_shader_state state; + struct tgsi_shader_info info; -/* Temporary struct used to hold assembly state while putting together - * fragment programs. */ -struct r300_vs_asm { - /* Pipe context. */ - struct r300_context* r300; - /* Number of colors. */ - unsigned color_count; - /* Number of texcoords. */ - unsigned tex_count; - /* Number of requested temporary registers. */ - unsigned temp_count; - /* Offset for immediate constants. Neither R300 nor R500 can do four - * inline constants per source, so instead we copy immediates into the - * constant buffer. */ - unsigned imm_offset; - /* Number of immediate constants. */ - unsigned imm_count; - /* Number of colors to write. */ - unsigned out_colors; - /* Number of texcoords to write. */ - unsigned out_texcoords; - /* Whether to emit point size. */ - boolean point_size; - /* Tab of declared outputs to OVM outputs. 
*/ - unsigned tab[16]; -}; + /* Fallback shader, because Draw has issues */ + struct draw_vertex_shader* draw; -static struct r300_vertex_shader r300_passthrough_vertex_shader = { - /* XXX translate these back into normal instructions */ - .instruction_count = 2, - .instructions[0].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW, - .instructions[0].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW, - .instructions[0].inst2 = R300_PVS_SRC_SWIZZLE_ZERO, - .instructions[0].inst3 = 0x0, - .instructions[1].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW, - .instructions[1].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW, - .instructions[1].inst2 = R300_PVS_SRC_SWIZZLE_ZERO, - .instructions[1].inst3 = 0x0, -}; + /* Has this shader been translated yet? 
*/ + boolean translated; -static struct r300_vertex_shader r300_texture_vertex_shader = { - /* XXX translate these back into normal instructions */ - .instruction_count = 2, - .instructions[0].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW, - .instructions[0].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW, - .instructions[0].inst2 = R300_PVS_SRC_SWIZZLE_ZERO, - .instructions[0].inst3 = 0x0, - .instructions[1].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW, - .instructions[1].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW, - .instructions[1].inst2 = R300_PVS_SRC_SWIZZLE_ZERO, - .instructions[1].inst3 = 0x0, + /* Machine code (if translated) */ + struct r300_vertex_program_code code; }; + +extern struct r300_vertex_program_code r300_passthrough_vertex_shader; + void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); diff --git a/src/gallium/drivers/r300/r3xx_fs.c b/src/gallium/drivers/r300/r3xx_fs.c index 6e05d76977..c1c1194d58 100644 --- a/src/gallium/drivers/r300/r3xx_fs.c +++ b/src/gallium/drivers/r300/r3xx_fs.c @@ -23,74 +23,52 @@ #include "r3xx_fs.h" -static INLINE uint32_t r3xx_rgb_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_MOV: - return R300_ALU_OUTC_CMP; - default: - return 0; - } -} +#include "r300_reg.h" -static INLINE uint32_t r3xx_alpha_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_MOV: - return R300_ALU_OUTA_CMP; - default: - return 0; - } -} +struct rX00_fragment_program_code r3xx_passthrough_fragment_shader = { + .code.r300.alu.length = 1, + .code.r300.tex.length = 0, -static INLINE void r3xx_emit_maths(struct r3xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct 
tgsi_full_src_register* src, - struct tgsi_full_dst_register* dst, - unsigned op, - unsigned count) -{ - int i = fs->alu_instruction_count; + .code.r300.config = 0, + .code.r300.pixsize = 0, + .code.r300.code_offset = 0, + .code.r300.code_addr[3] = R300_RGBA_OUT, - fs->instructions[i].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | + .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - r3xx_rgb_op(op); - fs->instructions[i].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ; - fs->instructions[i].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | + R300_ALU_OUTC_CMP, + .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | + R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, + .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - r3xx_alpha_op(op); - fs->instructions[i].alu_alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT; + R300_ALU_OUTA_CMP, + .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) | + R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, +}; - fs->alu_instruction_count++; -} +struct rX00_fragment_program_code r3xx_texture_fragment_shader = { + .code.r300.alu.length = 1, + .code.r300.tex.length = 1, -void r3xx_fs_finalize(struct r300_fragment_shader* fs, - struct r300_fs_asm* assembler) -{ - fs->stack_size = assembler->temp_count + assembler->temp_offset + 1; -} + .code.r300.config = R300_PFS_CNTL_FIRST_NODE_HAS_TEX, + .code.r300.pixsize = 0, + .code.r300.code_offset = 0, + .code.r300.code_addr[3] = R300_RGBA_OUT, -void r3xx_fs_instruction(struct r3xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_instruction* inst) -{ - switch (inst->Instruction.Opcode) { - case 
TGSI_OPCODE_MOV: - /* src0 -> src1 and src2 forced to zero */ - inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; - inst->FullSrcRegisters[2] = r300_constant_zero; - r3xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - case TGSI_OPCODE_END: - break; - default: - debug_printf("r300: fs: Bad opcode %d\n", - inst->Instruction.Opcode); - break; - } -} + .code.r300.tex.inst[0] = R300_TEX_OP_LD << R300_TEX_INST_SHIFT, + + .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | + R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | + R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | + R300_ALU_OUTC_CMP, + .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | + R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, + .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | + R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | + R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | + R300_ALU_OUTA_CMP, + .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) | + R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, +}; diff --git a/src/gallium/drivers/r300/r3xx_fs.h b/src/gallium/drivers/r300/r3xx_fs.h index 3da39ec252..51cd245724 100644 --- a/src/gallium/drivers/r300/r3xx_fs.h +++ b/src/gallium/drivers/r300/r3xx_fs.h @@ -24,53 +24,9 @@ #ifndef R3XX_FS_H #define R3XX_FS_H -#include "r300_fs_inlines.h" +#include "radeon_code.h" -static struct r3xx_fragment_shader r3xx_passthrough_fragment_shader = { - .alu_instruction_count = 1, - .tex_instruction_count = 0, - .indirections = 0, - .shader.stack_size = 1, - - .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - R300_ALU_OUTC_CMP, - .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, - .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | - 
R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - R300_ALU_OUTA_CMP, - .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, -}; - -static struct r3xx_fragment_shader r3xx_texture_fragment_shader = { - .alu_instruction_count = 1, - .tex_instruction_count = 0, - .indirections = 0, - .shader.stack_size = 1, - - .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - R300_ALU_OUTC_CMP, - .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, - .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - R300_ALU_OUTA_CMP, - .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, -}; - -void r3xx_fs_finalize(struct r300_fragment_shader* fs, - struct r300_fs_asm* assembler); - -void r3xx_fs_instruction(struct r3xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_instruction* inst); +struct rX00_fragment_program_code r3xx_passthrough_fragment_shader; +struct rX00_fragment_program_code r3xx_texture_fragment_shader; #endif /* R3XX_FS_H */ diff --git a/src/gallium/drivers/r300/r5xx_fs.c b/src/gallium/drivers/r300/r5xx_fs.c index 99d826278c..f072deab0d 100644 --- a/src/gallium/drivers/r300/r5xx_fs.c +++ b/src/gallium/drivers/r300/r5xx_fs.c @@ -23,445 +23,103 @@ #include "r5xx_fs.h" -static INLINE unsigned r5xx_fix_swiz(unsigned s) -{ - /* For historical reasons, the swizzle values x, y, z, w, and 0 are - * equivalent to the actual machine code, but 1 is not. Thus, we just - * adjust it a bit... 
*/ - if (s == TGSI_EXTSWIZZLE_ONE) { - return R500_SWIZZLE_ONE; - } else { - return s; - } -} - -static uint32_t r5xx_rgba_swiz(struct tgsi_full_src_register* reg) -{ - if (reg->SrcRegister.Extended) { - return r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleX) | - (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleY) << 3) | - (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleZ) << 6) | - (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleW) << 9); - } else { - return reg->SrcRegister.SwizzleX | - (reg->SrcRegister.SwizzleY << 3) | - (reg->SrcRegister.SwizzleZ << 6) | - (reg->SrcRegister.SwizzleW << 9); - } -} - -static uint32_t r5xx_strq_swiz(struct tgsi_full_src_register* reg) -{ - return reg->SrcRegister.SwizzleX | - (reg->SrcRegister.SwizzleY << 2) | - (reg->SrcRegister.SwizzleZ << 4) | - (reg->SrcRegister.SwizzleW << 6); -} - -static INLINE uint32_t r5xx_rgb_swiz(struct tgsi_full_src_register* reg) -{ - /* Only the first 9 bits... */ - return (r5xx_rgba_swiz(reg) & 0x1ff) | - (reg->SrcRegister.Negate ? (1 << 9) : 0) | - (reg->SrcRegisterExtMod.Absolute ? (1 << 10) : 0); -} - -static INLINE uint32_t r5xx_alpha_swiz(struct tgsi_full_src_register* reg) -{ - /* Only the last 3 bits... */ - return (r5xx_rgba_swiz(reg) >> 9) | - (reg->SrcRegister.Negate ? (1 << 9) : 0) | - (reg->SrcRegisterExtMod.Absolute ? 
(1 << 10) : 0); -} - -static INLINE uint32_t r5xx_rgba_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_COS: - case TGSI_OPCODE_EX2: - case TGSI_OPCODE_LG2: - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - case TGSI_OPCODE_SIN: - return R500_ALU_RGBA_OP_SOP; - case TGSI_OPCODE_DDX: - return R500_ALU_RGBA_OP_MDH; - case TGSI_OPCODE_DDY: - return R500_ALU_RGBA_OP_MDV; - case TGSI_OPCODE_FRC: - return R500_ALU_RGBA_OP_FRC; - case TGSI_OPCODE_DP3: - return R500_ALU_RGBA_OP_DP3; - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - return R500_ALU_RGBA_OP_DP4; - case TGSI_OPCODE_ABS: - case TGSI_OPCODE_CMP: - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - return R500_ALU_RGBA_OP_CMP; - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MAD: - case TGSI_OPCODE_MUL: - case TGSI_OPCODE_SUB: - return R500_ALU_RGBA_OP_MAD; - default: - return 0; - } -} - -static INLINE uint32_t r5xx_alpha_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_COS: - return R500_ALPHA_OP_COS; - case TGSI_OPCODE_EX2: - return R500_ALPHA_OP_EX2; - case TGSI_OPCODE_LG2: - return R500_ALPHA_OP_LN2; - case TGSI_OPCODE_RCP: - return R500_ALPHA_OP_RCP; - case TGSI_OPCODE_RSQ: - return R500_ALPHA_OP_RSQ; - case TGSI_OPCODE_FRC: - return R500_ALPHA_OP_FRC; - case TGSI_OPCODE_SIN: - return R500_ALPHA_OP_SIN; - case TGSI_OPCODE_DDX: - return R500_ALPHA_OP_MDH; - case TGSI_OPCODE_DDY: - return R500_ALPHA_OP_MDV; - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - return R500_ALPHA_OP_DP; - case TGSI_OPCODE_ABS: - case TGSI_OPCODE_CMP: - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - return R500_ALPHA_OP_CMP; - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MAD: - case TGSI_OPCODE_MUL: - case TGSI_OPCODE_SUB: - return R500_ALPHA_OP_MAD; - default: - return 0; - } -} - -static INLINE uint32_t r5xx_tex_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_KIL: - return R500_TEX_INST_TEXKILL; - case TGSI_OPCODE_TEX: - return R500_TEX_INST_LD; - case TGSI_OPCODE_TXB: - return R500_TEX_INST_LODBIAS; - 
case TGSI_OPCODE_TXP: - return R500_TEX_INST_PROJ; - default: - return 0; - } -} - -/* Setup an ALU operation. */ -static INLINE void r5xx_emit_maths(struct r5xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_src_register* src, - struct tgsi_full_dst_register* dst, - unsigned op, - unsigned count) -{ - int i = fs->instruction_count; - - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - fs->instructions[i].inst0 = R500_INST_TYPE_OUT; - if (r300_fs_is_depr(assembler, dst)) { - fs->instructions[i].inst4 = R500_W_OMASK; - } else { - fs->instructions[i].inst0 |= - R500_ALU_OMASK(dst->DstRegister.WriteMask); - } - } else { - fs->instructions[i].inst0 = R500_INST_TYPE_ALU | - R500_ALU_WMASK(dst->DstRegister.WriteMask); - } - - fs->instructions[i].inst0 |= R500_INST_TEX_SEM_WAIT; - - fs->instructions[i].inst4 |= - R500_ALPHA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister)); - fs->instructions[i].inst5 = - R500_ALU_RGBA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister)); - - switch (count) { - case 3: - fs->instructions[i].inst1 = - R500_RGB_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister)); - fs->instructions[i].inst2 = - R500_ALPHA_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister)); - fs->instructions[i].inst5 |= - R500_ALU_RGBA_SEL_C_SRC2 | - R500_SWIZ_RGBA_C(r5xx_rgb_swiz(&src[2])) | - R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | - R500_SWIZ_ALPHA_C(r5xx_alpha_swiz(&src[2])); - case 2: - fs->instructions[i].inst1 |= - R500_RGB_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister)); - fs->instructions[i].inst2 |= - R500_ALPHA_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister)); - fs->instructions[i].inst3 = - R500_ALU_RGB_SEL_B_SRC1 | - R500_SWIZ_RGB_B(r5xx_rgb_swiz(&src[1])); - fs->instructions[i].inst4 |= - R500_ALPHA_SEL_B_SRC1 | - R500_SWIZ_ALPHA_B(r5xx_alpha_swiz(&src[1])); - case 1: - case 0: - default: - fs->instructions[i].inst1 |= - R500_RGB_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister)); - fs->instructions[i].inst2 |= - 
R500_ALPHA_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister)); - fs->instructions[i].inst3 |= - R500_ALU_RGB_SEL_A_SRC0 | - R500_SWIZ_RGB_A(r5xx_rgb_swiz(&src[0])); - fs->instructions[i].inst4 |= - R500_ALPHA_SEL_A_SRC0 | - R500_SWIZ_ALPHA_A(r5xx_alpha_swiz(&src[0])); - break; - } - - fs->instructions[i].inst4 |= r5xx_alpha_op(op); - fs->instructions[i].inst5 |= r5xx_rgba_op(op); - - fs->instruction_count++; -} - -static INLINE void r5xx_emit_tex(struct r5xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_src_register* src, - struct tgsi_full_dst_register* dst, - uint32_t op) -{ - int i = fs->instruction_count; - - fs->instructions[i].inst0 = R500_INST_TYPE_TEX | - R500_TEX_WMASK(dst->DstRegister.WriteMask) | - R500_INST_TEX_SEM_WAIT; - fs->instructions[i].inst1 = R500_TEX_ID(0) | - R500_TEX_SEM_ACQUIRE | //R500_TEX_IGNORE_UNCOVERED | - r5xx_tex_op(op); - fs->instructions[i].inst2 = - R500_TEX_SRC_ADDR(r300_fs_src(assembler, &src->SrcRegister)) | - R500_SWIZ_TEX_STRQ(r5xx_strq_swiz(src)) | - R500_TEX_DST_ADDR(r300_fs_dst(assembler, &dst->DstRegister)) | +#include "r300_reg.h" + +/* XXX this all should find its way back to r300_reg */ +/* Swizzle tools */ +#define R500_SWIZZLE_ZERO 4 +#define R500_SWIZZLE_HALF 5 +#define R500_SWIZZLE_ONE 6 +#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) +#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) +#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) +#define R500_SWIZ_MOD_NEG 1 +#define R500_SWIZ_MOD_ABS 2 +#define R500_SWIZ_MOD_NEG_ABS 3 +/* Swizzles for inst2 */ +#define R500_SWIZ_TEX_STRQ(x) ((x) << 8) +#define R500_SWIZ_TEX_RGBA(x) ((x) << 24) +/* Swizzles for inst3 */ +#define R500_SWIZ_RGB_A(x) ((x) << 2) +#define R500_SWIZ_RGB_B(x) ((x) << 15) +/* Swizzles for inst4 */ +#define R500_SWIZ_ALPHA_A(x) ((x) << 14) +#define R500_SWIZ_ALPHA_B(x) ((x) << 21) +/* Swizzle for inst5 */ +#define R500_SWIZ_RGBA_C(x) ((x) << 14) +#define R500_SWIZ_ALPHA_C(x) ((x) << 27) +/* 
Writemasks */ +#define R500_TEX_WMASK(x) ((x) << 11) +#define R500_ALU_WMASK(x) ((x) << 11) +#define R500_ALU_OMASK(x) ((x) << 15) +#define R500_W_OMASK (1 << 31) + +struct rX00_fragment_program_code r5xx_passthrough_fragment_shader = { + .code.r500.max_temp_idx = 0, + .code.r500.inst_end = 0, + + .code.r500.inst[0].inst0 = R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | R500_INST_LAST | + R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, + .code.r500.inst[0].inst1 = + R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, + .code.r500.inst[0].inst2 = + R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, + .code.r500.inst[0].inst3 = + R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, + .code.r500.inst[0].inst4 = + R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, + .code.r500.inst[0].inst5 = + R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0, +}; + +struct rX00_fragment_program_code r5xx_texture_fragment_shader = { + .code.r500.max_temp_idx = 0, + .code.r500.inst_end = 1, + + .code.r500.inst[0].inst0 = R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK | + R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, + .code.r500.inst[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | + R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED, + .code.r500.inst[0].inst2 = R500_TEX_SRC_ADDR(0) | + R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G | + R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A | + R500_TEX_DST_ADDR(0) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - - if 
(dst->DstRegister.File == TGSI_FILE_OUTPUT) { - fs->instructions[i].inst2 |= - R500_TEX_DST_ADDR(assembler->temp_count + - assembler->temp_offset); - - fs->instruction_count++; - - /* Setup and emit a MOV. */ - src[0].SrcRegister.Index = assembler->temp_count; - src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - - src[1] = src[0]; - src[2] = r300_constant_zero; - r5xx_emit_maths(fs, assembler, src, dst, TGSI_OPCODE_MOV, 3); - } else { - fs->instruction_count++; - } -} - -void r5xx_fs_finalize(struct r5xx_fragment_shader* fs, - struct r300_fs_asm* assembler) -{ - /* XXX should this just go with OPCODE_END? */ - fs->instructions[fs->instruction_count - 1].inst0 |= - R500_INST_LAST; -} - -void r5xx_fs_instruction(struct r5xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_instruction* inst) -{ - /* Switch between opcodes. When possible, prefer using the official - * AMD/ATI names for opcodes, please, as it facilitates using the - * documentation. */ - switch (inst->Instruction.Opcode) { - /* XXX trig needs extra prep */ - case TGSI_OPCODE_COS: - case TGSI_OPCODE_SIN: - /* The simple scalar ops. */ - case TGSI_OPCODE_EX2: - case TGSI_OPCODE_LG2: - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - /* Copy red swizzle to alpha for src0 */ - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX; - inst->FullSrcRegisters[0].SrcRegister.SwizzleW = - inst->FullSrcRegisters[0].SrcRegister.SwizzleX; - /* Fall through */ - case TGSI_OPCODE_DDX: - case TGSI_OPCODE_DDY: - case TGSI_OPCODE_FRC: - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 1); - break; - - /* The dot products. 
*/ - case TGSI_OPCODE_DPH: - /* Set alpha swizzle to one for src0 */ - if (!inst->FullSrcRegisters[0].SrcRegister.Extended) { - inst->FullSrcRegisters[0].SrcRegister.Extended = TRUE; - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX = - inst->FullSrcRegisters[0].SrcRegister.SwizzleX; - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleY = - inst->FullSrcRegisters[0].SrcRegister.SwizzleY; - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleZ = - inst->FullSrcRegisters[0].SrcRegister.SwizzleZ; - } - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = - TGSI_EXTSWIZZLE_ONE; - /* Fall through */ - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 2); - break; - - /* Simple three-source operations. */ - case TGSI_OPCODE_CMP: - /* Swap src0 and src2 */ - inst->FullSrcRegisters[3] = inst->FullSrcRegisters[2]; - inst->FullSrcRegisters[2] = inst->FullSrcRegisters[0]; - inst->FullSrcRegisters[0] = inst->FullSrcRegisters[3]; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - - /* The MAD variants. 
*/ - case TGSI_OPCODE_SUB: - /* Just like ADD, but flip the negation on src1 first */ - inst->FullSrcRegisters[1].SrcRegister.Negate = - !inst->FullSrcRegisters[1].SrcRegister.Negate; - /* Fall through */ - case TGSI_OPCODE_ADD: - /* Force src0 to one, move all registers over */ - inst->FullSrcRegisters[2] = inst->FullSrcRegisters[1]; - inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; - inst->FullSrcRegisters[0] = r300_constant_one; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - case TGSI_OPCODE_MUL: - /* Force our src2 to zero */ - inst->FullSrcRegisters[2] = r300_constant_zero; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - case TGSI_OPCODE_MAD: - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - - /* The MOV variants. */ - case TGSI_OPCODE_ABS: - /* Set absolute value modifiers. */ - inst->FullSrcRegisters[0].SrcRegisterExtMod.Absolute = TRUE; - /* Fall through */ - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - /* src0 -> src1 and src2 forced to zero */ - inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; - inst->FullSrcRegisters[2] = r300_constant_zero; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - - /* The compound and hybrid insts. 
*/ - case TGSI_OPCODE_LRP: - /* LRP DST A, B, C -> MAD TMP -A, C, C; MAD DST A, B, TMP */ - inst->FullSrcRegisters[3] = inst->FullSrcRegisters[1]; - inst->FullSrcRegisters[1] = inst->FullSrcRegisters[2]; - inst->FullSrcRegisters[0].SrcRegister.Negate = - !(inst->FullSrcRegisters[0].SrcRegister.Negate); - inst->FullDstRegisters[1] = inst->FullDstRegisters[0]; - inst->FullDstRegisters[0].DstRegister.Index = - assembler->temp_count; - inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3); - inst->FullSrcRegisters[2].SrcRegister.Index = - assembler->temp_count; - inst->FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst->FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst->FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst->FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; - inst->FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - inst->FullSrcRegisters[1] = inst->FullSrcRegisters[3]; - inst->FullSrcRegisters[0].SrcRegister.Negate = - !(inst->FullSrcRegisters[0].SrcRegister.Negate); - inst->FullDstRegisters[0] = inst->FullDstRegisters[1]; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3); - break; - case TGSI_OPCODE_POW: - /* POW DST A, B -> LG2 TMP A; MUL TMP TMP, B; EX2 DST TMP */ - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX; - inst->FullSrcRegisters[0].SrcRegister.SwizzleW = - inst->FullSrcRegisters[0].SrcRegister.SwizzleX; - inst->FullDstRegisters[1] = inst->FullDstRegisters[0]; - inst->FullDstRegisters[0].DstRegister.Index = - assembler->temp_count; - inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], TGSI_OPCODE_LG2, 1); - 
inst->FullSrcRegisters[0].SrcRegister.Index = - assembler->temp_count; - inst->FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst->FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst->FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst->FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; - inst->FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - inst->FullSrcRegisters[2] = r300_constant_zero; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], TGSI_OPCODE_MUL, 3); - inst->FullDstRegisters[0] = inst->FullDstRegisters[1]; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], TGSI_OPCODE_EX2, 1); - break; - - /* The texture instruction set. */ - case TGSI_OPCODE_KIL: - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXB: - case TGSI_OPCODE_TXP: - r5xx_emit_tex(fs, assembler, &inst->FullSrcRegisters[0], - &inst->FullDstRegisters[0], inst->Instruction.Opcode); - break; - - /* This is the end. My only friend, the end. */ - case TGSI_OPCODE_END: - break; - default: - debug_printf("r300: fs: Bad opcode %d\n", - inst->Instruction.Opcode); - break; - } - - /* Clamp, if saturation flags are set. 
*/ - if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) { - fs->instructions[fs->instruction_count - 1].inst0 |= - R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; - } -} + R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A, + .code.r500.inst[0].inst3 = 0x0, + .code.r500.inst[0].inst4 = 0x0, + .code.r500.inst[0].inst5 = 0x0, + + .code.r500.inst[1].inst0 = R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | R500_INST_LAST | + R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, + .code.r500.inst[1].inst1 = + R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, + .code.r500.inst[1].inst2 = + R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, + .code.r500.inst[1].inst3 = + R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, + .code.r500.inst[1].inst4 = + R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, + .code.r500.inst[1].inst5 = + R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0, +}; diff --git a/src/gallium/drivers/r300/r5xx_fs.h b/src/gallium/drivers/r300/r5xx_fs.h index 629e587be4..a4addde32b 100644 --- a/src/gallium/drivers/r300/r5xx_fs.h +++ b/src/gallium/drivers/r300/r5xx_fs.h @@ -24,109 +24,9 @@ #ifndef R5XX_FS_H #define R5XX_FS_H -#include "r300_fs_inlines.h" +#include "radeon_code.h" -/* XXX this all should find its way back to r300_reg */ -/* Swizzle tools */ -#define R500_SWIZZLE_ZERO 4 -#define R500_SWIZZLE_HALF 5 -#define R500_SWIZZLE_ONE 6 -#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) -#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) -#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) -#define R500_SWIZ_MOD_NEG 1 -#define 
R500_SWIZ_MOD_ABS 2 -#define R500_SWIZ_MOD_NEG_ABS 3 -/* Swizzles for inst2 */ -#define R500_SWIZ_TEX_STRQ(x) ((x) << 8) -#define R500_SWIZ_TEX_RGBA(x) ((x) << 24) -/* Swizzles for inst3 */ -#define R500_SWIZ_RGB_A(x) ((x) << 2) -#define R500_SWIZ_RGB_B(x) ((x) << 15) -/* Swizzles for inst4 */ -#define R500_SWIZ_ALPHA_A(x) ((x) << 14) -#define R500_SWIZ_ALPHA_B(x) ((x) << 21) -/* Swizzle for inst5 */ -#define R500_SWIZ_RGBA_C(x) ((x) << 14) -#define R500_SWIZ_ALPHA_C(x) ((x) << 27) -/* Writemasks */ -#define R500_TEX_WMASK(x) ((x) << 11) -#define R500_ALU_WMASK(x) ((x) << 11) -#define R500_ALU_OMASK(x) ((x) << 15) -#define R500_W_OMASK (1 << 31) - -static struct r5xx_fragment_shader r5xx_passthrough_fragment_shader = { - .shader.stack_size = 0, - .instruction_count = 1, - .instructions[0].inst0 = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | R500_INST_LAST | - R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, - .instructions[0].inst1 = - R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, - .instructions[0].inst2 = - R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, - .instructions[0].inst3 = - R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, - .instructions[0].inst4 = - R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, - .instructions[0].inst5 = - R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0, -}; - -static struct r5xx_fragment_shader r5xx_texture_fragment_shader = { - .shader.stack_size = 1, - .instruction_count = 2, - .instructions[0].inst0 = R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK | - 
R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, - .instructions[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | - R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED, - .instructions[0].inst2 = R500_TEX_SRC_ADDR(0) | - R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G | - R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A | - R500_TEX_DST_ADDR(0) | - R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A, - .instructions[0].inst3 = 0x0, - .instructions[0].inst4 = 0x0, - .instructions[0].inst5 = 0x0, - .instructions[1].inst0 = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | R500_INST_LAST | - R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, - .instructions[1].inst1 = - R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, - .instructions[1].inst2 = - R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, - .instructions[1].inst3 = - R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, - .instructions[1].inst4 = - R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, - .instructions[1].inst5 = - R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0, -}; - -void r5xx_fs_finalize(struct r5xx_fragment_shader* fs, - struct r300_fs_asm* assembler); - -void r5xx_fs_instruction(struct r5xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_instruction* inst); +struct rX00_fragment_program_code r5xx_passthrough_fragment_shader; +struct rX00_fragment_program_code r5xx_texture_fragment_shader; #endif /* R5XX_FS_H */ diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 
7a533dad9f..70f0932431 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -48,11 +48,6 @@ /* Simple, maximally packed layout. */ -static unsigned minify( unsigned d ) -{ - return MAX2(1, d>>1); -} - /* Conventional allocation path for non-display textures: */ @@ -100,6 +95,7 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, { unsigned usage = (PIPE_BUFFER_USAGE_CPU_READ_WRITE | PIPE_BUFFER_USAGE_GPU_READ_WRITE); + unsigned tex_usage = spt->base.tex_usage; spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); @@ -109,6 +105,7 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, spt->base.height[0], spt->base.format, usage, + tex_usage, &spt->stride[0]); return spt->buffer != NULL; @@ -130,7 +127,8 @@ softpipe_texture_create(struct pipe_screen *screen, pipe_reference_init(&spt->base.reference, 1); spt->base.screen = screen; - if (spt->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if (spt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_PRIMARY)) { if (!softpipe_displaytarget_layout(screen, spt)) goto fail; } @@ -224,12 +222,6 @@ softpipe_get_tex_surface(struct pipe_screen *screen, if (ps->usage & PIPE_BUFFER_USAGE_GPU_READ) ps->usage |= PIPE_BUFFER_USAGE_CPU_READ; - if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_GPU_WRITE)) { - /* Mark the surface as dirty. The tile cache will look for this. */ - spt->modified = TRUE; - } - ps->face = face; ps->level = level; ps->zslice = zslice; @@ -376,6 +368,11 @@ softpipe_transfer_unmap(struct pipe_screen *screen, spt = softpipe_texture(transfer->texture); pipe_buffer_unmap( screen, spt->buffer ); + + if (transfer->usage != PIPE_TRANSFER_READ) { + /* Mark the texture as dirty to expire the tile caches. 
*/ + spt->modified = TRUE; + } } diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c index 98ac75e3fa..93c569c73a 100644 --- a/src/gallium/drivers/trace/tr_drm.c +++ b/src/gallium/drivers/trace/tr_drm.c @@ -63,7 +63,7 @@ trace_drm_create_screen(struct drm_api *_api, int fd, screen = api->create_screen(api, fd, arg); return trace_screen_create(screen); -}; +} static struct pipe_context * trace_drm_create_context(struct drm_api *_api, @@ -82,7 +82,7 @@ trace_drm_create_context(struct drm_api *_api, pipe = trace_context_create(_screen, pipe); return pipe; -}; +} static boolean trace_drm_buffer_from_texture(struct drm_api *_api, @@ -102,7 +102,7 @@ trace_drm_buffer_from_texture(struct drm_api *_api, result = api->buffer_from_texture(api, texture, &buffer, stride); if (result && _buffer) - buffer = trace_buffer_create(trace_screen(texture->screen), buffer); + buffer = trace_buffer_create(trace_screen(_texture->screen), buffer); if (_buffer) *_buffer = buffer; diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 5b1e26a52d..26f1c04594 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -462,6 +462,7 @@ trace_screen_surface_buffer_create(struct pipe_screen *_screen, unsigned width, unsigned height, enum pipe_format format, unsigned usage, + unsigned tex_usage, unsigned *pstride) { struct trace_screen *tr_scr = trace_screen(_screen); @@ -476,11 +477,13 @@ trace_screen_surface_buffer_create(struct pipe_screen *_screen, trace_dump_arg(uint, height); trace_dump_arg(format, format); trace_dump_arg(uint, usage); + trace_dump_arg(uint, tex_usage); result = screen->surface_buffer_create(screen, width, height, format, usage, + tex_usage, pstride); stride = *pstride; |