diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300')
56 files changed, 4919 insertions, 4209 deletions
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 7460410ee6..fe775eac99 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -27,16 +27,19 @@ COMMON_SOURCES = \ ../common/dri_util.c RADEON_COMMON_SOURCES = \ - radeon_texture.c \ + radeon_bo_legacy.c \ + radeon_buffer_objects.c \ radeon_common_context.c \ radeon_common.c \ + radeon_cs_legacy.c \ radeon_dma.c \ + radeon_debug.c \ + radeon_fbo.c \ radeon_lock.c \ - radeon_bo_legacy.c \ - radeon_cs_legacy.c \ radeon_mipmap_tree.c \ radeon_span.c \ - radeon_fbo.c + radeon_queryobj.c \ + radeon_texture.c DRIVER_SOURCES = \ radeon_screen.c \ @@ -48,17 +51,8 @@ DRIVER_SOURCES = \ r300_render.c \ r300_tex.c \ r300_texstate.c \ - radeon_program.c \ - radeon_program_alu.c \ - radeon_program_pair.c \ - radeon_nqssadce.c \ r300_vertprog.c \ r300_fragprog_common.c \ - r300_fragprog.c \ - r300_fragprog_swizzle.c \ - r300_fragprog_emit.c \ - r500_fragprog.c \ - r500_fragprog_emit.c \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ @@ -68,15 +62,22 @@ DRIVER_SOURCES = \ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) -DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \ - -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 \ +DRIVER_DEFINES = -DRADEON_R300 # -DRADEON_BO_TRACK \ -Wall DRI_LIB_DEPS += $(RADEON_LDFLAGS) +PIPE_DRIVERS = compiler/libr300compiler.a + ##### TARGETS ##### include ../Makefile.template symlinks: + +# Mark the archive phony so that we always check for recompilation +.PHONY : compiler/libr300compiler.a + +compiler/libr300compiler.a: + cd compiler && $(MAKE) diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile new file mode 100644 index 0000000000..d973844192 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -0,0 +1,75 @@ +# src/mesa/drivers/dri/r300/compiler/Makefile + +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = r300compiler + +C_SOURCES = \ + radeon_code.c \ + radeon_compiler.c \ + radeon_nqssadce.c \ + radeon_program.c \ + radeon_program_alu.c \ + radeon_program_pair.c \ + r3xx_fragprog.c \ + r300_fragprog.c \ + r300_fragprog_swizzle.c \ + r300_fragprog_emit.c \ + r500_fragprog.c \ + r500_fragprog_emit.c \ + r3xx_vertprog.c \ + r3xx_vertprog_dump.c \ + \ + memory_pool.c + + +### Basic defines ### + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + +INCLUDES = \ + -I. \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + + +##### TARGETS ##### + +default: depend lib$(LIBNAME).a + +lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/configs/current + $(MKLIB) -o $(LIBNAME) -static $(OBJECTS) + +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + +# Remove .o and backup files +clean: + rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak + +# Dummy target +install: + @echo -n "" + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + + +sinclude depend diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.c b/src/mesa/drivers/dri/r300/compiler/memory_pool.c new file mode 100644 index 0000000000..37aa2b6579 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.c @@ -0,0 +1,95 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "memory_pool.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + + +#define POOL_LARGE_ALLOC 4096 +#define POOL_ALIGN 4 + + +struct memory_block { + struct memory_block * next; +}; + +void memory_pool_init(struct memory_pool * pool) +{ + memset(pool, 0, sizeof(struct memory_pool)); +} + + +void memory_pool_destroy(struct memory_pool * pool) +{ + while(pool->blocks) { + struct memory_block * block = pool->blocks; + pool->blocks = block->next; + free(block); + } +} + +static void refill_pool(struct memory_pool * pool) +{ + unsigned int blocksize = pool->total_allocated; + struct memory_block * newblock; + + if (!blocksize) + blocksize = 2*POOL_LARGE_ALLOC; + + newblock = (struct memory_block*)malloc(blocksize); + newblock->next = pool->blocks; + pool->blocks = newblock; + + pool->head = (unsigned char*)(newblock + 1); + pool->end = ((unsigned char*)newblock) + blocksize; + pool->total_allocated += blocksize; +} + + +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) +{ + if (bytes < POOL_LARGE_ALLOC) { + if (pool->head + bytes > pool->end) + refill_pool(pool); + + assert(pool->head + bytes <= pool->end); + + void * ptr = pool->head; + + pool->head += bytes; + pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); + + return ptr; + } else { + struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block)); + + block->next = pool->blocks; + pool->blocks = block; + + return (block + 1); + } +} + + diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h new file mode 100644 index 0000000000..ce23c319ad --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.h @@ -0,0 +1,49 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef MEMORY_POOL_H +#define MEMORY_POOL_H + +struct memory_block; + +/** + * Provides a pool of memory that can quickly be allocated from, at the + * cost of being unable to explicitly free one of the allocated blocks. + * Instead, the entire pool can be freed at once. + * + * The idea is to allow one to quickly allocate a flexible amount of + * memory during operations like shader compilation while avoiding + * reference counting headaches. + */ +struct memory_pool { + unsigned char * head; + unsigned char * end; + unsigned int total_allocated; + struct memory_block * blocks; +}; + + +void memory_pool_init(struct memory_pool * pool); +void memory_pool_destroy(struct memory_pool * pool); +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); + +#endif /* MEMORY_POOL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c new file mode 100644 index 0000000000..6c9fba4914 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -0,0 +1,416 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r300_fragprog.h" + +#include "shader/prog_parameter.h" + +#include "../r300_reg.h" + +static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) +{ + struct prog_src_register reg = { 0, }; + + reg.File = PROGRAM_STATE_VAR; + reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); + reg.Swizzle = SWIZZLE_WWWW; + return reg; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + */ +GLboolean r300_transform_TEX( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + + if (inst->I.Opcode != OPCODE_TEX && + inst->I.Opcode != OPCODE_TXB && + inst->I.Opcode != OPCODE_TXP && + inst->I.Opcode != OPCODE_KIL) + return GL_FALSE; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->I.Opcode != OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + inst->I.Opcode = OPCODE_MOV; + + if (comparefunc == GL_ALWAYS) { + inst->I.SrcReg[0].File = PROGRAM_BUILTIN; + inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; + } else { + inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); + } + + return GL_TRUE; + } else { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); + struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); + int pass, fail; + + inst_rcp->I.Opcode = OPCODE_RCP; + inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + + inst_cmp->I.DstReg = inst->I.DstReg; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = rc_find_free_temporary(c); + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + + inst_mad->I.Opcode = OPCODE_MAD; + inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; + inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + else + inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + + inst_cmp->I.Opcode = OPCODE_CMP; + /* DstReg has been filled out above */ + inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; + inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; + inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); + } + } + + /* Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + */ + if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) { + struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev); + + inst_mul->I.Opcode = OPCODE_MUL; + inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mul->I.DstReg.Index = rc_find_free_temporary(c); + inst_mul->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR; + inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit); + + reset_srcreg(&inst->I.SrcReg[0]); + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index; + } + + /* Cannot write texture to output registers or with masks */ + if (inst->I.Opcode != OPCODE_KIL && + (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg = inst->I.DstReg; + inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); + + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + } + + + /* Cannot read texture coordinate from constants file */ + if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; + + reset_srcreg(&inst->I.SrcReg[0]); + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; + } + + return GL_TRUE; +} + +/* just some random things... */ +void r300FragmentProgramDump(struct rX00_fragment_program_code *c) +{ + struct r300_fragment_program_code *code = &c->code.r300; + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + + for (n = 0; n <= (code->config & 3); n++) { + uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; + int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT; + int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT; + int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; + int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; + + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " + "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n, + alu_offset, tex_offset, alu_end, tex_end, code_addr); + + if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { + fprintf(stderr, " TEX:\n"); + for (i = tex_offset; + i <= tex_offset + tex_end; + ++i) { + const char *instr; + + switch ((code->tex. + inst[i] >> R300_TEX_INST_SHIFT) & + 15) { + case R300_TEX_OP_LD: + instr = "TEX"; + break; + case R300_TEX_OP_KIL: + instr = "KIL"; + break; + case R300_TEX_OP_TXP: + instr = "TXP"; + break; + case R300_TEX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (code->tex. + inst[i] >> R300_DST_ADDR_SHIFT) & 31, + 't', + (code->tex. + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex. + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, + code->tex.inst[i]); + } + } + + for (i = alu_offset; + i <= alu_offset + alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_addr >> (j * 6); + int rega = code->alu.inst[i].alpha_addr >> (j * 6); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + sprintf(dsta, "t%i.w ", + (code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], dstc, + code->alu.inst[i].rgb_addr, srca[0], srca[1], + srca[2], dsta, code->alu.inst[i].alpha_addr); + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_inst >> (j * 7); + int rega = code->alu.inst[i].alpha_inst >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_ALU_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + code->alu.inst[i].rgb_inst, arga[0], arga[1], + arga[2], code->alu.inst[i].alpha_inst); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h new file mode 100644 index 0000000000..0ac46dbd9c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <darktama@iinet.net.au> + * Jerome Glisse <j.glisse@gmail.com> + */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "radeon_compiler.h" +#include "radeon_program.h" + + +extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); + +extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c); + +extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index b75656e7ee..c7227bbd15 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -40,57 +40,43 @@ #include "r300_fragprog.h" +#include "../r300_reg.h" + #include "radeon_program_pair.h" #include "r300_fragprog_swizzle.h" -#include "r300_reg.h" +struct r300_emit_state { + struct r300_fragment_program_compiler * compiler; + + unsigned current_node : 2; + unsigned node_first_tex : 8; + unsigned node_first_alu : 8; + uint32_t node_flags; +}; + #define PROG_CODE \ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r300_fragment_program_code *code = &c->code->r300 + struct r300_emit_state * emit = (struct r300_emit_state*)data; \ + struct r300_fragment_program_compiler *c = emit->compiler; \ + struct r300_fragment_program_code *code = &c->code->code.r300 #define error(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ __FILE__, __FUNCTION__, ##args); \ } while(0) -static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex) -{ - PROG_CODE; - - for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { - if (code->constant[*hwindex].File == file && - code->constant[*hwindex].Index == index) - break; - } - - if (*hwindex >= code->const_nr) { - if (*hwindex >= R300_PFS_NUM_CONST_REGS) { - error("Out of hw constants!\n"); - return GL_FALSE; - } - - code->const_nr++; - code->constant[*hwindex].File = file; - code->constant[*hwindex].Index = index; - } - - return GL_TRUE; -} - - /** * Mark a temporary register as used. */ static void use_temporary(struct r300_fragment_program_code *code, GLuint index) { - if (index > code->max_temp_idx) - code->max_temp_idx = index; + if (index > code->pixsize) + code->pixsize = index; } -static GLuint translate_rgb_opcode(GLuint opcode) +static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) { switch(opcode) { case OPCODE_CMP: return R300_ALU_OUTC_CMP; @@ -109,7 +95,7 @@ static GLuint translate_rgb_opcode(GLuint opcode) } } -static GLuint translate_alpha_opcode(GLuint opcode) +static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) { switch(opcode) { case OPCODE_CMP: return R300_ALU_OUTA_CMP; @@ -145,63 +131,62 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) int ip = code->alu.length++; int j; - code->node[code->cur_node].alu_end++; - code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode); - code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode); + code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); + code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); for(j = 0; j < 3; ++j) { GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); if (!inst->RGB.Src[j].Constant) use_temporary(code, inst->RGB.Src[j].Index); - code->alu.inst[ip].inst1 |= src << (6*j); + code->alu.inst[ip].rgb_addr |= src << (6*j); src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); if (!inst->Alpha.Src[j].Constant) use_temporary(code, inst->Alpha.Src[j].Index); - code->alu.inst[ip].inst3 |= src << (6*j); + code->alu.inst[ip].alpha_addr |= src << (6*j); GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); arg |= inst->RGB.Arg[j].Abs << 6; arg |= inst->RGB.Arg[j].Negate << 5; - code->alu.inst[ip].inst0 |= arg << (7*j); + code->alu.inst[ip].rgb_inst |= arg << (7*j); arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); arg |= inst->Alpha.Arg[j].Abs << 6; arg |= inst->Alpha.Arg[j].Negate << 5; - code->alu.inst[ip].inst2 |= arg << (7*j); + code->alu.inst[ip].alpha_inst |= arg << (7*j); } if (inst->RGB.Saturate) - code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP; + code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; if (inst->Alpha.Saturate) - code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP; + code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; if (inst->RGB.WriteMask) { use_temporary(code, inst->RGB.DestIndex); - code->alu.inst[ip].inst1 |= + code->alu.inst[ip].rgb_addr |= (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); } if (inst->RGB.OutputWriteMask) { - code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); - code->node[code->cur_node].flags |= R300_RGBA_OUT; + code->alu.inst[ip].rgb_addr |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); + emit->node_flags |= R300_RGBA_OUT; } if (inst->Alpha.WriteMask) { use_temporary(code, inst->Alpha.DestIndex); - code->alu.inst[ip].inst3 |= + code->alu.inst[ip].alpha_addr |= (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG; } if (inst->Alpha.OutputWriteMask) { - code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT; - code->node[code->cur_node].flags |= R300_RGBA_OUT; + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT; + emit->node_flags |= R300_RGBA_OUT; } if (inst->Alpha.DepthWriteMask) { - code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; - code->node[code->cur_node].flags |= R300_W_OUT; - c->fp->writes_depth = GL_TRUE; + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; + emit->node_flags |= R300_W_OUT; + c->code->writes_depth = GL_TRUE; } return GL_TRUE; @@ -211,31 +196,50 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) /** * Finish the current node without advancing to the next one. */ -static GLboolean finish_node(struct r300_fragment_program_compiler *c) +static GLboolean finish_node(struct r300_emit_state * emit) { - struct r300_fragment_program_code *code = &c->code->r300; - struct r300_fragment_program_node *node = &code->node[code->cur_node]; + struct r300_fragment_program_compiler * c = emit->compiler; + struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; - if (node->alu_end < 0) { + if (code->alu.length == emit->node_first_alu) { /* Generate a single NOP for this node */ struct radeon_pair_instruction inst; _mesa_bzero(&inst, sizeof(inst)); - if (!emit_alu(c, &inst)) + if (!emit_alu(emit, &inst)) return GL_FALSE; } - if (node->tex_end < 0) { - if (code->cur_node == 0) { - node->tex_end = 0; - } else { - error("Node %i has no TEX instructions", code->cur_node); + unsigned alu_offset = emit->node_first_alu; + unsigned alu_end = code->alu.length - alu_offset - 1; + unsigned tex_offset = emit->node_first_tex; + unsigned tex_end = code->tex.length - tex_offset - 1; + + if (code->tex.length == emit->node_first_tex) { + if (emit->current_node > 0) { + error("Node %i has no TEX instructions", emit->current_node); return GL_FALSE; } + + tex_end = 0; } else { - if (code->cur_node == 0) - code->first_node_has_tex = 1; + if (emit->current_node == 0) + code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; } + /* Write the config register. + * Note: The order in which the words for each node are written + * is not correct here and needs to be fixed up once we're entirely + * done + * + * Also note that the register specification from AMD is slightly + * incorrect in its description of this register. */ + code->code_addr[emit->current_node] = + (alu_offset << R300_ALU_START_SHIFT) | + (alu_end << R300_ALU_SIZE_SHIFT) | + (tex_offset << R300_TEX_START_SHIFT) | + (tex_end << R300_TEX_SIZE_SHIFT) | + emit->node_flags; + return GL_TRUE; } @@ -248,30 +252,28 @@ static GLboolean begin_tex(void* data) { PROG_CODE; - if (code->cur_node == 0) { - if (code->node[0].alu_end < 0 && - code->node[0].tex_end < 0) - return GL_TRUE; + if (code->alu.length == emit->node_first_alu && + code->tex.length == emit->node_first_tex) { + return GL_TRUE; } - if (code->cur_node == 3) { + if (emit->current_node == 3) { error("Too many texture indirections"); return GL_FALSE; } - if (!finish_node(c)) + if (!finish_node(emit)) return GL_FALSE; - struct r300_fragment_program_node *node = &code->node[++code->cur_node]; - node->alu_offset = code->alu.length; - node->alu_end = -1; - node->tex_offset = code->tex.length; - node->tex_end = -1; + emit->current_node++; + emit->node_first_tex = code->tex.length; + emit->node_first_alu = code->alu.length; + emit->node_flags = 0; return GL_TRUE; } -static GLboolean emit_tex(void* data, struct prog_instruction* inst) +static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst) { PROG_CODE; @@ -281,31 +283,30 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst) } GLuint unit = inst->TexSrcUnit; - GLuint dest = inst->DstReg.Index; + GLuint dest = inst->DestIndex; GLuint opcode; switch(inst->Opcode) { - case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; - case OPCODE_TEX: opcode = R300_TEX_OP_LD; break; - case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; - case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; default: error("Unknown texture opcode %i", inst->Opcode); return GL_FALSE; } - if (inst->Opcode == OPCODE_KIL) { + if (inst->Opcode == RADEON_OPCODE_KIL) { unit = 0; dest = 0; } else { use_temporary(code, dest); } - use_temporary(code, inst->SrcReg[0].Index); + use_temporary(code, inst->SrcIndex); - code->node[code->cur_node].tex_end++; code->tex.inst[code->tex.length++] = - (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | + (inst->SrcIndex << R300_SRC_ADDR_SHIFT) | (dest << R300_DST_ADDR_SHIFT) | (unit << R300_TEX_ID_SHIFT) | (opcode << R300_TEX_INST_SHIFT); @@ -314,7 +315,6 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst) static const struct radeon_pair_handler pair_handler = { - .EmitConst = &emit_const, .EmitPaired = &emit_alu, .EmitTex = &emit_tex, .BeginTexBlock = &begin_tex, @@ -325,20 +325,36 @@ static const struct radeon_pair_handler pair_handler = { * Final compilation step: Turn the intermediate radeon_program into * machine-readable instructions. */ -GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { - struct r300_fragment_program_code *code = &compiler->code->r300; - - _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); - code->node[0].alu_end = -1; - code->node[0].tex_end = -1; + struct r300_emit_state emit; + struct r300_fragment_program_code *code = &compiler->code->code.r300; - if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) - return GL_FALSE; + memset(&emit, 0, sizeof(emit)); + emit.compiler = compiler; - if (!finish_node(compiler)) - return GL_FALSE; + _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); - return GL_TRUE; + radeonPairProgram(compiler, &pair_handler, &emit); + if (compiler->Base.Error) + return; + + /* Finish the program */ + finish_node(&emit); + + code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + code->code_offset = + (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | + ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | + (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | + ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); + + if (emit.current_node < 3) { + int shift = 3 - emit.current_node; + int i; + for(i = emit.current_node; i >= 0; --i) + code->code_addr[shift + i] = code->code_addr[i]; + for(i = 0; i < shift; ++i) + code->code_addr[i] = 0; + } } - diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index fc9d855bce..1b14cc3888 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -33,8 +33,9 @@ #include "r300_fragprog_swizzle.h" -#include "r300_reg.h" +#include "../r300_reg.h" #include "radeon_nqssadce.h" +#include "radeon_compiler.h" #define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) @@ -174,18 +175,15 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, } } - struct prog_instruction *inst; - - _mesa_insert_instructions(s->Program, s->IP, 1); - inst = s->Program->Instructions + s->IP++; - inst->Opcode = OPCODE_MOV; - inst->DstReg = dst; - inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); - inst->SrcReg[0] = src; - inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; + struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg = dst; + inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); + inst->I.SrcReg[0] = src; + inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ - dst.WriteMask &= ~inst->DstReg.WriteMask; + dst.WriteMask &= ~inst->I.DstReg.WriteMask; } } diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h index 231bf4eef5..231bf4eef5 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c new file mode 100644 index 0000000000..76c3a7ecfd --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -0,0 +1,149 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_statevars.h" + +#include "radeon_nqssadce.h" +#include "radeon_program_alu.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + + +static void nqssadce_init(struct nqssadce_state* s) +{ + struct r300_fragment_program_compiler * c = s->UserData; + s->Outputs[c->OutputColor].Sourced = WRITEMASK_XYZW; + s->Outputs[c->OutputDepth].Sourced = WRITEMASK_W; +} + +static void rewrite_depth_out(struct r300_fragment_program_compiler * c) +{ + struct rc_instruction *rci; + + for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { + struct prog_instruction * inst = &rci->I; + + if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != c->OutputDepth) + continue; + + if (inst->DstReg.WriteMask & WRITEMASK_Z) { + inst->DstReg.WriteMask = WRITEMASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } + + switch (inst->Opcode) { + case OPCODE_FRC: + case OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + break; + default: + // Scalar instructions needn't be reswizzled + break; + } + } +} + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) +{ + rewrite_depth_out(c); + + if (c->is_r500) { + struct radeon_program_transformation transformations[] = { + { &r500_transform_TEX, c }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(&c->Base, 4, transformations); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_TEX, c }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(&c->Base, 3, transformations); + } + + if (c->Base.Debug) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + rc_print_program(&c->Base.Program); + fflush(stderr); + } + + if (c->is_r500) { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r500FPIsNativeSwizzle, + .BuildSwizzle = &r500FPBuildSwizzle + }; + radeonNqssaDce(&c->Base, &nqssadce, c); + } else { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle + }; + radeonNqssaDce(&c->Base, &nqssadce, c); + } + + if (c->Base.Debug) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + rc_print_program(&c->Base.Program); + fflush(stderr); + } + + if (c->is_r500) { + r500BuildFragmentProgramHwCode(c); + } else { + r300BuildFragmentProgramHwCode(c); + } + + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); + + if (c->Base.Debug) { + if (c->is_r500) { + r500FragmentProgramDump(c->code); + } else { + r300FragmentProgramDump(c->code); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c new file mode 100644 index 0000000000..dad27fc98e --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -0,0 +1,656 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include "../r300_reg.h" + +#include "radeon_nqssadce.h" +#include "radeon_program.h" +#include "radeon_program_alu.h" + +#include "shader/prog_print.h" + + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. + */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(vpi->SrcReg[x].File), \ + NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + + +static unsigned long t_dst_mask(GLuint mask) +{ + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & WRITEMASK_XYZW; +} + +static unsigned long t_dst_class(gl_register_file file) +{ + + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case PROGRAM_OUTPUT: + return PVS_DST_REG_OUT; + case PROGRAM_ADDRESS: + return PVS_DST_REG_A0; + /* + case PROGRAM_INPUT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(gl_register_file file) +{ + switch (file) { + case PROGRAM_BUILTIN: + case PROGRAM_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case PROGRAM_INPUT: + return PVS_SRC_REG_INPUT; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_STATE_VAR: + return PVS_SRC_REG_CONSTANT; + /* + case PROGRAM_OUTPUT: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b) +{ + unsigned long aclass = t_src_class(a.File); + unsigned long bclass = t_src_class(b.File); + + if (aclass != bclass) + return GL_FALSE; + if (aclass == PVS_SRC_REG_TEMPORARY) + return GL_FALSE; + + if (a.RelAddr || b.RelAddr) + return GL_TRUE; + if (a.Index != b.Index) + return GL_TRUE; + + return GL_FALSE; +} + +static INLINE unsigned long t_swizzle(GLubyte swizzle) +{ + /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +static unsigned long t_src_index(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + if (src->File == PROGRAM_INPUT) { + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->Negate) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src->RelAddr << 4); +} + +static GLboolean valid_dst(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } + + return GL_TRUE; +} + +static void ei_vector1(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); +} + +static void ei_vector2(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); +} + +static void ei_math1(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); +} + +static void ei_lit(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); +} + +static void ei_mad(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst) +{ + /* Remarks about hardware limitations of MAD + * (please preserve this comment, as this information is _NOT_ + * in the documentation provided by AMD). + * + * As described in the documentation, MAD with three unique temporary + * source registers requires the use of the macro version. + * + * However (and this is not mentioned in the documentation), apparently + * the macro version is _NOT_ a full superset of the normal version. + * In particular, the macro version does not always work when relative + * addressing is used in the source operands. + * + * This limitation caused incorrect rendering in Sauerbraten's OpenGL + * assembly shader path when using medium quality animations + * (i.e. animations with matrix blending instead of quaternion blending). + * + * Unfortunately, I (nha) have been unable to extract a Piglit regression + * test for this issue - for some reason, it is possible to have vertex + * programs whose prefix is *exactly* the same as the prefix of the + * offending program in Sauerbraten up to the offending instruction + * without causing any trouble. + * + * Bottom line: Only use the macro version only when really necessary; + * according to AMD docs, this should improve performance by one clock + * as a nice side bonus. + */ + if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY && + vpi->SrcReg[1].File == PROGRAM_TEMPORARY && + vpi->SrcReg[2].File == PROGRAM_TEMPORARY && + vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && + vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && + vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + GL_FALSE, + GL_TRUE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } else { + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = t_src(vp, &vpi->SrcReg[2]); +} + +static void ei_pow(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); +} + + +static void translate_vertex_program(struct r300_vertex_program_compiler * compiler) +{ + struct rc_instruction *rci; + + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + + compiler->SetHwInputOutput(compiler); + + for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { + struct prog_instruction *vpi = &rci->I; + GLuint *inst = compiler->code->body.d + compiler->code->length; + + /* Skip instructions writing to non-existing destination */ + if (!valid_dst(compiler->code, &vpi->DstReg)) + continue; + + if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { + rc_error(&compiler->Base, "Vertex program has too many instructions\n"); + return; + } + + switch (vpi->Opcode) { + case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + default: + rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode); + return; + } + + compiler->code->length += 4; + + if (compiler->Base.Error) + return; + } +} + +struct temporary_allocation { + GLuint Allocated:1; + GLuint HwTemp:15; + struct rc_instruction * LastRead; +}; + +static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) +{ + struct rc_instruction *inst; + GLuint num_orig_temps = 0; + GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + struct temporary_allocation * ta; + GLuint i, j; + + compiler->code->num_temporaries = 0; + memset(hwtemps, 0, sizeof(hwtemps)); + + /* Pass 1: Count original temporaries and allocate structures */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { + if (inst->I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->I.SrcReg[i].Index + 1; + } + } + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { + if (inst->I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->I.DstReg.Index + 1; + } + } + } + + ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, + sizeof(struct temporary_allocation) * num_orig_temps); + memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); + + /* Pass 2: Determine original temporary lifetimes */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) + ta[inst->I.SrcReg[i].Index].LastRead = inst; + } + } + + /* Pass 3: Register allocation */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { + GLuint orig = inst->I.SrcReg[i].Index; + inst->I.SrcReg[i].Index = ta[orig].HwTemp; + + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = GL_FALSE; + } + } + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { + GLuint orig = inst->I.DstReg.Index; + + if (!ta[orig].Allocated) { + for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { + if (!hwtemps[j]) + break; + } + if (j >= VSF_MAX_FRAGMENT_TEMPS) { + fprintf(stderr, "Out of hw temporaries\n"); + } else { + ta[orig].Allocated = GL_TRUE; + ta[orig].HwTemp = j; + hwtemps[j] = GL_TRUE; + + if (j >= compiler->code->num_temporaries) + compiler->code->num_temporaries = j + 1; + } + } + + inst->I.DstReg.Index = ta[orig].HwTemp; + } + } + } +} + + +/** + * Vertex engine cannot read two inputs or two constants at the same time. + * Introduce intermediate MOVs to temporary registers to account for this. + */ +static GLboolean transform_source_conflicts( + struct radeon_compiler *c, + struct rc_instruction* inst, + void* unused) +{ + GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode); + + if (num_operands == 3) { + if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2]) + || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = tmpreg; + inst_mov->I.SrcReg[0] = inst->I.SrcReg[2]; + + reset_srcreg(&inst->I.SrcReg[2]); + inst->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[2].Index = tmpreg; + } + } + + if (num_operands >= 2) { + if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = tmpreg; + inst_mov->I.SrcReg[0] = inst->I.SrcReg[1]; + + reset_srcreg(&inst->I.SrcReg[1]); + inst->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[1].Index = tmpreg; + } + } + + return GL_TRUE; +} + +static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) +{ + int i; + + for(i = 0; i < 32; ++i) { + if ((compiler->RequiredOutputs & (1 << i)) && + !(compiler->Base.Program.OutputsWritten & (1 << i))) { + struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; + + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = i; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + + inst->I.SrcReg[0].File = PROGRAM_CONSTANT; + inst->I.SrcReg[0].Index = 0; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + + compiler->Base.Program.OutputsWritten |= 1 << i; + } + } +} + +static void nqssadceInit(struct nqssadce_state* s) +{ + struct r300_vertex_program_compiler * compiler = s->UserData; + int i; + + for(i = 0; i < VERT_RESULT_MAX; ++i) { + if (compiler->RequiredOutputs & (1 << i)) + s->Outputs[i].Sourced = WRITEMASK_XYZW; + } +} + +static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) +{ + (void) opcode; + (void) reg; + + return GL_TRUE; +} + + + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) +{ + addArtificialOutputs(compiler); + + { + struct radeon_program_transformation transformations[] = { + { &r300_transform_vertex_alu, 0 }, + }; + radeonLocalTransform(&compiler->Base, 1, transformations); + } + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after native rewrite:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stderr); + } + + { + /* Note: This pass has to be done seperately from ALU rewrite, + * otherwise non-native ALU instructions with source conflits + * will not be treated properly. + */ + struct radeon_program_transformation transformations[] = { + { &transform_source_conflicts, 0 }, + }; + radeonLocalTransform(&compiler->Base, 1, transformations); + } + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after source conflict resolve:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stderr); + } + + { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadceInit, + .IsNativeSwizzle = &swizzleIsNative, + .BuildSwizzle = NULL + }; + radeonNqssaDce(&compiler->Base, &nqssadce, compiler); + + /* We need this step for reusing temporary registers */ + allocate_temporary_registers(compiler); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after NQSSADCE:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stderr); + } + } + + translate_vertex_program(compiler); + + rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants); + + compiler->code->InputsRead = compiler->Base.Program.InputsRead; + compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten; + + if (compiler->Base.Debug) { + fprintf(stderr, "Final vertex program code:\n"); + r300_vertex_program_dump(compiler->code); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c new file mode 100644 index 0000000000..980ef3eaea --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c @@ -0,0 +1,177 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_code.h" + +#include <stdio.h> + +static char* r300_vs_ve_ops[] = { + /* R300 vector ops */ + " VE_NO_OP", + " VE_DOT_PRODUCT", + " VE_MULTIPLY", + " VE_ADD", + " VE_MULTIPLY_ADD", + " VE_DISTANCE_FACTOR", + " VE_FRACTION", + " VE_MAXIMUM", + " VE_MINIMUM", + "VE_SET_GREATER_THAN_EQUAL", + " VE_SET_LESS_THAN", + " VE_MULTIPLYX2_ADD", + " VE_MULTIPLY_CLAMP", + " VE_FLT2FIX_DX", + " VE_FLT2FIX_DX_RND", + /* R500 vector ops */ + " VE_PRED_SET_EQ_PUSH", + " VE_PRED_SET_GT_PUSH", + " VE_PRED_SET_GTE_PUSH", + " VE_PRED_SET_NEQ_PUSH", + " VE_COND_WRITE_EQ", + " VE_COND_WRITE_GT", + " VE_COND_WRITE_GTE", + " VE_COND_WRITE_NEQ", + " VE_SET_GREATER_THAN", + " VE_SET_EQUAL", + " VE_SET_NOT_EQUAL", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +static char* r300_vs_me_ops[] = { + /* R300 math ops */ + " ME_NO_OP", + " ME_EXP_BASE2_DX", + " ME_LOG_BASE2_DX", + " ME_EXP_BASEE_FF", + " ME_LIGHT_COEFF_DX", + " ME_POWER_FUNC_FF", + " ME_RECIP_DX", + " ME_RECIP_FF", + " ME_RECIP_SQRT_DX", + " ME_RECIP_SQRT_FF", + " ME_MULTIPLY", + " ME_EXP_BASE2_FULL_DX", + " ME_LOG_BASE2_FULL_DX", + " ME_POWER_FUNC_FF_CLAMP_B", + "ME_POWER_FUNC_FF_CLAMP_B1", + "ME_POWER_FUNC_FF_CLAMP_01", + " ME_SIN", + " ME_COS", + /* R500 math ops */ + " ME_LOG_BASE2_IEEE", + " ME_RECIP_IEEE", + " ME_RECIP_SQRT_IEEE", + " ME_PRED_SET_EQ", + " ME_PRED_SET_GT", + " ME_PRED_SET_GTE", + " ME_PRED_SET_NEQ", + " ME_PRED_SET_CLR", + " ME_PRED_SET_INV", + " ME_PRED_SET_POP", + " ME_PRED_SET_RESTORE", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +/* XXX refactor to avoid clashing symbols */ +static char* r300_vs_src_debug[] = { + "t", + "i", + "c", + "a", +}; + +static char* r300_vs_dst_debug[] = { + "t", + "a0", + "o", + "ox", + "a", + "i", + "u", + "u", +}; + +static char* r300_vs_swiz_debug[] = { + "X", + "Y", + "Z", + "W", + "0", + "1", + "U", + "U", +}; + + +static void r300_vs_op_dump(uint32_t op) +{ + fprintf(stderr, " dst: %d%s op: ", + (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if (op & 0x80) { + if (op & 0x1) { + fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); + } else { + fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n"); + } + } else if (op & 0x40) { + fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]); + } else { + fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]); + } +} + +static void r300_vs_src_dump(uint32_t src) +{ + fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", + (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3], + src & (1 << 25) ? "-" : " ", + r300_vs_swiz_debug[(src >> 13) & 0x7], + src & (1 << 26) ? "-" : " ", + r300_vs_swiz_debug[(src >> 16) & 0x7], + src & (1 << 27) ? "-" : " ", + r300_vs_swiz_debug[(src >> 19) & 0x7], + src & (1 << 28) ? "-" : " ", + r300_vs_swiz_debug[(src >> 22) & 0x7]); +} + +void r300_vertex_program_dump(struct r300_vertex_program_code * vs) +{ + unsigned instrcount = vs->length / 4; + unsigned i; + + for(i = 0; i < instrcount; i++) { + unsigned offset = i*4; + unsigned src; + + fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]); + r300_vs_op_dump(vs->body.d[offset]); + + for(src = 0; src < 3; ++src) { + fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]); + r300_vs_src_dump(vs->body.d[offset+1+src]); + } + } +} diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 4d58cf2162..7e2faed690 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -27,158 +27,141 @@ #include "r500_fragprog.h" -static void reset_srcreg(struct prog_src_register* reg) -{ - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; -} +#include "../r300_reg.h" -static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) +static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) { - gl_state_index fail_value_tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 - }; struct prog_src_register reg = { 0, }; - fail_value_tokens[2] = tmu; reg.File = PROGRAM_STATE_VAR; - reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); + reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); reg.Swizzle = SWIZZLE_WWWW; return reg; } /** * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - premultiply texture coordinates for RECT - * - extract operand swizzles - * - introduce a temporary register when write masks are needed - * + * - implement texture compare (shadow extensions) + * - extract non-native source / destination operands */ GLboolean r500_transform_TEX( - struct radeon_transform_context *t, - struct prog_instruction* orig_inst, void* data) + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) { struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; - struct prog_instruction inst = *orig_inst; - struct prog_instruction* tgt; - GLboolean destredirect = GL_FALSE; - - if (inst.Opcode != OPCODE_TEX && - inst.Opcode != OPCODE_TXB && - inst.Opcode != OPCODE_TXP && - inst.Opcode != OPCODE_KIL) + + if (inst->I.Opcode != OPCODE_TEX && + inst->I.Opcode != OPCODE_TXB && + inst->I.Opcode != OPCODE_TXP && + inst->I.Opcode != OPCODE_KIL) return GL_FALSE; /* ARB_shadow & EXT_shadow_funcs */ - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + if (inst->I.Opcode != OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - tgt = radeonAppendInstructions(t->Program, 1); + inst->I.Opcode = OPCODE_MOV; - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = inst.DstReg; if (comparefunc == GL_ALWAYS) { - tgt->SrcReg[0].File = PROGRAM_BUILTIN; - tgt->SrcReg[0].Swizzle = SWIZZLE_1111; + inst->I.SrcReg[0].File = PROGRAM_BUILTIN; + inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; } else { - tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); + inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); } + return GL_TRUE; + } else { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); + struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); + int pass, fail; + + inst_rcp->I.Opcode = OPCODE_RCP; + inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + + inst_cmp->I.DstReg = inst->I.DstReg; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = rc_find_free_temporary(c); + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + + inst_mad->I.Opcode = OPCODE_MAD; + inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; + inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + else + inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + + inst_cmp->I.Opcode = OPCODE_CMP; + /* DstReg has been filled out above */ + inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; + inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; + inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); } + } - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = radeonFindFreeTemporary(t); - inst.DstReg.WriteMask = WRITEMASK_XYZW; - } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) { - int tempreg = radeonFindFreeTemporary(t); + /* Cannot write texture to output registers */ + if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; - inst.DstReg.WriteMask = WRITEMASK_XYZW; - destredirect = GL_TRUE; - } + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg = inst->I.DstReg; + inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); - if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { - int tmpreg = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(t->Program, 1); - tgt->Opcode = OPCODE_MOV; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tmpreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tmpreg; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; } - tgt = radeonAppendInstructions(t->Program, 1); - _mesa_copy_instructions(tgt, &inst, 1); - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; - int rcptemp = radeonFindFreeTemporary(t); - int pass, fail; - - tgt = radeonAppendInstructions(t->Program, 3); - - tgt[0].Opcode = OPCODE_RCP; - tgt[0].DstReg.File = PROGRAM_TEMPORARY; - tgt[0].DstReg.Index = rcptemp; - tgt[0].DstReg.WriteMask = WRITEMASK_W; - tgt[0].SrcReg[0] = inst.SrcReg[0]; - tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; - - tgt[1].Opcode = OPCODE_MAD; - tgt[1].DstReg = inst.DstReg; - tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; - tgt[1].SrcReg[0] = inst.SrcReg[0]; - tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[1].Index = rcptemp; - tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; - tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[2].Index = inst.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; - else - tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; - - tgt[2].Opcode = OPCODE_CMP; - tgt[2].DstReg = orig_inst->DstReg; - tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; - tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; - - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } + /* Cannot read texture coordinate from constants file */ + if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; - tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; - tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); - } else if (destredirect) { - tgt = radeonAppendInstructions(t->Program, 1); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = orig_inst->DstReg; - tgt->SrcReg[0].File = PROGRAM_TEMPORARY; - tgt->SrcReg[0].Index = inst.DstReg.Index; + reset_srcreg(&inst->I.SrcReg[0]); + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; } return GL_TRUE; @@ -249,7 +232,6 @@ GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) */ void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) { - struct prog_instruction *inst; GLuint negatebase[2] = { 0, 0 }; int i; @@ -260,20 +242,16 @@ void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, negatebase[GET_BIT(src.Negate, i)] |= 1 << i; } - _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); - inst = s->Program->Instructions + s->IP; - for(i = 0; i <= 1; ++i) { if (!negatebase[i]) continue; - inst->Opcode = OPCODE_MOV; - inst->DstReg = dst; - inst->DstReg.WriteMask = negatebase[i]; - inst->SrcReg[0] = src; - inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; - inst++; - s->IP++; + struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg = dst; + inst->I.DstReg.WriteMask = negatebase[i]; + inst->I.SrcReg[0] = src; + inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; } } @@ -375,9 +353,9 @@ static char *to_texop(int val) return NULL; } -void r500FragmentProgramDump(union rX00_fragment_program_code *c) +void r500FragmentProgramDump(struct rX00_fragment_program_code *c) { - struct r500_fragment_program_code *code = &c->r500; + struct r500_fragment_program_code *code = &c->code.r500; fprintf(stderr, "R500 Fragment Program:\n--------\n"); int n; @@ -385,15 +363,6 @@ void r500FragmentProgramDump(union rX00_fragment_program_code *c) uint32_t inst0; char *str = NULL; - if (code->const_nr) { - fprintf(stderr, "--------\nConstants:\n"); - for (n = 0; n < code->const_nr; n++) { - fprintf(stderr, "Constant %d: %i[%i]\n", n, - code->constant[n].File, code->constant[n].Index); - } - fprintf(stderr, "--------\n"); - } - for (n = 0; n < code->inst_end+1; n++) { inst0 = inst = code->inst[n].inst0; fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 1179bf6607..9091f65cd2 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -36,17 +36,20 @@ #include "shader/prog_parameter.h" #include "shader/prog_instruction.h" -#include "r300_context.h" +#include "radeon_compiler.h" #include "radeon_nqssadce.h" -extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); +extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); -extern void r500FragmentProgramDump(union rX00_fragment_program_code *c); +extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); -extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); +extern GLboolean r500_transform_TEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); #endif diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 30f4514897..d694725c9b 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -45,47 +45,22 @@ #include "r500_fragprog.h" +#include "../r300_reg.h" + #include "radeon_program_pair.h" #define PROG_CODE \ struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r500_fragment_program_code *code = &c->code->r500 + struct r500_fragment_program_code *code = &c->code->code.r500 #define error(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ __FILE__, __FUNCTION__, ##args); \ } while(0) -/** - * Callback to register hardware constants. - */ -static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex) -{ - PROG_CODE; - - for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { - if (code->constant[*hwindex].File == file && - code->constant[*hwindex].Index == idx) - break; - } - - if (*hwindex >= code->const_nr) { - if (*hwindex >= R500_PFS_NUM_CONST_REGS) { - error("Out of hw constants!\n"); - return GL_FALSE; - } - - code->const_nr++; - code->constant[*hwindex].File = file; - code->constant[*hwindex].Index = idx; - } - - return GL_TRUE; -} - -static GLuint translate_rgb_op(GLuint opcode) +static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode) { switch(opcode) { case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; @@ -106,7 +81,7 @@ static GLuint translate_rgb_op(GLuint opcode) } } -static GLuint translate_alpha_op(GLuint opcode) +static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode) { switch(opcode) { case OPCODE_CMP: return R500_ALPHA_OP_CMP; @@ -189,8 +164,8 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) int ip = ++code->inst_end; - code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode); - code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode); + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) code->inst[ip].inst0 = R500_INST_TYPE_OUT; @@ -202,7 +177,7 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); if (inst->Alpha.DepthWriteMask) { code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; - c->fp->writes_depth = GL_TRUE; + c->code->writes_depth = GL_TRUE; } code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); @@ -234,19 +209,19 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) return GL_TRUE; } -static GLuint translate_strq_swizzle(struct prog_src_register src) +static GLuint translate_strq_swizzle(GLuint swizzle) { GLuint swiz = 0; int i; for (i = 0; i < 4; i++) - swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2; + swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; return swiz; } /** * Emit a single TEX instruction */ -static GLboolean emit_tex(void *data, struct prog_instruction *inst) +static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst) { PROG_CODE; @@ -258,7 +233,7 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst) int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_TEX - | (inst->DstReg.WriteMask << 11) + | (inst->WriteMask << 11) | R500_INST_TEX_SEM_WAIT; code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; @@ -267,25 +242,25 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst) code->inst[ip].inst1 |= R500_TEX_UNSCALED; switch (inst->Opcode) { - case OPCODE_KIL: + case RADEON_OPCODE_KIL: code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; break; - case OPCODE_TEX: + case RADEON_OPCODE_TEX: code->inst[ip].inst1 |= R500_TEX_INST_LD; break; - case OPCODE_TXB: + case RADEON_OPCODE_TXB: code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; break; - case OPCODE_TXP: + case RADEON_OPCODE_TXP: code->inst[ip].inst1 |= R500_TEX_INST_PROJ; break; default: error("emit_tex can't handle opcode %x\n", inst->Opcode); } - code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) - | (translate_strq_swizzle(inst->SrcReg[0]) << 8) - | R500_TEX_DST_ADDR(inst->DstReg.Index) + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex) + | (translate_strq_swizzle(inst->SrcSwizzle) << 8) + | R500_TEX_DST_ADDR(inst->DestIndex) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; @@ -293,35 +268,32 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst) } static const struct radeon_pair_handler pair_handler = { - .EmitConst = emit_const, .EmitPaired = emit_paired, .EmitTex = emit_tex, .MaxHwTemps = 128 }; -GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { - struct r500_fragment_program_code *code = &compiler->code->r500; + struct r500_fragment_program_code *code = &compiler->code->code.r500; _mesa_bzero(code, sizeof(*code)); code->max_temp_idx = 1; - code->inst_offset = 0; code->inst_end = -1; - if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) - return GL_FALSE; + radeonPairProgram(compiler, &pair_handler, compiler); + if (compiler->Base.Error) + return; if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { /* This may happen when dead-code elimination is disabled or * when most of the fragment program logic is leading to a KIL */ if (code->inst_end >= 511) { - error("Introducing fake OUT: Too many instructions"); - return GL_FALSE; + rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); + return; } int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } - - return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c new file mode 100644 index 0000000000..c7923004df --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" + +#include "radeon_code.h" + +void rc_constants_init(struct rc_constant_list * c) +{ + memset(c, 0, sizeof(*c)); +} + +/** + * Copy a constants structure, assuming that the destination structure + * is not initialized. + */ +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src) +{ + dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); + memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); + dst->Count = src->Count; + dst->_Reserved = src->Count; +} + +void rc_constants_destroy(struct rc_constant_list * c) +{ + free(c->Constants); + memset(c, 0, sizeof(*c)); +} + +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant) +{ + unsigned index = c->Count; + + if (c->Count >= c->_Reserved) { + struct rc_constant * newlist; + + c->_Reserved = c->_Reserved * 2; + if (!c->_Reserved) + c->_Reserved = 16; + + newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); + memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); + + free(c->Constants); + c->Constants = newlist; + } + + c->Constants[index] = *constant; + c->Count++; + + return index; +} + + +/** + * Add a state vector to the constant list, while trying to avoid duplicates. + */ +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_STATE) { + if (c->Constants[index].u.State[0] == state0 && + c->Constants[index].u.State[1] == state1) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_STATE; + constant.Size = 4; + constant.u.State[0] = state0; + constant.u.State[1] = state1; + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate vector to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4)) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + memcpy(constant.u.Immediate, data, sizeof(float) * 4); + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate scalar to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle) +{ + unsigned index; + int free_index = -1; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + for(unsigned comp = 0; comp < c->Constants[index].Size; ++comp) { + if (c->Constants[index].u.Immediate[comp] == data) { + *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + return index; + } + } + + if (c->Constants[index].Size < 4) + free_index = index; + } + } + + if (free_index >= 0) { + unsigned comp = c->Constants[free_index].Size++; + c->Constants[free_index].u.Immediate[comp] = data; + *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + return free_index; + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 1; + constant.u.Immediate[0] = data; + *swizzle = SWIZZLE_XXXX; + + return rc_constants_add(c, &constant); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h new file mode 100644 index 0000000000..3e88554ba1 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -0,0 +1,207 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_CODE_H +#define RADEON_CODE_H + +#include <stdint.h> + +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 + +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 + + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) + +enum { + /** + * External constants are constants whose meaning is unknown to this + * compiler. For example, a Mesa gl_program's constants are turned + * into external constants. + */ + RC_CONSTANT_EXTERNAL = 0, + + RC_CONSTANT_IMMEDIATE, + + /** + * Constant referring to state that is known by this compiler, + * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state. + */ + RC_CONSTANT_STATE +}; + +enum { + RC_STATE_SHADOW_AMBIENT = 0, + + RC_STATE_R300_WINDOW_DIMENSION, + RC_STATE_R300_TEXRECT_FACTOR +}; + +struct rc_constant { + unsigned Type:2; /**< RC_CONSTANT_xxx */ + unsigned Size:3; + + union { + unsigned External; + float Immediate[4]; + unsigned State[2]; + } u; +}; + +struct rc_constant_list { + struct rc_constant * Constants; + unsigned Count; + + unsigned _Reserved; +}; + +void rc_constants_init(struct rc_constant_list * c); +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src); +void rc_constants_destroy(struct rc_constant_list * c); +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant); +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2); +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + unsigned depth_texture_mode : 2; + + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + unsigned texture_compare_func : 3; + } unit[16]; +}; + + + +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; +}; + +/** + * Stores an R300 fragment program in its compiled-to-hardware form. + */ +struct r300_fragment_program_code { + struct { + int length; /**< total # of texture instructions used */ + uint32_t inst[R300_PFS_MAX_TEX_INST]; + } tex; + + struct { + int length; /**< total # of ALU instructions used */ + struct { + uint32_t rgb_inst; + uint32_t rgb_addr; + uint32_t alpha_inst; + uint32_t alpha_addr; + } inst[R300_PFS_MAX_ALU_INST]; + } alu; + + uint32_t config; /* US_CONFIG */ + uint32_t pixsize; /* US_PIXSIZE */ + uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t code_addr[4]; /* US_CODE_ADDR */ +}; + + +struct r500_fragment_program_code { + struct { + uint32_t inst0; + uint32_t inst1; + uint32_t inst2; + uint32_t inst3; + uint32_t inst4; + uint32_t inst5; + } inst[R500_PFS_MAX_INST]; + + int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ + + int max_temp_idx; +}; + +struct rX00_fragment_program_code { + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; + + unsigned writes_depth:1; + + struct rc_constant_list constants; +}; + + +#define VSF_MAX_FRAGMENT_LENGTH (255*4) +#define VSF_MAX_FRAGMENT_TEMPS (14) + +#define VSF_MAX_INPUTS 32 +#define VSF_MAX_OUTPUTS 32 + +struct r300_vertex_program_code { + int length; + union { + uint32_t d[VSF_MAX_FRAGMENT_LENGTH]; + float f[VSF_MAX_FRAGMENT_LENGTH]; + } body; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int inputs[VSF_MAX_INPUTS]; + int outputs[VSF_MAX_OUTPUTS]; + + struct rc_constant_list constants; + + uint32_t InputsRead; + uint32_t OutputsWritten; +}; + +void r300_vertex_program_dump(struct r300_vertex_program_code * vs); + +#endif /* RADEON_CODE_H */ + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c new file mode 100644 index 0000000000..da950d5289 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -0,0 +1,262 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdarg.h> + +#include "radeon_program.h" + + +void rc_init(struct radeon_compiler * c) +{ + memset(c, 0, sizeof(*c)); + + memory_pool_init(&c->Pool); + c->Program.Instructions.Prev = &c->Program.Instructions; + c->Program.Instructions.Next = &c->Program.Instructions; + c->Program.Instructions.I.Opcode = OPCODE_END; +} + +void rc_destroy(struct radeon_compiler * c) +{ + rc_constants_destroy(&c->Program.Constants); + memory_pool_destroy(&c->Pool); + free(c->ErrorMsg); +} + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + if (!c->Debug) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void rc_error(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + c->Error = GL_TRUE; + + if (!c->ErrorMsg) { + /* Only remember the first error */ + char buf[1024]; + int written; + + va_start(ap, fmt); + written = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (written < sizeof(buf)) { + c->ErrorMsg = strdup(buf); + } else { + c->ErrorMsg = malloc(written + 1); + + va_start(ap, fmt); + vsnprintf(c->ErrorMsg, written + 1, fmt, ap); + va_end(ap); + } + } + + if (c->Debug) { + fprintf(stderr, "r300compiler error: "); + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } +} + +/** + * Rewrite the program such that everything that source the given input + * register will source new_input instead. + */ +void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input) +{ + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << input); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + unsigned i; + + for(i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_INPUT && inst->I.SrcReg[i].Index == input) { + inst->I.SrcReg[i].File = new_input.File; + inst->I.SrcReg[i].Index = new_input.Index; + inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle); + if (!inst->I.SrcReg[i].Abs) { + inst->I.SrcReg[i].Negate ^= new_input.Negate; + inst->I.SrcReg[i].Abs = new_input.Abs; + } + + c->Program.InputsRead |= 1 << new_input.Index; + } + } + } +} + + +/** + * Rewrite the program such that everything that writes into the given + * output register will instead write to new_output. The new_output + * writemask is honoured. + */ +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) +{ + struct rc_instruction * inst; + + c->Program.OutputsWritten &= ~(1 << output); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { + inst->I.DstReg.Index = new_output; + inst->I.DstReg.WriteMask &= writemask; + + c->Program.OutputsWritten |= 1 << new_output; + } + } + } +} + + +/** + * Rewrite the program such that a given output is duplicated. + */ +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) +{ + unsigned tempreg = rc_find_free_temporary(c); + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = tempreg; + } + } + } + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = output; + + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = tempreg; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = dup_output; + + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = tempreg; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + + c->Program.OutputsWritten |= 1 << dup_output; +} + + +/** + * Introduce standard code fragment to deal with fragment.position. + */ +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input) +{ + unsigned tempregi = rc_find_free_temporary(c); + + c->Program.InputsRead &= ~(1 << wpos); + c->Program.InputsRead |= 1 << new_input; + + /* perspective divide */ + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_rcp->I.Opcode = OPCODE_RCP; + + inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.Index = tempregi; + inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + + inst_rcp->I.SrcReg[0].File = PROGRAM_INPUT; + inst_rcp->I.SrcReg[0].Index = new_input; + inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + + struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp); + inst_mul->I.Opcode = OPCODE_MUL; + + inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mul->I.DstReg.Index = tempregi; + inst_mul->I.DstReg.WriteMask = WRITEMASK_XYZ; + + inst_mul->I.SrcReg[0].File = PROGRAM_INPUT; + inst_mul->I.SrcReg[0].Index = new_input; + + inst_mul->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mul->I.SrcReg[1].Index = tempregi; + inst_mul->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + + /* viewport transformation */ + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->I.Opcode = OPCODE_MAD; + + inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.Index = tempregi; + inst_mad->I.DstReg.WriteMask = WRITEMASK_XYZ; + + inst_mad->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[0].Index = tempregi; + inst_mad->I.SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + inst_mad->I.SrcReg[1].File = PROGRAM_STATE_VAR; + inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + inst_mad->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + inst_mad->I.SrcReg[2].File = PROGRAM_STATE_VAR; + inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index; + inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + struct rc_instruction * inst; + for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + unsigned i; + + for(i = 0; i < numsrcs; i++) { + if (inst->I.SrcReg[i].File == PROGRAM_INPUT && + inst->I.SrcReg[i].Index == wpos) { + inst->I.SrcReg[i].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[i].Index = tempregi; + } + } + } +} + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h new file mode 100644 index 0000000000..e63ab8840a --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -0,0 +1,108 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_COMPILER_H +#define RADEON_COMPILER_H + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" + +#include "memory_pool.h" +#include "radeon_code.h" + + +struct rc_instruction { + struct rc_instruction * Prev; + struct rc_instruction * Next; + struct prog_instruction I; +}; + +struct rc_program { + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; + + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + + struct rc_constant_list Constants; +}; + +struct radeon_compiler { + struct memory_pool Pool; + struct rc_program Program; + unsigned Debug:1; + unsigned Error:1; + char * ErrorMsg; +}; + +void rc_init(struct radeon_compiler * c); +void rc_destroy(struct radeon_compiler * c); + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...); +void rc_error(struct radeon_compiler * c, const char * fmt, ...); + +void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program); + +void rc_calculate_inputs_outputs(struct radeon_compiler * c); + +void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input); +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input); + +struct r300_fragment_program_compiler { + struct radeon_compiler Base; + struct rX00_fragment_program_code *code; + struct r300_fragment_program_external_state state; + unsigned is_r500; + unsigned OutputDepth; + unsigned OutputColor; + + void * UserData; + void (*AllocateHwInputs)( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata); +}; + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + + +struct r300_vertex_program_compiler { + struct radeon_compiler Base; + struct r300_vertex_program_code *code; + GLbitfield RequiredOutputs; + + void * UserData; + void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); + +#endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c index 840c9733b1..aaaa50ad1f 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c @@ -36,6 +36,8 @@ #include "radeon_nqssadce.h" +#include "radeon_compiler.h" + /** * Return the @ref register_state for the given register (or 0 for untracked @@ -76,9 +78,10 @@ struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register s } -static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, - struct prog_instruction *inst, GLint src, GLuint sourced) +static void track_used_srcreg(struct nqssadce_state* s, + GLint src, GLuint sourced) { + struct prog_instruction * inst = &s->IP->I; int i; GLuint deswz_source = 0; @@ -95,12 +98,11 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { struct prog_dst_register dstreg = inst->DstReg; dstreg.File = PROGRAM_TEMPORARY; - dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + dstreg.Index = rc_find_free_temporary(s->Compiler); dstreg.WriteMask = sourced; s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); - inst = s->Program->Instructions + s->IP; inst->SrcReg[src].File = PROGRAM_TEMPORARY; inst->SrcReg[src].Index = dstreg.Index; inst->SrcReg[src].Swizzle = 0; @@ -117,37 +119,36 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, struct register_state *regstate; - if (inst->SrcReg[src].RelAddr) + if (inst->SrcReg[src].RelAddr) { regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); - else + if (regstate) + regstate->Sourced |= WRITEMASK_X; + } else { regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); - - if (regstate) - regstate->Sourced |= deswz_source & 0xf; - - return inst; + if (regstate) + regstate->Sourced |= deswz_source & 0xf; + } } -static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) +static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex) { - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode); int i; for(i = 0; i < nsrc; ++i) - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex) - inst->SrcReg[i].Index = newindex; + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex) + inst->I.SrcReg[i].Index = newindex; } static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) { - GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); - int ip; - for(ip = 0; ip < s->IP; ++ip) { - struct prog_instruction* inst = s->Program->Instructions + ip; - if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex) - inst->DstReg.Index = newindex; + GLuint newindex = rc_find_free_temporary(s->Compiler); + struct rc_instruction * inst; + for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) { + if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex) + inst->I.DstReg.Index = newindex; unalias_srcregs(inst, oldindex, newindex); } - unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex); + unalias_srcregs(s->IP, oldindex, newindex); } @@ -156,7 +157,8 @@ static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) */ static void process_instruction(struct nqssadce_state* s) { - struct prog_instruction *inst = s->Program->Instructions + s->IP; + struct prog_instruction *inst = &s->IP->I; + GLuint WriteMask; if (inst->Opcode == OPCODE_END) return; @@ -164,7 +166,7 @@ static void process_instruction(struct nqssadce_state* s) if (inst->Opcode != OPCODE_KIL) { struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); if (!regstate) { - _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", + rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n", inst->DstReg.File, inst->DstReg.Index); return; } @@ -173,7 +175,9 @@ static void process_instruction(struct nqssadce_state* s) regstate->Sourced &= ~inst->DstReg.WriteMask; if (inst->DstReg.WriteMask == 0) { - _mesa_delete_instructions(s->Program, s->IP, 1); + struct rc_instruction * inst_remove = s->IP; + s->IP = s->IP->Prev; + rc_remove_instruction(inst_remove); return; } @@ -181,16 +185,15 @@ static void process_instruction(struct nqssadce_state* s) unalias_temporary(s, inst->DstReg.Index); } - /* Attention: Due to swizzle emulation code, the following - * might change the instruction stream under us, so we have - * to be careful with the inst pointer. */ + WriteMask = inst->DstReg.WriteMask; + switch (inst->Opcode) { case OPCODE_ARL: case OPCODE_DDX: case OPCODE_DDY: case OPCODE_FRC: case OPCODE_MOV: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + track_used_srcreg(s, 0, WriteMask); break; case OPCODE_ADD: case OPCODE_MAX: @@ -198,14 +201,14 @@ static void process_instruction(struct nqssadce_state* s) case OPCODE_MUL: case OPCODE_SGE: case OPCODE_SLT: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + track_used_srcreg(s, 0, WriteMask); + track_used_srcreg(s, 1, WriteMask); break; case OPCODE_CMP: case OPCODE_MAD: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask); + track_used_srcreg(s, 0, WriteMask); + track_used_srcreg(s, 1, WriteMask); + track_used_srcreg(s, 2, WriteMask); break; case OPCODE_COS: case OPCODE_EX2: @@ -213,83 +216,79 @@ static void process_instruction(struct nqssadce_state* s) case OPCODE_RCP: case OPCODE_RSQ: case OPCODE_SIN: - inst = track_used_srcreg(s, inst, 0, 0x1); + track_used_srcreg(s, 0, 0x1); break; case OPCODE_DP3: - inst = track_used_srcreg(s, inst, 0, 0x7); - inst = track_used_srcreg(s, inst, 1, 0x7); + track_used_srcreg(s, 0, 0x7); + track_used_srcreg(s, 1, 0x7); break; case OPCODE_DP4: - inst = track_used_srcreg(s, inst, 0, 0xf); - inst = track_used_srcreg(s, inst, 1, 0xf); + track_used_srcreg(s, 0, 0xf); + track_used_srcreg(s, 1, 0xf); break; case OPCODE_KIL: case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXP: - inst = track_used_srcreg(s, inst, 0, 0xf); + track_used_srcreg(s, 0, 0xf); break; case OPCODE_DST: - inst = track_used_srcreg(s, inst, 0, 0x6); - inst = track_used_srcreg(s, inst, 1, 0xa); + track_used_srcreg(s, 0, 0x6); + track_used_srcreg(s, 1, 0xa); break; case OPCODE_EXP: case OPCODE_LOG: case OPCODE_POW: - inst = track_used_srcreg(s, inst, 0, 0x3); + track_used_srcreg(s, 0, 0x3); break; case OPCODE_LIT: - inst = track_used_srcreg(s, inst, 0, 0xb); + track_used_srcreg(s, 0, 0xb); break; default: - _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); + rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode); return; } + + s->IP = s->IP->Prev; } -static void calculateInputsOutputs(struct gl_program *p) +void rc_calculate_inputs_outputs(struct radeon_compiler * c) { - struct prog_instruction *inst; - GLuint InputsRead, OutputsWritten; + struct rc_instruction *inst; - inst = p->Instructions; - InputsRead = 0; - OutputsWritten = 0; - while (inst->Opcode != OPCODE_END) + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - int i, num_src_regs; + int i; + int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode); - num_src_regs = _mesa_num_inst_src_regs(inst->Opcode); for (i = 0; i < num_src_regs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_INPUT) - InputsRead |= 1 << inst->SrcReg[i].Index; + if (inst->I.SrcReg[i].File == PROGRAM_INPUT) + c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index; } - if (inst->DstReg.File == PROGRAM_OUTPUT) - OutputsWritten |= 1 << inst->DstReg.Index; - - ++inst; + if (_mesa_num_inst_dst_regs(inst->I.Opcode)) { + if (inst->I.DstReg.File == PROGRAM_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index; + } } - - p->InputsRead = InputsRead; - p->OutputsWritten = OutputsWritten; } -void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) +void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data) { struct nqssadce_state s; _mesa_bzero(&s, sizeof(s)); - s.Ctx = ctx; - s.Program = p; + s.Compiler = c; s.Descr = descr; + s.UserData = data; s.Descr->Init(&s); - s.IP = p->NumInstructions; + s.IP = c->Program.Instructions.Prev; - while(s.IP > 0) { - s.IP--; + while(s.IP != &c->Program.Instructions && !c->Error) process_instruction(&s); - } - calculateInputsOutputs(p); + rc_calculate_inputs_outputs(c); } diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h index 8626f21c25..b3fc77a35a 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h @@ -30,7 +30,6 @@ #include "radeon_program.h" - struct register_state { /** * Bitmask indicating which components of the register are sourced @@ -44,14 +43,13 @@ struct register_state { * read from, etc. */ struct nqssadce_state { - GLcontext *Ctx; - struct gl_program *Program; + struct radeon_compiler *Compiler; struct radeon_nqssadce_descr *Descr; /** * All instructions after this instruction pointer have been dealt with. */ - int IP; + struct rc_instruction * IP; /** * Which registers are read by subsequent instructions? @@ -59,6 +57,8 @@ struct nqssadce_state { struct register_state Temps[MAX_PROGRAM_TEMPS]; struct register_state Outputs[VERT_RESULT_MAX]; struct register_state Address; + + void * UserData; }; @@ -83,11 +83,9 @@ struct radeon_nqssadce_descr { * The transformation will work recursively on the emitted instruction(s). */ void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); - - void *Data; }; -void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); +void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data); struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); #endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c new file mode 100644 index 0000000000..b636f90a96 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program.h" + +#include "radeon_compiler.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. + */ +void radeonLocalTransform( + struct radeon_compiler * c, + int num_transformations, + struct radeon_program_transformation* transformations) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + + while(inst != &c->Program.Instructions) { + struct rc_instruction * current = inst; + int i; + + inst = inst->Next; + + for(i = 0; i < num_transformations; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(c, current, t->userData)) + break; + } + } +} + + +GLint rc_find_free_temporary(struct radeon_compiler * c) +{ + GLboolean used[MAX_PROGRAM_TEMPS]; + GLuint i; + + memset(used, 0, sizeof(used)); + + for (struct rc_instruction * rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { + const struct prog_instruction *inst = &rcinst->I; + const GLuint nsrc = _mesa_num_inst_src_regs(inst->Opcode); + const GLuint ndst = _mesa_num_inst_dst_regs(inst->Opcode); + GLuint k; + + for (k = 0; k < nsrc; k++) { + if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) + used[inst->SrcReg[k].Index] = GL_TRUE; + } + + if (ndst) { + if (inst->DstReg.File == PROGRAM_TEMPORARY) + used[inst->DstReg.Index] = GL_TRUE; + } + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (!used[i]) + return i; + } + + return -1; +} + + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) +{ + struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); + + inst->Prev = 0; + inst->Next = 0; + + _mesa_init_instructions(&inst->I, 1); + + return inst; +} + + +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +{ + struct rc_instruction * inst = rc_alloc_instruction(c); + + inst->Prev = after; + inst->Next = after->Next; + + inst->Prev->Next = inst; + inst->Next->Prev = inst; + + return inst; +} + +void rc_remove_instruction(struct rc_instruction * inst) +{ + inst->Prev->Next = inst->Next; + inst->Next->Prev = inst->Prev; +} + + +void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program) +{ + struct prog_instruction *source; + unsigned int i; + + for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) { + struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + dest->I = *source; + } + + c->Program.ShadowSamplers = program->ShadowSamplers; + c->Program.InputsRead = program->InputsRead; + c->Program.OutputsWritten = program->OutputsWritten; + + int isNVProgram = 0; + + if (program->Target == GL_VERTEX_PROGRAM_ARB) { + struct gl_vertex_program * vp = (struct gl_vertex_program *) program; + isNVProgram = vp->IsNVProgram; + } + + if (isNVProgram) { + /* NV_vertex_program has a fixed-sized constant environment. + * This could be handled more efficiently for programs that + * do not use relative addressing. + */ + for(i = 0; i < 96; ++i) { + struct rc_constant constant; + + constant.Type = RC_CONSTANT_EXTERNAL; + constant.Size = 4; + constant.u.External = i; + + rc_constants_add(&c->Program.Constants, &constant); + } + } else { + for(i = 0; i < program->Parameters->NumParameters; ++i) { + struct rc_constant constant; + + constant.Type = RC_CONSTANT_EXTERNAL; + constant.Size = 4; + constant.u.External = i; + + rc_constants_add(&c->Program.Constants, &constant); + } + } +} + + +/** + * Print program to stderr, default options. + */ +void rc_print_program(const struct rc_program *prog) +{ + GLuint indent = 0; + GLuint linenum = 1; + struct rc_instruction *inst; + + fprintf(stderr, "# Radeon Compiler Program\n"); + + for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { + fprintf(stderr, "%3d: ", linenum); + + /* Massive hack: We rely on the fact that the printers do not actually + * use the gl_program argument (last argument) in debug mode */ + indent = _mesa_fprint_instruction_opt( + stderr, &inst->I, + indent, PROG_PRINT_DEBUG, 0); + + linenum++; + } +} diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index 88474d43a2..561958608c 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -34,12 +34,9 @@ #include "shader/program.h" #include "shader/prog_instruction.h" - -enum { - CLAUSE_MIXED = 0, - CLAUSE_ALU, - CLAUSE_TEX -}; +struct radeon_compiler; +struct rc_instruction; +struct rc_program; enum { PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ @@ -83,19 +80,12 @@ static inline GLuint combine_swizzles(GLuint src, GLuint swz) return ret; } +static INLINE void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} -/** - * Transformation context that is passed to local transformations. - * - * Care must be taken with some operations during transformation, - * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary - */ -struct radeon_transform_context { - GLcontext *Ctx; - struct gl_program *Program; - struct prog_instruction *OldInstructions; - GLuint OldNumInstructions; -}; /** * A transformation that can be passed to \ref radeonLocalTransform. @@ -109,23 +99,23 @@ struct radeon_transform_context { */ struct radeon_program_transformation { GLboolean (*function)( - struct radeon_transform_context*, - struct prog_instruction*, + struct radeon_compiler*, + struct rc_instruction*, void*); void *userData; }; void radeonLocalTransform( - GLcontext* ctx, - struct gl_program *program, + struct radeon_compiler *c, int num_transformations, struct radeon_program_transformation* transformations); -/** - * Find a usable free temporary register during program transformation - */ -GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx); +GLint rc_find_free_temporary(struct radeon_compiler * c); + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); +void rc_remove_instruction(struct rc_instruction * inst); -struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count); +void rc_print_program(const struct rc_program *prog); #endif diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 8283723bad..f23ce301ca 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -35,49 +35,52 @@ #include "radeon_program_alu.h" -#include "shader/prog_parameter.h" +#include "radeon_compiler.h" -static struct prog_instruction *emit1(struct gl_program* p, +static struct rc_instruction *emit1( + struct radeon_compiler * c, struct rc_instruction * after, gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, struct prog_src_register SrcReg) { - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg; + fpi->I.Opcode = Opcode; + fpi->I.SaturateMode = Saturate; + fpi->I.DstReg = DstReg; + fpi->I.SrcReg[0] = SrcReg; return fpi; } -static struct prog_instruction *emit2(struct gl_program* p, +static struct rc_instruction *emit2( + struct radeon_compiler * c, struct rc_instruction * after, gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) { - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg0; - fpi->SrcReg[1] = SrcReg1; + fpi->I.Opcode = Opcode; + fpi->I.SaturateMode = Saturate; + fpi->I.DstReg = DstReg; + fpi->I.SrcReg[0] = SrcReg0; + fpi->I.SrcReg[1] = SrcReg1; return fpi; } -static struct prog_instruction *emit3(struct gl_program* p, +static struct rc_instruction *emit3( + struct radeon_compiler * c, struct rc_instruction * after, gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, struct prog_src_register SrcReg2) { - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); - - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg0; - fpi->SrcReg[1] = SrcReg1; - fpi->SrcReg[2] = SrcReg2; + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->I.Opcode = Opcode; + fpi->I.SaturateMode = Saturate; + fpi->I.DstReg = DstReg; + fpi->I.SrcReg[0] = SrcReg0; + fpi->I.SrcReg[1] = SrcReg1; + fpi->I.SrcReg[2] = SrcReg2; return fpi; } @@ -88,6 +91,7 @@ static struct prog_dst_register dstreg(int file, int index) dst.Index = index; dst.WriteMask = WRITEMASK_XYZW; dst.CondMask = COND_TR; + dst.RelAddr = 0; dst.CondSwizzle = SWIZZLE_NOOP; dst.CondSrc = 0; dst.pad = 0; @@ -96,10 +100,11 @@ static struct prog_dst_register dstreg(int file, int index) static struct prog_dst_register dstregtmpmask(int index, int mask) { - struct prog_dst_register dst; + struct prog_dst_register dst = {0}; dst.File = PROGRAM_TEMPORARY; dst.Index = index; dst.WriteMask = mask; + dst.RelAddr = 0; dst.CondMask = COND_TR; dst.CondSwizzle = SWIZZLE_NOOP; dst.CondSrc = 0; @@ -171,44 +176,63 @@ static struct prog_src_register scalar(struct prog_src_register reg) return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); } -static void transform_ABS(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) { - struct prog_src_register src = inst->SrcReg[0]; + struct prog_src_register src = inst->I.SrcReg[0]; src.Abs = 1; src.Negate = NEGATE_NONE; - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); + emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src); + rc_remove_instruction(inst); } -static void transform_DPH(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_DP3(struct radeon_compiler* c, + struct rc_instruction* inst) { - struct prog_src_register src0 = inst->SrcReg[0]; + struct prog_src_register src0 = inst->I.SrcReg[0]; + struct prog_src_register src1 = inst->I.SrcReg[1]; + src0.Negate &= ~NEGATE_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~NEGATE_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + +static void transform_DPH(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct prog_src_register src0 = inst->I.SrcReg[0]; src0.Negate &= ~NEGATE_W; src0.Swizzle &= ~(7 << (3 * 3)); src0.Swizzle |= SWIZZLE_ONE << (3 * 3); - emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); + emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]); + rc_remove_instruction(inst); } /** * [1, src0.y*src1.y, src0.z, src1.w] * So basically MUL with lotsa swizzling. */ -static void transform_DST(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_DST(struct radeon_compiler* c, + struct rc_instruction* inst) { - emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg, - swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), - swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); + emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg, + swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), + swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); + rc_remove_instruction(inst); } -static void transform_FLR(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_FLR(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); - emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); - emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, - inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + int tempreg = rc_find_free_temporary(c); + emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]); + emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg, + inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + rc_remove_instruction(inst); } /** @@ -229,152 +253,159 @@ static void transform_FLR(struct radeon_transform_context* t, * 5 slots, if the subsequent optimization passes are clever enough * to pair instructions correctly. */ -static void transform_LIT(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) { - static const GLfloat LitConst[4] = { -127.999999 }; - GLuint constant; GLuint constant_swizzle; GLuint temp; - int needTemporary = 0; struct prog_src_register srctemp; - constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle); + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); - if (inst->DstReg.WriteMask != WRITEMASK_XYZW) { - needTemporary = 1; - } else if (inst->DstReg.File != PROGRAM_TEMPORARY) { - // LIT is typically followed by DP3/DP4, so there's no point - // in creating special code for this case - needTemporary = 1; - } + if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) { + struct rc_instruction * inst_mov; - if (needTemporary) { - temp = radeonFindFreeTemporary(t); - } else { - temp = inst->DstReg.Index; + inst_mov = emit1(c, inst, + OPCODE_MOV, 0, inst->I.DstReg, + srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c))); + + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; } + + temp = inst->I.DstReg.Index; srctemp = srcreg(PROGRAM_TEMPORARY, temp); // tmp.x = max(0.0, Src.x); // tmp.y = max(0.0, Src.y); // tmp.w = clamp(Src.z, -128+eps, 128-eps); - emit2(t->Program, OPCODE_MAX, 0, + emit2(c, inst->Prev, OPCODE_MAX, 0, dstregtmpmask(temp, WRITEMASK_XYW), - inst->SrcReg[0], + inst->I.SrcReg[0], swizzle(srcreg(PROGRAM_CONSTANT, constant), SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); - emit2(t->Program, OPCODE_MIN, 0, + emit2(c, inst->Prev, OPCODE_MIN, 0, dstregtmpmask(temp, WRITEMASK_Z), swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); // tmp.w = Pow(tmp.y, tmp.w) - emit1(t->Program, OPCODE_LG2, 0, + emit1(c, inst->Prev, OPCODE_LG2, 0, dstregtmpmask(temp, WRITEMASK_W), swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit2(t->Program, OPCODE_MUL, 0, + emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); - emit1(t->Program, OPCODE_EX2, 0, + emit1(c, inst->Prev, OPCODE_EX2, 0, dstregtmpmask(temp, WRITEMASK_W), swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, + emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, dstregtmpmask(temp, WRITEMASK_Z), negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), builtin_zero); // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, + emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, dstregtmpmask(temp, WRITEMASK_XYW), swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); - if (needTemporary) - emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp); + rc_remove_instruction(inst); } -static void transform_LRP(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_LRP(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); + int tempreg = rc_find_free_temporary(c); - emit2(t->Program, OPCODE_ADD, 0, + emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), - inst->SrcReg[1], negate(inst->SrcReg[2])); - emit3(t->Program, OPCODE_MAD, inst->SaturateMode, - inst->DstReg, - inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]); + inst->I.SrcReg[1], negate(inst->I.SrcReg[2])); + emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, + inst->I.DstReg, + inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]); + + rc_remove_instruction(inst); } -static void transform_POW(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_POW(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); + int tempreg = rc_find_free_temporary(c); struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); tempdst.WriteMask = WRITEMASK_W; tempsrc.Swizzle = SWIZZLE_WWWW; - emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0])); - emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1])); - emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc); + emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0])); + emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1])); + emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc); + + rc_remove_instruction(inst); } -static void transform_RSQ(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_RSQ(struct radeon_compiler* c, + struct rc_instruction* inst) { - emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0])); + inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]); } -static void transform_SGE(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_SGE(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); + int tempreg = rc_find_free_temporary(c); - emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); + emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); + + rc_remove_instruction(inst); } -static void transform_SLT(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_SLT(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); + int tempreg = rc_find_free_temporary(c); - emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); + emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); + + rc_remove_instruction(inst); } -static void transform_SUB(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_SUB(struct radeon_compiler* c, + struct rc_instruction* inst) { - emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); + inst->I.Opcode = OPCODE_ADD; + inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]); } -static void transform_SWZ(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_SWZ(struct radeon_compiler* c, + struct rc_instruction* inst) { - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]); + inst->I.Opcode = OPCODE_MOV; } -static void transform_XPD(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_XPD(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), - swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg, - swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), - swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), + swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); + emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg, + swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), + swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + + rc_remove_instruction(inst); } @@ -392,31 +423,64 @@ static void transform_XPD(struct radeon_transform_context* t, * * @note should be applicable to R300 and R500 fragment programs. */ -GLboolean radeonTransformALU(struct radeon_transform_context* t, - struct prog_instruction* inst, +GLboolean radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction* inst, void* unused) { - switch(inst->Opcode) { - case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; - case OPCODE_DST: transform_DST(t, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; - case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; - case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE; - case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; - case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE; - case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; - case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE; + switch(inst->I.Opcode) { + case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; + case OPCODE_DST: transform_DST(c, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; + case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; + case OPCODE_POW: transform_POW(c, inst); return GL_TRUE; + case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE; + case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE; + case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; default: return GL_FALSE; } } -static void sincos_constants(struct radeon_transform_context* t, GLuint *constants) +static void transform_r300_vertex_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Note: r500 can take absolute values, but r300 cannot. */ + inst->I.Opcode = OPCODE_MAX; + inst->I.SrcReg[1] = inst->I.SrcReg[0]; + inst->I.SrcReg[1].Negate ^= NEGATE_XYZW; +} + +/** + * For use with radeonLocalTransform, this transforms non-native ALU + * instructions of the r300 up to r500 vertex engine. + */ +GLboolean r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->I.Opcode) { + case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE; + case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; + default: + return GL_FALSE; + } +} + +static void sincos_constants(struct radeon_compiler* c, GLuint *constants) { static const GLfloat SinCosConsts[2][4] = { { @@ -434,11 +498,8 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan }; int i; - for(i = 0; i < 2; ++i) { - GLuint swz; - constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz); - ASSERT(swz == SWIZZLE_NOOP); - } + for(i = 0; i < 2; ++i) + constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); } /** @@ -449,23 +510,24 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x * MAD dest, tmp.y, weight, tmp.x */ -static void sin_approx(struct radeon_transform_context* t, +static void sin_approx( + struct radeon_compiler* c, struct rc_instruction * before, struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) { - GLuint tempreg = radeonFindFreeTemporary(t); + GLuint tempreg = rc_find_free_temporary(c); - emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + emit2(c, before->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), srcreg(PROGRAM_CONSTANT, constants[0])); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), + emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), + emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); - emit3(t->Program, OPCODE_MAD, 0, dst, + emit3(c, before->Prev, OPCODE_MAD, 0, dst, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); @@ -476,78 +538,80 @@ static void sin_approx(struct radeon_transform_context* t, * using only the basic instructions * MOV, ADD, MUL, MAD, FRC */ -GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, - struct prog_instruction* inst, +GLboolean radeonTransformTrigSimple(struct radeon_compiler* c, + struct rc_instruction* inst, void* unused) { - if (inst->Opcode != OPCODE_COS && - inst->Opcode != OPCODE_SIN && - inst->Opcode != OPCODE_SCS) + if (inst->I.Opcode != OPCODE_COS && + inst->I.Opcode != OPCODE_SIN && + inst->I.Opcode != OPCODE_SCS) return GL_FALSE; GLuint constants[2]; - GLuint tempreg = radeonFindFreeTemporary(t); + GLuint tempreg = rc_find_free_temporary(c); - sincos_constants(t, constants); + sincos_constants(c, constants); - if (inst->Opcode == OPCODE_COS) { + if (inst->I.Opcode == OPCODE_COS) { // MAD tmp.x, src, 1/(2*PI), 0.75 // FRC tmp.x, tmp.x // MAD tmp.z, tmp.x, 2*PI, -PI - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - sin_approx(t, inst->DstReg, + sin_approx(c, inst, inst->I.DstReg, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), constants); - } else if (inst->Opcode == OPCODE_SIN) { - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + } else if (inst->I.Opcode == OPCODE_SIN) { + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - sin_approx(t, inst->DstReg, + sin_approx(c, inst, inst->I.DstReg, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), constants); } else { - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), srcreg(PROGRAM_TEMPORARY, tempreg)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), srcreg(PROGRAM_TEMPORARY, tempreg), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - struct prog_dst_register dst = inst->DstReg; + struct prog_dst_register dst = inst->I.DstReg; - dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X; - sin_approx(t, dst, + dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X; + sin_approx(c, inst, dst, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), constants); - dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y; - sin_approx(t, dst, + dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y; + sin_approx(c, inst, dst, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), constants); } + rc_remove_instruction(inst); + return GL_TRUE; } @@ -560,50 +624,52 @@ GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, * * @warning This transformation implicitly changes the semantics of SIN and COS! */ -GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, - struct prog_instruction* inst, +GLboolean radeonTransformTrigScale(struct radeon_compiler* c, + struct rc_instruction* inst, void* unused) { - if (inst->Opcode != OPCODE_COS && - inst->Opcode != OPCODE_SIN && - inst->Opcode != OPCODE_SCS) + if (inst->I.Opcode != OPCODE_COS && + inst->I.Opcode != OPCODE_SIN && + inst->I.Opcode != OPCODE_SCS) return GL_FALSE; - static const GLfloat RCP_2PI[] = { 0.15915494309189535 }; + static const GLfloat RCP_2PI = 0.15915494309189535; GLuint temp; GLuint constant; GLuint constant_swizzle; - temp = radeonFindFreeTemporary(t); - constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle); + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); - emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), srcreg(PROGRAM_TEMPORARY, temp)); - if (inst->Opcode == OPCODE_COS) { - emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg, + if (inst->I.Opcode == OPCODE_COS) { + emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->Opcode == OPCODE_SIN) { - emit1(t->Program, OPCODE_SIN, inst->SaturateMode, - inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->Opcode == OPCODE_SCS) { - struct prog_dst_register moddst = inst->DstReg; + } else if (inst->I.Opcode == OPCODE_SIN) { + emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, + inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->I.Opcode == OPCODE_SCS) { + struct prog_dst_register moddst = inst->I.DstReg; - if (inst->DstReg.WriteMask & WRITEMASK_X) { + if (inst->I.DstReg.WriteMask & WRITEMASK_X) { moddst.WriteMask = WRITEMASK_X; - emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst, + emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); } - if (inst->DstReg.WriteMask & WRITEMASK_Y) { + if (inst->I.DstReg.WriteMask & WRITEMASK_Y) { moddst.WriteMask = WRITEMASK_Y; - emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst, + emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); } } + rc_remove_instruction(inst); + return GL_TRUE; } @@ -615,21 +681,15 @@ GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, * @warning This explicitly changes the form of DDX and DDY! */ -GLboolean radeonTransformDeriv(struct radeon_transform_context* t, - struct prog_instruction* inst, +GLboolean radeonTransformDeriv(struct radeon_compiler* c, + struct rc_instruction* inst, void* unused) { - if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY) + if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY) return GL_FALSE; - struct prog_src_register B = inst->SrcReg[1]; - - B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, - SWIZZLE_ONE, SWIZZLE_ONE); - B.Negate = NEGATE_XYZW; - - emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, - inst->SrcReg[0], B); + inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE); + inst->I.SrcReg[1].Negate = NEGATE_XYZW; return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h index b45958115c..147efec6fc 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h @@ -31,23 +31,28 @@ #include "radeon_program.h" GLboolean radeonTransformALU( - struct radeon_transform_context *t, - struct prog_instruction*, + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +GLboolean r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction * inst, void*); GLboolean radeonTransformTrigSimple( - struct radeon_transform_context *t, - struct prog_instruction*, + struct radeon_compiler * c, + struct rc_instruction * inst, void*); GLboolean radeonTransformTrigScale( - struct radeon_transform_context *t, - struct prog_instruction*, + struct radeon_compiler * c, + struct rc_instruction * inst, void*); GLboolean radeonTransformDeriv( - struct radeon_transform_context *t, - struct prog_instruction*, + struct radeon_compiler * c, + struct rc_instruction * inst, void*); #endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index d6fb474cf2..4c26db5d24 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -35,17 +35,19 @@ #include "radeon_program_pair.h" -#include "radeon_common.h" - +#include "memory_pool.h" +#include "radeon_compiler.h" #include "shader/prog_print.h" #define error(fmt, args...) do { \ - _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \ + rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n", \ __FILE__, __FUNCTION__, ##args); \ - s->Error = GL_TRUE; \ } while(0) struct pair_state_instruction { + struct prog_instruction Instruction; + GLuint IP; /**< Position of this instruction in original program */ + GLuint IsTex:1; /**< Is a texture instruction */ GLuint NeedRGB:1; /**< Needs the RGB ALU */ GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ @@ -73,7 +75,7 @@ struct pair_state_instruction { * Used to keep track of which instructions read a value. */ struct reg_value_reader { - GLuint IP; /**< IP of the instruction that performs this access */ + struct pair_state_instruction *Reader; struct reg_value_reader *Next; }; @@ -82,7 +84,7 @@ struct reg_value_reader { * PROGRAM_TEMPORARY. */ struct reg_value { - GLuint IP; /**< IP of the instruction that writes this value */ + struct pair_state_instruction *Writer; struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ /** @@ -116,11 +118,8 @@ struct pair_register_translation { }; struct pair_state { - GLcontext *Ctx; - struct gl_program *Program; + struct r300_fragment_program_compiler * Compiler; const struct radeon_pair_handler *Handler; - GLboolean Error; - GLboolean Debug; GLboolean Verbose; void *UserData; @@ -130,11 +129,6 @@ struct pair_state { struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; - /** - * Derived information about program instructions. - */ - struct pair_state_instruction *Instructions; - struct { GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ } HwTemps[128]; @@ -147,14 +141,6 @@ struct pair_state { struct pair_state_instruction *ReadyRGB; struct pair_state_instruction *ReadyAlpha; struct pair_state_instruction *ReadyTEX; - - /** - * Pool of @ref reg_value structures for fast allocation. - */ - struct reg_value *ValuePool; - GLuint ValuePoolUsed; - struct reg_value_reader *ReaderPool; - GLuint ReaderPoolUsed; }; @@ -183,7 +169,7 @@ static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) struct pair_register_translation *t = get_register(s, file, index); if (!t) { - _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index); + error("get_hw_reg: %i[%i]\n", file, index); return 0; } @@ -221,15 +207,13 @@ static void add_pairinst_to_list(struct pair_state_instruction **list, struct pa } /** - * The instruction at the given IP has become ready. Link it into the ready + * The given instruction has become ready. Link it into the ready * instructions. */ -static void instruction_ready(struct pair_state *s, int ip) +static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst) { - struct pair_state_instruction *pairinst = s->Instructions + ip; - if (s->Verbose) - _mesa_printf("instruction_ready(%i)\n", ip); + _mesa_printf("instruction_ready(%i)\n", pairinst->IP); if (pairinst->IsTex) add_pairinst_to_list(&s->ReadyTEX, pairinst); @@ -296,12 +280,12 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) * Classify an instruction according to which ALUs etc. it needs */ static void classify_instruction(struct pair_state *s, - struct prog_instruction *inst, struct pair_state_instruction *pairinst) + struct pair_state_instruction *psi) { - pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; - pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; + psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; + psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; - switch(inst->Opcode) { + switch(psi->Instruction.Opcode) { case OPCODE_ADD: case OPCODE_CMP: case OPCODE_DDX: @@ -319,24 +303,24 @@ static void classify_instruction(struct pair_state *s, case OPCODE_RCP: case OPCODE_RSQ: case OPCODE_SIN: - pairinst->IsTranscendent = 1; - pairinst->NeedAlpha = 1; + psi->IsTranscendent = 1; + psi->NeedAlpha = 1; break; case OPCODE_DP4: - pairinst->NeedAlpha = 1; + psi->NeedAlpha = 1; /* fall through */ case OPCODE_DP3: - pairinst->NeedRGB = 1; + psi->NeedRGB = 1; break; case OPCODE_KIL: case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXP: case OPCODE_END: - pairinst->IsTex = 1; + psi->IsTex = 1; break; default: - error("Unknown opcode %d\n", inst->Opcode); + error("Unknown opcode %d\n", psi->Instruction.Opcode); break; } } @@ -348,30 +332,34 @@ static void classify_instruction(struct pair_state *s, */ static void scan_instructions(struct pair_state *s) { - struct prog_instruction *inst; - struct pair_state_instruction *pairinst; + struct rc_instruction *source; GLuint ip; - for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0; - inst->Opcode != OPCODE_END; - ++inst, ++pairinst, ++ip) { - final_rewrite(s, inst); - classify_instruction(s, inst, pairinst); + for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0; + source != &s->Compiler->Base.Program.Instructions; + source = source->Next, ++ip) { + struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst)); + memset(pairinst, 0, sizeof(struct pair_state_instruction)); + + pairinst->Instruction = source->I; + pairinst->IP = ip; + final_rewrite(s, &pairinst->Instruction); + classify_instruction(s, pairinst); - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode); int j; for(j = 0; j < nsrc; j++) { struct pair_register_translation *t = - get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index); + get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index); if (!t) continue; t->RefCount++; - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) { int i; for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i); + GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i); if (swz >= 4) continue; /* constant or NIL swizzle */ if (!t->Value[swz]) @@ -381,36 +369,37 @@ static void scan_instructions(struct pair_state *s) * also rewrites the value. The code below adds * a dependency for the DstReg, which is a superset * of the SrcReg dependency. */ - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == inst->SrcReg[j].Index && - GET_BIT(inst->DstReg.WriteMask, swz)) + if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY && + pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index && + GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz)) continue; - struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++]; + struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*r)); pairinst->NumDependencies++; t->Value[swz]->NumReaders++; - r->IP = ip; + r->Reader = pairinst; r->Next = t->Value[swz]->Readers; t->Value[swz]->Readers = r; } } } - int ndst = _mesa_num_inst_dst_regs(inst->Opcode); + int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode); if (ndst) { struct pair_register_translation *t = - get_register(s, inst->DstReg.File, inst->DstReg.Index); + get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index); if (t) { t->RefCount++; - if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) { int j; for(j = 0; j < 4; ++j) { - if (!GET_BIT(inst->DstReg.WriteMask, j)) + if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j)) continue; - struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++]; - v->IP = ip; + struct reg_value* v = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*v)); + memset(v, 0, sizeof(struct reg_value)); + v->Writer = pairinst; if (t->Value[j]) { pairinst->NumDependencies++; t->Value[j]->Next = v; @@ -426,7 +415,7 @@ static void scan_instructions(struct pair_state *s) _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); if (!pairinst->NumDependencies) - instruction_ready(s, ip); + instruction_ready(s, pairinst); } /* Clear the PROGRAM_TEMPORARY state */ @@ -438,70 +427,23 @@ static void scan_instructions(struct pair_state *s) } -/** - * Reserve hardware temporary registers for the program inputs. - * - * @note This allocation is performed explicitly, because the order of inputs - * is determined by the RS hardware. - */ -static void allocate_input_registers(struct pair_state *s) -{ - GLuint InputsRead = s->Program->InputsRead; - int i; - GLuint hwindex = 0; - - /* Primary colour */ - if (InputsRead & FRAG_BIT_COL0) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++); - InputsRead &= ~FRAG_BIT_COL0; - - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++); - InputsRead &= ~FRAG_BIT_COL1; - - /* Texcoords */ - for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* Fogcoords treated as a texcoord */ - if (InputsRead & FRAG_BIT_FOGC) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++); - InputsRead &= ~FRAG_BIT_FOGC; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); - InputsRead &= ~FRAG_BIT_WPOS; - - /* Anything else */ - if (InputsRead) - error("Don't know how to handle inputs 0x%x\n", InputsRead); -} - - -static void decrement_dependencies(struct pair_state *s, int ip) +static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst) { - struct pair_state_instruction *pairinst = s->Instructions + ip; ASSERT(pairinst->NumDependencies > 0); if (!--pairinst->NumDependencies) - instruction_ready(s, ip); + instruction_ready(s, pairinst); } /** * Update the dependency tracking state based on what the instruction * at the given IP does. */ -static void commit_instruction(struct pair_state *s, int ip) +static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst) { - struct prog_instruction *inst = s->Program->Instructions + ip; - struct pair_state_instruction *pairinst = s->Instructions + ip; + struct prog_instruction *inst = &pairinst->Instruction; if (s->Verbose) - _mesa_printf("commit_instruction(%i)\n", ip); + _mesa_printf("commit_instruction(%i)\n", pairinst->IP); if (inst->DstReg.File == PROGRAM_TEMPORARY) { struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; @@ -516,11 +458,11 @@ static void commit_instruction(struct pair_state *s, int ip) if (t->Value[i]->NumReaders) { struct reg_value_reader *r; for(r = pairinst->Values[i]->Readers; r; r = r->Next) - decrement_dependencies(s, r->IP); + decrement_dependencies(s, r->Reader); } else if (t->Value[i]->Next) { /* This happens when the only reader writes * the register at the same time */ - decrement_dependencies(s, t->Value[i]->Next->IP); + decrement_dependencies(s, t->Value[i]->Next->Writer); } } } @@ -554,7 +496,7 @@ static void commit_instruction(struct pair_state *s, int ip) if (!--t->Value[swz]->NumReaders) { if (t->Value[swz]->Next) - decrement_dependencies(s, t->Value[swz]->Next->IP); + decrement_dependencies(s, t->Value[swz]->Next->Writer); } } } @@ -585,36 +527,52 @@ static void emit_all_tex(struct pair_state *s) // Allocate destination hardware registers in one block to avoid conflicts. for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - int ip = pairinst - s->Instructions; - struct prog_instruction *inst = s->Program->Instructions + ip; + struct prog_instruction *inst = &pairinst->Instruction; if (inst->Opcode != OPCODE_KIL) get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); } - if (s->Debug) + if (s->Compiler->Base.Debug) _mesa_printf(" BEGIN_TEX\n"); if (s->Handler->BeginTexBlock) - s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData); + s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData); for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - int ip = pairinst - s->Instructions; - struct prog_instruction *inst = s->Program->Instructions + ip; - commit_instruction(s, ip); + struct prog_instruction *inst = &pairinst->Instruction; + commit_instruction(s, pairinst); if (inst->Opcode != OPCODE_KIL) inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); - if (s->Debug) { + if (s->Compiler->Base.Debug) { _mesa_printf(" "); _mesa_print_instruction(inst); - fflush(stdout); + fflush(stderr); + } + + struct radeon_pair_texture_instruction rpti; + + switch(inst->Opcode) { + case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break; + case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break; + case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break; + default: + case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break; } - s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); + + rpti.DestIndex = inst->DstReg.Index; + rpti.WriteMask = inst->DstReg.WriteMask; + rpti.TexSrcUnit = inst->TexSrcUnit; + rpti.TexSrcTarget = inst->TexSrcTarget; + rpti.SrcIndex = inst->SrcReg[0].Index; + rpti.SrcSwizzle = inst->SrcReg[0].Swizzle; + + s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitTex(s->UserData, &rpti); } - if (s->Debug) + if (s->Compiler->Base.Debug) _mesa_printf(" END_TEX\n"); } @@ -637,7 +595,7 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio index = get_hw_reg(s, src.File, src.Index); } else { constant = 1; - s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index); + index = src.Index; } for(i = 0; i < 3; ++i) { @@ -684,10 +642,12 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio * Fill the given ALU instruction's opcodes and source operands into the given pair, * if possible. */ -static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +static GLboolean fill_instruction_into_pair( + struct pair_state *s, + struct radeon_pair_instruction *pair, + struct pair_state_instruction *pairinst) { - struct pair_state_instruction *pairinst = s->Instructions + ip; - struct prog_instruction *inst = s->Program->Instructions + ip; + struct prog_instruction *inst = &pairinst->Instruction; ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); @@ -768,16 +728,18 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ * we are absolutely certain that we're going to emit a certain * instruction pairing. */ -static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +static void fill_dest_into_pair( + struct pair_state *s, + struct radeon_pair_instruction *pair, + struct pair_state_instruction *pairinst) { - struct pair_state_instruction *pairinst = s->Instructions + ip; - struct prog_instruction *inst = s->Program->Instructions + ip; + struct prog_instruction *inst = &pairinst->Instruction; if (inst->DstReg.File == PROGRAM_OUTPUT) { - if (inst->DstReg.Index == FRAG_RESULT_COLOR) { + if (inst->DstReg.Index == s->Compiler->OutputColor) { pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) { + } else if (inst->DstReg.Index == s->Compiler->OutputDepth) { pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); } } else { @@ -804,24 +766,24 @@ static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruc static void emit_alu(struct pair_state *s) { struct radeon_pair_instruction pair; + struct pair_state_instruction *psi; if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - int ip; if (s->ReadyFullALU) { - ip = s->ReadyFullALU - s->Instructions; + psi = s->ReadyFullALU; s->ReadyFullALU = s->ReadyFullALU->NextReady; } else if (s->ReadyRGB) { - ip = s->ReadyRGB - s->Instructions; + psi = s->ReadyRGB; s->ReadyRGB = s->ReadyRGB->NextReady; } else { - ip = s->ReadyAlpha - s->Instructions; + psi = s->ReadyAlpha; s->ReadyAlpha = s->ReadyAlpha->NextReady; } _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, ip); - fill_dest_into_pair(s, &pair, ip); - commit_instruction(s, ip); + fill_instruction_into_pair(s, &pair, psi); + fill_dest_into_pair(s, &pair, psi); + commit_instruction(s, psi); } else { struct pair_state_instruction **prgb; struct pair_state_instruction **palpha; @@ -830,65 +792,65 @@ static void emit_alu(struct pair_state *s) * many source slots; try all possible pairings if necessary */ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { - int rgbip = *prgb - s->Instructions; - int alphaip = *palpha - s->Instructions; + struct pair_state_instruction * psirgb = *prgb; + struct pair_state_instruction * psialpha = *palpha; _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, rgbip); - if (!fill_instruction_into_pair(s, &pair, alphaip)) + fill_instruction_into_pair(s, &pair, psirgb); + if (!fill_instruction_into_pair(s, &pair, psialpha)) continue; *prgb = (*prgb)->NextReady; *palpha = (*palpha)->NextReady; - fill_dest_into_pair(s, &pair, rgbip); - fill_dest_into_pair(s, &pair, alphaip); - commit_instruction(s, rgbip); - commit_instruction(s, alphaip); + fill_dest_into_pair(s, &pair, psirgb); + fill_dest_into_pair(s, &pair, psialpha); + commit_instruction(s, psirgb); + commit_instruction(s, psialpha); goto success; } } /* No success in pairing; just take the first RGB instruction */ - int ip = s->ReadyRGB - s->Instructions; + psi = s->ReadyRGB; s->ReadyRGB = s->ReadyRGB->NextReady; + _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, ip); - fill_dest_into_pair(s, &pair, ip); - commit_instruction(s, ip); + fill_instruction_into_pair(s, &pair, psi); + fill_dest_into_pair(s, &pair, psi); + commit_instruction(s, psi); success: ; } - if (s->Debug) + if (s->Compiler->Base.Debug) radeonPrintPairInstruction(&pair); - s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair); + s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, &pair); } +/* Callback function for assigning input registers to hardware registers */ +static void alloc_helper(void * data, unsigned input, unsigned hwreg) +{ + struct pair_state * s = data; + alloc_hw_reg(s, PROGRAM_INPUT, input, hwreg); +} -GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, +void radeonPairProgram( + struct r300_fragment_program_compiler * compiler, const struct radeon_pair_handler* handler, void *userdata) { struct pair_state s; _mesa_bzero(&s, sizeof(s)); - s.Ctx = ctx; - s.Program = _mesa_clone_program(ctx, program); + s.Compiler = compiler; s.Handler = handler; s.UserData = userdata; - s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; - s.Verbose = GL_FALSE && s.Debug; + s.Verbose = GL_FALSE && s.Compiler->Base.Debug; - s.Instructions = (struct pair_state_instruction*)_mesa_calloc( - sizeof(struct pair_state_instruction)*s.Program->NumInstructions); - s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4); - s.ReaderPool = (struct reg_value_reader*)_mesa_calloc( - sizeof(struct reg_value_reader)*s.Program->NumInstructions*12); - - if (s.Debug) + if (s.Compiler->Base.Debug) _mesa_printf("Emit paired program\n"); scan_instructions(&s); - allocate_input_registers(&s); + s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s); - while(!s.Error && + while(!s.Compiler->Base.Error && (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { if (s.ReadyTEX) emit_all_tex(&s); @@ -897,16 +859,8 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, emit_alu(&s); } - if (s.Debug) + if (s.Compiler->Base.Debug) _mesa_printf(" END\n"); - - _mesa_free(s.Instructions); - _mesa_free(s.ValuePool); - _mesa_free(s.ReaderPool); - - _mesa_reference_program(ctx, &s.Program, NULL); - - return !s.Error; } diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 4624a24629..ff76178551 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -30,6 +30,8 @@ #include "radeon_program.h" +struct r300_fragment_program_compiler; + /** * Represents a paired instruction, as found in R300 and R500 @@ -82,18 +84,32 @@ struct radeon_pair_instruction { }; +enum { + RADEON_OPCODE_TEX = 0, + RADEON_OPCODE_TXB, + RADEON_OPCODE_TXP, + RADEON_OPCODE_KIL +}; + +struct radeon_pair_texture_instruction { + GLuint Opcode:2; /**< one of RADEON_OPCODE_xxx */ + + GLuint DestIndex:8; + GLuint WriteMask:4; + + GLuint TexSrcUnit:5; + GLuint TexSrcTarget:3; + + GLuint SrcIndex:8; + GLuint SrcSwizzle:12; +}; + + /** * */ struct radeon_pair_handler { /** - * Fill in the proper hardware index for the given constant register. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex); - - /** * Write a paired instruction to the hardware. * * @return GL_FALSE on error. @@ -107,7 +123,7 @@ struct radeon_pair_handler { * * @return GL_FALSE on error. */ - GLboolean (*EmitTex)(void*, struct prog_instruction*); + GLboolean (*EmitTex)(void*, struct radeon_pair_texture_instruction*); /** * Called before a block of contiguous, independent texture @@ -115,10 +131,11 @@ struct radeon_pair_handler { */ GLboolean (*BeginTexBlock)(void*); - GLuint MaxHwTemps; + unsigned MaxHwTemps; }; -GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, +void radeonPairProgram( + struct r300_fragment_program_compiler * compiler, const struct radeon_pair_handler*, void *userdata); void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index af535037d0..da5b7ba642 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -54,6 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_mipmap_tree.h" #include "r300_state.h" #include "radeon_reg.h" +#include "radeon_queryobj.h" /** # of dwords reserved for additional instructions that may need to be written * during flushing. @@ -73,48 +74,40 @@ static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt) #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) #define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) +int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int cnt; + int extra = 1; + cnt = vpu_count(atom->cmd); + + if (r300->radeon.radeonScreen->kernel_mm) { + extra = 5; + } + + return cnt ? (cnt * 4) + extra : 0; +} + + void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); BATCH_LOCALS(&r300->radeon); drm_r300_cmd_header_t cmd; - uint32_t addr, ndw, i; - - if (!r300->radeon.radeonScreen->kernel_mm) { - uint32_t dwords; - dwords = (*atom->check) (ctx, atom); - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(atom->cmd, dwords); - END_BATCH(); - return; - } + uint32_t addr, ndw; cmd.u = atom->cmd[0]; addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; - ndw = cmd.vpu.count * 4; - if (ndw) { + ndw = atom->check(ctx, atom); - if (r300->vap_flush_needed) { - BEGIN_BATCH_NO_AUTOSTATE(15 + ndw); + BEGIN_BATCH_NO_AUTOSTATE(ndw); - /* flush processing vertices */ - OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0); - OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); - OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); - OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0xffffff); - OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); - r300->vap_flush_needed = GL_FALSE; - } else { - BEGIN_BATCH_NO_AUTOSTATE(5 + ndw); - } - OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); - OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR); - for (i = 0; i < ndw; i++) { - OUT_BATCH(atom->cmd[i+1]); - } - OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); - END_BATCH(); - } + ndw -= 5; + OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); + OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR); + OUT_BATCH_TABLE(&atom->cmd[1], ndw); + OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); + END_BATCH(); } void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) @@ -122,17 +115,10 @@ void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) r300ContextPtr r300 = R300_CONTEXT(ctx); BATCH_LOCALS(&r300->radeon); drm_r300_cmd_header_t cmd; - uint32_t addr, ndw, i, sz; - int type, clamp, stride; + uint32_t addr, ndw, sz; + int type, clamp; - if (!r300->radeon.radeonScreen->kernel_mm) { - uint32_t dwords; - dwords = (*atom->check) (ctx, atom); - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(atom->cmd, dwords); - END_BATCH(); - return; - } + ndw = atom->check(ctx, atom); cmd.u = atom->cmd[0]; sz = cmd.r500fp.count; @@ -143,20 +129,34 @@ void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) addr |= (type << 16); addr |= (clamp << 17); - stride = type ? 4 : 6; - - ndw = sz * stride; - if (ndw) { + BEGIN_BATCH_NO_AUTOSTATE(ndw); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); + OUT_BATCH(addr); + ndw-=3; + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR); + OUT_BATCH_TABLE(&atom->cmd[1], ndw); + END_BATCH(); +} - BEGIN_BATCH_NO_AUTOSTATE(3 + ndw); - OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); - OUT_BATCH(addr); - OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR); - for (i = 0; i < ndw; i++) { - OUT_BATCH(atom->cmd[i+1]); - } - END_BATCH(); +static int check_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); + int dw = 0, i; + if (atom->cmd[0] == CP_PACKET2) { + return dw; } + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + if (!t && !r300->radeon.radeonScreen->kernel_mm) { + dw += 0; + } else if (t && t->image_override && !t->bo) { + if (!r300->radeon.radeonScreen->kernel_mm) + dw += 2; + } else + dw += 4; + } + return dw; } static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) @@ -164,47 +164,46 @@ static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) r300ContextPtr r300 = R300_CONTEXT(ctx); BATCH_LOCALS(&r300->radeon); int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); - int notexture = 0; - - if (numtmus) { - int i; - - for(i = 0; i < numtmus; ++i) { - radeonTexObj *t = r300->hw.textures[i]; - - if (!t) - notexture = 1; - } - - if (r300->radeon.radeonScreen->kernel_mm && notexture) { - return; - } - for(i = 0; i < numtmus; ++i) { - radeonTexObj *t = r300->hw.textures[i]; - if (t && !t->image_override) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - END_BATCH(); - } else if (!t) { - /* Texture unit hasn't a texture bound nothings to do */ - } else { /* override cases */ - if (t->bo) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - END_BATCH(); - } else if (!r300->radeon.radeonScreen->kernel_mm) { - BEGIN_BATCH_NO_AUTOSTATE(2); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH(t->override_offset); - END_BATCH(); - } else { - /* Texture unit hasn't a texture bound nothings to do */ - } - } + int i; + + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + if (t && !t->image_override) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } else if (!t) { + /* Texture unit hasn't a texture bound. + * We assign the current color buffer as a fakery to make + * KIL work on KMS (without it, the CS checker will complain). + */ + if (r300->radeon.radeonScreen->kernel_mm) { + struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&r300->radeon); + if (rrb && rrb->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } + } + } else { /* override cases */ + if (t->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } else if (!r300->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH(t->override_offset); + END_BATCH(); + } else { + /* Texture unit hasn't a texture bound nothings to do */ + } } } } @@ -227,8 +226,8 @@ void r300_emit_scissor(GLcontext *ctx) if (r300->radeon.state.scissor.enabled) { x1 = r300->radeon.state.scissor.rect.x1; y1 = r300->radeon.state.scissor.rect.y1; - x2 = r300->radeon.state.scissor.rect.x2 - 1; - y2 = r300->radeon.state.scissor.rect.y2 - 1; + x2 = r300->radeon.state.scissor.rect.x2; + y2 = r300->radeon.state.scissor.rect.y2; } else { x1 = 0; y1 = 0; @@ -247,6 +246,17 @@ void r300_emit_scissor(GLcontext *ctx) OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT)); END_BATCH(); } +static int check_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t dw = 6 + 3 + 16; + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + dw -= 3 + 16; + } + return dw; +} static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) { @@ -256,7 +266,7 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) uint32_t cbpitch; uint32_t offset = r300->radeon.state.color.draw_offset; uint32_t dw = 6; - int i; + int i; rrb = radeon_get_colorbuffer(&r300->radeon); if (!rrb || !rrb->bo) { @@ -264,7 +274,7 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) return; } - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr,"rrb is %p %d %dx%d\n", rrb, offset, rrb->base.Width, rrb->base.Height); cbpitch = (rrb->pitch / rrb->cpp); if (rrb->cpp == 4) @@ -338,13 +348,23 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) } } +static int check_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t dw; + dw = 6; + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + return dw; +} + static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); BATCH_LOCALS(&r300->radeon); struct radeon_renderbuffer *rrb; uint32_t zbpitch; - uint32_t dw; + uint32_t dw = atom->check(ctx, atom); rrb = radeon_get_depthbuffer(&r300->radeon); if (!rrb) @@ -360,9 +380,6 @@ static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) } } - dw = 6; - if (r300->radeon.radeonScreen->kernel_mm) - dw += 2; BEGIN_BATCH_NO_AUTOSTATE(dw); OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -374,46 +391,6 @@ static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) END_BATCH(); } -static void emit_gb_misc(GLcontext *ctx, struct radeon_state_atom * atom) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH(atom->cmd[0]); - OUT_BATCH(atom->cmd[1]); - OUT_BATCH(atom->cmd[2]); - OUT_BATCH(atom->cmd[3]); - END_BATCH(); - } -} - -static void emit_threshold_misc(GLcontext *ctx, struct radeon_state_atom * atom) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - BEGIN_BATCH_NO_AUTOSTATE(3); - OUT_BATCH(atom->cmd[0]); - OUT_BATCH(atom->cmd[1]); - OUT_BATCH(atom->cmd[2]); - END_BATCH(); - } -} - -static void emit_shade_misc(GLcontext *ctx, struct radeon_state_atom * atom) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - - if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { - BEGIN_BATCH_NO_AUTOSTATE(2); - OUT_BATCH(atom->cmd[0]); - OUT_BATCH(atom->cmd[1]); - END_BATCH(); - } -} - static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -431,6 +408,7 @@ static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; } + BEGIN_BATCH_NO_AUTOSTATE(atom->cmd_size); OUT_BATCH(atom->cmd[0]); atom->cmd[1] &= ~0xf; atom->cmd[1] |= format; @@ -438,6 +416,12 @@ static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom OUT_BATCH(atom->cmd[2]); OUT_BATCH(atom->cmd[3]); OUT_BATCH(atom->cmd[4]); + END_BATCH(); +} + +static int check_never(GLcontext *ctx, struct radeon_state_atom *atom) +{ + return 0; } static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) @@ -456,28 +440,29 @@ static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom) return cnt ? cnt + 1 : 0; } -int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) -{ - int cnt; - - cnt = vpu_count(atom->cmd); - return cnt ? (cnt * 4) + 1 : 0; -} - int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; - + r300ContextPtr r300 = R300_CONTEXT(ctx); + int extra = 1; cnt = r500fp_count(atom->cmd); - return cnt ? (cnt * 6) + 1 : 0; + if (r300->radeon.radeonScreen->kernel_mm) + extra = 3; + + return cnt ? (cnt * 6) + extra : 0; } int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; + r300ContextPtr r300 = R300_CONTEXT(ctx); + int extra = 1; + cnt = r500fp_count(atom->cmd); + if (r300->radeon.radeonScreen->kernel_mm) + extra = 3; cnt = r500fp_count(atom->cmd); - return cnt ? (cnt * 4) + 1 : 0; + return cnt ? (cnt * 4) + extra : 0; } #define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \ @@ -509,7 +494,7 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->radeon.hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ mtu = r300->radeon.glCtx->Const.MaxTextureUnits; - if (RADEON_DEBUG & DEBUG_TEXTURE) { + if (RADEON_DEBUG & RADEON_TEXTURE) { fprintf(stderr, "Using %d maximum texture units..\n", mtu); } @@ -567,11 +552,14 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(gb_enable, always, 2, 0); r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1); - ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0); + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0); + } else { + ALLOC_STATE(gb_misc, never, R300_GB_MISC_CMDSIZE, 0); + } r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 3); - r300->hw.gb_misc.emit = emit_gb_misc; ALLOC_STATE(gb_misc2, always, R300_GB_MISC2_CMDSIZE, 0); - r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2); + r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2); ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0); r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1); ALLOC_STATE(ga_point_s0, always, 5, 0); @@ -586,9 +574,12 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1); ALLOC_STATE(ga_line_stipple, always, 4, 0); r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3); - ALLOC_STATE(shade, always, 2, 0); + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + ALLOC_STATE(shade, always, 2, 0); + } else { + ALLOC_STATE(shade, never, 2, 0); + } r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 1); - r300->hw.shade.emit = emit_shade_misc; ALLOC_STATE(shade2, always, 4, 0); r300->hw.shade2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4278, 3); ALLOC_STATE(polygon_mode, always, 4, 0); @@ -637,11 +628,14 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0); - r300->hw.r500fp.emit = emit_r500fp; + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.r500fp.emit = emit_r500fp; + ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0); r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0); - r300->hw.r500fp_const.emit = emit_r500fp; + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.r500fp_const.emit = emit_r500fp; } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3); @@ -684,25 +678,36 @@ void r300InitCmdBuf(r300ContextPtr r300) } ALLOC_STATE(rop, always, 2, 0); r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1); - ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0); + ALLOC_STATE(cb, cb_offset, R300_CB_CMDSIZE, 0); r300->hw.cb.emit = &emit_cb_offset; ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9); ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1); - ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); - r300->hw.rb3d_discard_src_pixel_lte_threshold.emit = emit_threshold_misc; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) { + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); + } else { + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, never, 3, 0); + } + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); r300->hw.zs.cmd[R300_ZS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3); + if (is_r500) { + if (r300->radeon.radeonScreen->kernel_mm) + ALLOC_STATE(zsb, always, R300_ZSB_CMDSIZE, 0); + else + ALLOC_STATE(zsb, never, R300_ZSB_CMDSIZE, 0); + r300->hw.zsb.cmd[R300_ZSB_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R500_ZB_STENCILREFMASK_BF, 1); + } ALLOC_STATE(zstencil_format, always, 5, 0); r300->hw.zstencil_format.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4); r300->hw.zstencil_format.emit = emit_zstencil_format; - ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); + ALLOC_STATE(zb, zb_offset, R300_ZB_CMDSIZE, 0); r300->hw.zb.emit = emit_zb_offset; ALLOC_STATE(zb_depthclearvalue, always, 2, 0); r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1); @@ -715,47 +720,72 @@ void r300InitCmdBuf(r300ContextPtr r300) /* VPU only on TCL */ if (has_tcl) { - int i; + int i; + if (r300->radeon.radeonScreen->kernel_mm) { + ALLOC_STATE(vap_flush, always, 10, 0); + /* flush processing vertices */ + r300->hw.vap_flush.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); + r300->hw.vap_flush.cmd[1] = 0; + r300->hw.vap_flush.cmd[2] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DSTCACHE_CTLSTAT, 1); + r300->hw.vap_flush.cmd[3] = R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D; + r300->hw.vap_flush.cmd[4] = cmdpacket0(r300->radeon.radeonScreen, RADEON_WAIT_UNTIL, 1); + r300->hw.vap_flush.cmd[5] = RADEON_WAIT_3D_IDLECLEAN; + r300->hw.vap_flush.cmd[6] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); + r300->hw.vap_flush.cmd[7] = 0xffffff; + r300->hw.vap_flush.cmd[8] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1); + r300->hw.vap_flush.cmd[9] = 0; + } else { + ALLOC_STATE(vap_flush, never, 10, 0); + } + + ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); r300->hw.vpi.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); - r300->hw.vpi.emit = emit_vpu; + cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpi.emit = emit_vpu; if (is_r500) { - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); - r300->hw.vpp.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); - r300->hw.vpp.emit = emit_vpu; - - ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); - r300->hw.vps.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1); - r300->hw.vps.emit = emit_vpu; + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpp.emit = emit_vpu; + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vps.emit = emit_vpu; for (i = 0; i < 6; i++) { - ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); - r300->hw.vpucp[i].cmd[0] = - cmdvpu(r300->radeon.radeonScreen, - R500_PVS_UCP_START + i, 1); - r300->hw.vpucp[i].emit = emit_vpu; + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[0] = + cmdvpu(r300->radeon.radeonScreen, + R500_PVS_UCP_START + i, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpucp[i].emit = emit_vpu; } } else { - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); - r300->hw.vpp.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); - r300->hw.vpp.emit = emit_vpu; - - ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); - r300->hw.vps.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1); - r300->hw.vps.emit = emit_vpu; + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpp.emit = emit_vpu; + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vps.emit = emit_vpu; for (i = 0; i < 6; i++) { ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); r300->hw.vpucp[i].cmd[0] = cmdvpu(r300->radeon.radeonScreen, - R300_PVS_UCP_START + i, 1); - r300->hw.vpucp[i].emit = emit_vpu; + R300_PVS_UCP_START + i, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpucp[i].emit = emit_vpu; } } } @@ -779,7 +809,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(tex.pitch, variable, mtu + 1, 0); r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0); - ALLOC_STATE(tex.offset, variable, 1, 0); + ALLOC_STATE(tex.offset, tex_offsets, 1, 0); r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0); r300->hw.tex.offset.emit = &emit_tex_offsets; @@ -792,6 +822,17 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0); + radeon_init_query_stateobj(&r300->radeon, R300_QUERYOBJ_CMDSIZE); + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) { + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RV530_FG_ZBREG_DEST, 1); + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL; + } else { + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_REG_DEST, 1); + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = R300_RASTER_PIPE_SELECT_ALL; + } + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZPASS_DATA, 1); + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_1] = 0; + r300->radeon.hw.is_dirty = GL_TRUE; r300->radeon.hw.all_dirty = GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h index 53bcc0eeb4..1b703e518a 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h @@ -38,6 +38,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" +#define CACHE_FLUSH_BUFSZ (4*2) +#define PRE_EMIT_STATE_BUFSZ (2+2) +#define AOS_BUFSZ(nr) (3+(nr >>1)*3 + (nr&1)*2 + (nr*2)) +#define FIREAOS_BUFSZ (3) +#define SCISSORS_BUFSZ (3) + extern void r300InitCmdBuf(r300ContextPtr r300); void r300_emit_scissor(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 6f3aab986d..6fcf209af6 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -64,24 +64,27 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_ioctl.h" #include "r300_tex.h" #include "r300_emit.h" -#include "r300_render.h" #include "r300_swtcl.h" #include "radeon_bocs_wrapper.h" - +#include "radeon_buffer_objects.h" +#include "radeon_queryobj.h" #include "vblank.h" #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ #define need_GL_VERSION_2_0 +#define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters #define need_GL_ARB_vertex_program #define need_GL_EXT_blend_equation_separate #define need_GL_EXT_blend_func_separate #define need_GL_EXT_blend_minmax +#define need_GL_EXT_framebuffer_blit #define need_GL_EXT_framebuffer_object #define need_GL_EXT_fog_coord #define need_GL_EXT_gpu_program_parameters +#define need_GL_EXT_provoking_vertex #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side #define need_GL_ATI_separate_stencil @@ -94,6 +97,7 @@ const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ {"GL_ARB_depth_texture", NULL}, {"GL_ARB_fragment_program", NULL}, + {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, {"GL_ARB_multitexture", NULL}, {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, {"GL_ARB_shadow", NULL}, @@ -113,6 +117,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_packed_depth_stencil", NULL}, {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, + {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions }, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, {"GL_EXT_shadow_funcs", NULL}, {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions}, @@ -141,6 +146,7 @@ const struct dri_extension card_extensions[] = { const struct dri_extension mm_extensions[] = { + { "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions }, { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, { NULL, NULL } }; @@ -154,7 +160,6 @@ const struct dri_extension gl_20_extension[] = { }; static const struct tnl_pipeline_stage *r300_pipeline[] = { - /* Catch any t&l fallbacks */ &_tnl_vertex_transform_stage, @@ -165,21 +170,7 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = { &_tnl_texture_transform_stage, &_tnl_point_attenuation_stage, &_tnl_vertex_program_stage, - - /* Try again to go to tcl? - * - no good for asymmetric-twoside (do with multipass) - * - no good for asymmetric-unfilled (do with multipass) - * - good for material - * - good for texgen - * - need to manipulate a bit of state - * - * - worth it/not worth it? - */ - - /* Else do them here. - */ - &_r300_render_stage, - &_tnl_render_stage, /* FALLBACK */ + &_tnl_render_stage, 0, }; @@ -225,11 +216,8 @@ static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmes static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon) { - r300ContextPtr r300 = (r300ContextPtr)radeon; BATCH_LOCALS(radeon); - r300->vap_flush_needed = GL_TRUE; - cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); BEGIN_BATCH_NO_AUTOSTATE(2); OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH); @@ -246,6 +234,81 @@ static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode) r300->radeon.Fallback &= ~bit; } +static void r300_emit_query_finish(radeonContextPtr radeon) +{ + r300ContextPtr r300 = (r300ContextPtr)radeon; + struct radeon_query_object *query = radeon->query.current; + BATCH_LOCALS(radeon); + + BEGIN_BATCH_NO_AUTOSTATE(3 * 2 *r300->radeon.radeonScreen->num_gb_pipes + 2); + switch (r300->radeon.radeonScreen->num_gb_pipes) { + case 4: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+3*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 3: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_2); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+2*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 2: + if (r300->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV380) { + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); + } else { + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_1); + } + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+1*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 1: + default: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + break; + } + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); + END_BATCH(); + query->curr_offset += r300->radeon.radeonScreen->num_gb_pipes * sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + +static void rv530_emit_query_finish_single_z(radeonContextPtr radeon) +{ + BATCH_LOCALS(radeon); + struct radeon_query_object *query = radeon->query.current; + + BEGIN_BATCH_NO_AUTOSTATE(8); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_BATCH(); + + query->curr_offset += sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + +static void rv530_emit_query_finish_double_z(radeonContextPtr radeon) +{ + BATCH_LOCALS(radeon); + struct radeon_query_object *query = radeon->query.current; + + BEGIN_BATCH_NO_AUTOSTATE(14); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_BATCH(); + + query->curr_offset += 2 * sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + static void r300_init_vtbl(radeonContextPtr radeon) { radeon->vtbl.get_lock = r300_get_lock; @@ -254,6 +317,13 @@ static void r300_init_vtbl(radeonContextPtr radeon) radeon->vtbl.swtcl_flush = r300_swtcl_flush; radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms; radeon->vtbl.fallback = r300_fallback; + if (radeon->radeonScreen->chip_family == CHIP_FAMILY_RV530) { + if (radeon->radeonScreen->num_z_pipes == 2) + radeon->vtbl.emit_query_finish = rv530_emit_query_finish_double_z; + else + radeon->vtbl.emit_query_finish = rv530_emit_query_finish_single_z; + } else + radeon->vtbl.emit_query_finish = r300_emit_query_finish; } static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) @@ -295,13 +365,10 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) /* currently bogus data */ if (r300->options.hw_tcl_enabled) { - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeAluInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; + ctx->Const.VertexProgram.MaxNativeTemps = 32; ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; } @@ -325,6 +392,7 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT; ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; } + } static void r300ParseOptions(r300ContextPtr r300, radeonScreenPtr screen) @@ -361,12 +429,16 @@ static void r300InitGLExtensions(GLcontext *ctx) if (r300->options.stencil_two_side_disabled) _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); - if (ctx->Mesa_DXTn && !r300->options.s3tc_force_enabled) { + if (r300->options.s3tc_force_enabled) { _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); _mesa_enable_extension(ctx, "GL_S3_s3tc"); } else if (r300->options.s3tc_force_disabled) { _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } + + if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) { + _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); + } } /* Create the device specific rendering context. @@ -391,6 +463,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300ParseOptions(r300, screen); + r300->radeon.radeonScreen = screen; r300_init_vtbl(&r300->radeon); _mesa_init_driver_functions(&functions); @@ -398,6 +471,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitStateFuncs(&functions); r300InitTextureFuncs(&functions); r300InitShaderFuncs(&functions); + radeonInitQueryObjFunctions(&functions); + radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r300->radeon, &functions, glVisual, driContextPriv, @@ -451,11 +526,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitState(r300); r300InitShaderFunctions(r300); - if (screen->chip_family == CHIP_FAMILY_RS600 || screen->chip_family == CHIP_FAMILY_RS690 || - screen->chip_family == CHIP_FAMILY_RS740) { - r300->radeon.texture_row_align = 64; - } - r300InitGLExtensions(ctx); return GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index f7af7d4e57..518d5cdbf4 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -44,28 +44,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/mtypes.h" #include "shader/prog_instruction.h" +#include "compiler/radeon_code.h" struct r300_context; typedef struct r300_context r300ContextRec; typedef struct r300_context *r300ContextPtr; -/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . - I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble - with other compilers ... GLUE! -*/ -#define WARN_ONCE(a, ...) { \ - static int warn##__LINE__=1; \ - if(warn##__LINE__){ \ - fprintf(stderr, "*********************************WARN_ONCE*********************************\n"); \ - fprintf(stderr, "File %s function %s line %d\n", \ - __FILE__, __FUNCTION__, __LINE__); \ - fprintf(stderr, a, ## __VA_ARGS__);\ - fprintf(stderr, "***************************************************************************\n"); \ - warn##__LINE__=0;\ - } \ - } - #include "r300_vertprog.h" @@ -249,6 +234,10 @@ typedef struct r300_context *r300ContextPtr; #define R300_ZS_CNTL_2 3 #define R300_ZS_CMDSIZE 4 +#define R300_ZSB_CMD_0 0 +#define R300_ZSB_CNTL_0 1 +#define R300_ZSB_CMDSIZE 2 + #define R300_ZB_CMD_0 0 #define R300_ZB_OFFSET 1 #define R300_ZB_PITCH 2 @@ -289,6 +278,12 @@ typedef struct r300_context *r300ContextPtr; #define R300_TEX_CMDSIZE (MAX_TEXTURE_UNITS+1) */ +#define R300_QUERYOBJ_CMD_0 0 +#define R300_QUERYOBJ_DATA_0 1 +#define R300_QUERYOBJ_CMD_1 2 +#define R300_QUERYOBJ_DATA_1 3 +#define R300_QUERYOBJ_CMDSIZE 4 + /** * Cache for hardware register state. */ @@ -352,6 +347,7 @@ struct r300_hw_state { struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */ struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */ struct radeon_state_atom zs; /* zstencil control (4F00) */ + struct radeon_state_atom zsb; /* zstencil bf */ struct radeon_state_atom zstencil_format; struct radeon_state_atom zb; /* z buffer (4F20) */ struct radeon_state_atom zb_depthclearvalue; /* (4F28) */ @@ -359,6 +355,7 @@ struct r300_hw_state { struct radeon_state_atom zb_hiz_offset; /* (4F44) */ struct radeon_state_atom zb_hiz_pitch; /* (4F54) */ + struct radeon_state_atom vap_flush; struct radeon_state_atom vpi; /* vp instructions */ struct radeon_state_atom vpp; /* vp parameters */ struct radeon_state_atom vps; /* vertex point size (?) */ @@ -379,7 +376,6 @@ struct r300_hw_state { struct radeon_state_atom border_color; } tex; struct radeon_state_atom txe; /* tex enable (4104) */ - radeonTexObj *textures[R300_MAX_TEXTURE_UNITS]; }; @@ -389,46 +385,25 @@ struct r300_hw_state { /* Vertex shader state */ -/* Perhaps more if we store programs in vmem? */ -/* drm_r300_cmd_header_t->vpu->count is unsigned char */ -#define VSF_MAX_FRAGMENT_LENGTH (255*4) - -/* Can be tested with colormat currently. */ -#define VSF_MAX_FRAGMENT_TEMPS (14) - -#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) -#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) - #define COLOR_IS_RGBA #define TAG(x) r300##x #include "tnl_dd/t_dd_vertex.h" #undef TAG +struct r300_vertex_program_key { + GLbitfield FpReads; + GLuint FogAttr; + GLuint WPosAttr; +}; + struct r300_vertex_program { struct gl_vertex_program *Base; struct r300_vertex_program *next; - struct r300_vertex_program_key { - GLuint FpReads; - GLuint FogAttr; - GLuint WPosAttr; - } key; - - struct r300_vertex_shader_hw_code { - int length; - union { - GLuint d[VSF_MAX_FRAGMENT_LENGTH]; - float f[VSF_MAX_FRAGMENT_LENGTH]; - } body; - } hw_code; - - GLboolean translated; - GLboolean error; + struct r300_vertex_program_key key; + struct r300_vertex_program_code code; - int pos_end; - int num_temporaries; /* Number of temp vars used by program */ - int inputs[VERT_ATTRIB_MAX]; - int outputs[VERT_RESULT_MAX]; + GLboolean error; }; struct r300_vertex_program_cont { @@ -441,131 +416,18 @@ struct r300_vertex_program_cont { struct r300_vertex_program *progs; }; -#define R300_PFS_MAX_ALU_INST 64 -#define R300_PFS_MAX_TEX_INST 32 -#define R300_PFS_MAX_TEX_INDIRECT 4 -#define R300_PFS_NUM_TEMP_REGS 32 -#define R300_PFS_NUM_CONST_REGS 32 - -#define R500_PFS_MAX_INST 512 -#define R500_PFS_NUM_TEMP_REGS 128 -#define R500_PFS_NUM_CONST_REGS 256 - -struct r300_pfs_compile_state; -struct r500_pfs_compile_state; - -/** - * Stores state that influences the compilation of a fragment program. - */ -struct r300_fragment_program_external_state { - struct { - /** - * If the sampler is used as a shadow sampler, - * this field is: - * 0 - GL_LUMINANCE - * 1 - GL_INTENSITY - * 2 - GL_ALPHA - * depending on the depth texture mode. - */ - GLuint depth_texture_mode : 2; - - /** - * If the sampler is used as a shadow sampler, - * this field is (texture_compare_func - GL_NEVER). - * [e.g. if compare function is GL_LEQUAL, this field is 3] - * - * Otherwise, this field is 0. - */ - GLuint texture_compare_func : 3; - } unit[16]; -}; - - -struct r300_fragment_program_node { - int tex_offset; /**< first tex instruction */ - int tex_end; /**< last tex instruction, relative to tex_offset */ - int alu_offset; /**< first ALU instruction */ - int alu_end; /**< last ALU instruction, relative to alu_offset */ - int flags; -}; - -/** - * Stores an R300 fragment program in its compiled-to-hardware form. - */ -struct r300_fragment_program_code { - struct { - int length; /**< total # of texture instructions used */ - GLuint inst[R300_PFS_MAX_TEX_INST]; - } tex; - - struct { - int length; /**< total # of ALU instructions used */ - struct { - GLuint inst0; - GLuint inst1; - GLuint inst2; - GLuint inst3; - } inst[R300_PFS_MAX_ALU_INST]; - } alu; - - struct r300_fragment_program_node node[4]; - int cur_node; - int first_node_has_tex; - - /** - * Remember which program register a given hardware constant - * belongs to. - */ - struct prog_src_register constant[R300_PFS_NUM_CONST_REGS]; - int const_nr; - - int max_temp_idx; -}; - - -struct r500_fragment_program_code { - struct { - GLuint inst0; - GLuint inst1; - GLuint inst2; - GLuint inst3; - GLuint inst4; - GLuint inst5; - } inst[R500_PFS_MAX_INST]; - - int inst_offset; - int inst_end; - - /** - * Remember which program register a given hardware constant - * belongs to. - */ - struct prog_src_register constant[R500_PFS_NUM_CONST_REGS]; - int const_nr; - - int max_temp_idx; -}; /** * Store everything about a fragment program that is needed * to render with that program. */ struct r300_fragment_program { - struct gl_program *Base; - - GLboolean translated; GLboolean error; - + struct r300_fragment_program *next; struct r300_fragment_program_external_state state; - union rX00_fragment_program_code { - struct r300_fragment_program_code r300; - struct r500_fragment_program_code r500; - } code; - GLboolean writes_depth; - GLuint optimization; - - struct r300_fragment_program *next; + struct rX00_fragment_program_code code; + GLbitfield InputsRead; /* attribute that we are sending the WPOS in */ gl_frag_attrib wpos_attr; @@ -583,12 +445,6 @@ struct r300_fragment_program_cont { struct r300_fragment_program *progs; }; -struct r300_fragment_program_compiler { - r300ContextPtr r300; - struct r300_fragment_program *fp; - union rX00_fragment_program_code *code; - struct gl_program *program; -}; #define R300_MAX_AOS_ARRAYS 16 @@ -610,8 +466,6 @@ struct r300_swtcl_info { struct r300_vtable { void (* SetupRSUnit)(GLcontext *ctx); void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); - GLboolean (* BuildFragmentProgramHwCode)(struct r300_fragment_program_compiler *compiler); - void (* FragmentProgramDump)(union rX00_fragment_program_code *code); void (* SetupPixelShader)(GLcontext *ctx); }; @@ -619,11 +473,12 @@ struct r300_vertex_buffer { struct vertex_attribute { /* generic */ GLubyte element; - GLvoid *data; - GLboolean free_needed; GLuint stride; GLuint dwords; GLubyte size; /* number of components */ + GLboolean is_named_bo; + struct radeon_bo *bo; + GLint bo_offset; /* hw specific */ uint32_t data_type:4; @@ -638,12 +493,14 @@ struct r300_vertex_buffer { }; struct r300_index_buffer { - GLvoid *ptr; + struct radeon_bo *bo; + int bo_offset; + GLboolean is_32bit; - GLboolean free_needed; GLuint count; }; + /** * \brief R300 context structure. */ @@ -669,11 +526,10 @@ struct r300_context { uint32_t s3tc_force_disabled:1; uint32_t stencil_two_side_disabled:1; } options; - + struct r300_swtcl_info swtcl; struct r300_vertex_buffer vbuf; struct r300_index_buffer ind_buf; - GLboolean vap_flush_needed; uint32_t fallback; diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 9769ff5399..e9968f9ffe 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -31,6 +31,7 @@ #include "main/state.h" #include "main/api_validate.h" #include "main/enums.h" +#include "main/simple_list.h" #include "r300_reg.h" #include "r300_context.h" @@ -38,6 +39,10 @@ #include "r300_render.h" #include "r300_state.h" #include "r300_tex.h" +#include "r300_cmdbuf.h" + +#include "radeon_buffer_objects.h" +#include "radeon_common_context.h" #include "tnl/tnl.h" #include "tnl/t_vp_build.h" @@ -45,32 +50,59 @@ #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" -static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf, struct gl_buffer_object **bo, GLuint *nr_bo) + +static int getTypeSize(GLenum type) +{ + switch (type) { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_index_buffer *ind_buf = &r300->ind_buf; GLvoid *src_ptr; + GLuint *out; + int i; + GLboolean mapped_named_bo = GL_FALSE; - if (!mesa_ind_buf) { - ind_buf->ptr = NULL; - return; - } - - ind_buf->count = mesa_ind_buf->count; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - bo[*nr_bo] = mesa_ind_buf->obj; - (*nr_bo)++; ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, + "%s: Fixing index buffer format. type %d\n", + __func__, mesa_ind_buf->type); + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); GLubyte *in = (GLubyte *)src_ptr; - GLuint *out = _mesa_malloc(sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1)); - int i; - ind_buf->ptr = out; + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + + assert(r300->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; @@ -80,16 +112,16 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *out++ = in[i]; } - ind_buf->free_needed = GL_TRUE; - ind_buf->is_32bit = GL_FALSE; - } else if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) { #if MESA_BIG_ENDIAN + } else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ GLushort *in = (GLushort *)src_ptr; - GLuint *out = _mesa_malloc(sizeof(GLushort) * - ((mesa_ind_buf->count + 1) & ~1)); - int i; + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, + &r300->ind_buf.bo_offset, size, 4); - ind_buf->ptr = out; + assert(r300->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; @@ -98,42 +130,61 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer if (i < mesa_ind_buf->count) { *out++ = in[i]; } - - ind_buf->free_needed = GL_TRUE; -#else - ind_buf->ptr = src_ptr; - ind_buf->free_needed = GL_FALSE; #endif - ind_buf->is_32bit = GL_FALSE; - } else { - ind_buf->ptr = src_ptr; - ind_buf->free_needed = GL_FALSE; - ind_buf->is_32bit = GL_TRUE; + } + + r300->ind_buf.is_32bit = GL_FALSE; + r300->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); } } -static int getTypeSize(GLenum type) + +static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { - switch (type) { - case GL_DOUBLE: - return sizeof(GLdouble); - case GL_FLOAT: - return sizeof(GLfloat); - case GL_INT: - return sizeof(GLint); - case GL_UNSIGNED_INT: - return sizeof(GLuint); - case GL_SHORT: - return sizeof(GLshort); - case GL_UNSIGNED_SHORT: - return sizeof(GLushort); - case GL_BYTE: - return sizeof(GLbyte); - case GL_UNSIGNED_BYTE: - return sizeof(GLubyte); - default: - assert(0); - return 0; + r300ContextPtr r300 = R300_CONTEXT(ctx); + + if (!mesa_ind_buf) { + r300->ind_buf.bo = NULL; + return; + } + radeon_print(RADEON_RENDER, RADEON_TRACE, "%s\n", __func__); + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) { +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) { +#endif + const GLvoid *src_ptr; + GLvoid *dst_ptr; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + + assert(r300->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); + _mesa_memcpy(dst_ptr, src_ptr, size); + + r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + r300->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } + } else { + r300FixupIndexBuffer(ctx, mesa_ind_buf); } } @@ -161,27 +212,123 @@ static int getTypeSize(GLenum type) } \ } while (0) -static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input, struct gl_buffer_object **bo, GLuint *nr_bo) +/** + * Convert attribute data type to float + * If the attribute uses named buffer object replace the bo with newly allocated bo + */ +static void r300ConvertAttrib(GLcontext *ctx, int count, const struct gl_client_array *input, struct vertex_attribute *attr) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_vertex_buffer *vbuf = &r300->vbuf; - struct vertex_attribute r300_attr; - const void *src_ptr; - GLenum type; + const GLvoid *src_ptr; + GLboolean mapped_named_bo = GL_FALSE; + GLfloat *dst_ptr; GLuint stride; + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; + + /* Convert value for first element only */ + if (input->StrideB == 0) + count = 1; + if (input->BufferObj->Name) { if (!input->BufferObj->Pointer) { - bo[*nr_bo] = input->BufferObj; - (*nr_bo)++; ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); - assert(input->BufferObj->Pointer != NULL); + mapped_named_bo = GL_TRUE; } src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); - } else + } else { src_ptr = input->Ptr; + } + + radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, + "%s: Converting vertex attributes, attribute data format %x," + "stride %d, components %d\n" + , __FUNCTION__, input->Type + , stride, input->Size); + + assert(src_ptr != NULL); + + switch (input->Type) { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } +} + +static void r300AlignDataToDword(GLcontext *ctx, const struct gl_client_array *input, int count, struct vertex_attribute *attr) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + const int dst_stride = (input->StrideB + 3) & ~3; + const int size = getTypeSize(input->Type) * input->Size * count; + GLboolean mapped_named_bo = GL_FALSE; + + radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, size, 32); + + if (!input->BufferObj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, "%s. Vertex alignment doesn't match hw requirements.\n", __func__); + { + GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + int i; + + for (i = 0; i < count; ++i) { + _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + src_ptr += input->StrideB; + dst_ptr += dst_stride; + } + } + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } + + attr->stride = dst_stride; +} + +static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + struct vertex_attribute r300_attr; + GLenum type; + GLuint stride; + + radeon_print(RADEON_RENDER, RADEON_TRACE, "%s\n", __func__); stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || @@ -189,62 +336,29 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st getTypeSize(input->Type) != 4 || #endif stride < 4) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) { - fprintf(stderr, "%s: Converting vertex attributes, attribute data format %x,", __FUNCTION__, input->Type); - fprintf(stderr, "stride %d, components %d\n", stride, input->Size); - } - - GLfloat *dst_ptr, *tmp; - - /* Convert value for first element only */ - if (input->StrideB == 0) - count = 1; - - tmp = dst_ptr = _mesa_malloc(sizeof(GLfloat) * input->Size * count); - - switch (input->Type) { - case GL_DOUBLE: - CONVERT(GLdouble, (GLfloat)); - break; - case GL_UNSIGNED_INT: - CONVERT(GLuint, UINT_TO_FLOAT); - break; - case GL_INT: - CONVERT(GLint, INT_TO_FLOAT); - break; - case GL_UNSIGNED_SHORT: - CONVERT(GLushort, USHORT_TO_FLOAT); - break; - case GL_SHORT: - CONVERT(GLshort, SHORT_TO_FLOAT); - break; - case GL_UNSIGNED_BYTE: - assert(input->Format != GL_BGRA); - CONVERT(GLubyte, UBYTE_TO_FLOAT); - break; - case GL_BYTE: - CONVERT(GLbyte, BYTE_TO_FLOAT); - break; - default: - assert(0); - break; - } type = GL_FLOAT; - r300_attr.free_needed = GL_TRUE; - r300_attr.data = tmp; + if (input->StrideB == 0) { r300_attr.stride = 0; } else { r300_attr.stride = sizeof(GLfloat) * input->Size; } r300_attr.dwords = input->Size; + r300_attr.is_named_bo = GL_FALSE; } else { type = input->Type; - r300_attr.free_needed = GL_FALSE; - r300_attr.data = (GLvoid *)src_ptr; - r300_attr.stride = input->StrideB; - r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4; + r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4; + if (!input->BufferObj->Name) { + + if (input->StrideB == 0) { + r300_attr.stride = 0; + } else { + r300_attr.stride = (getTypeSize(type) * input->Size + 3) & ~3; + } + + r300_attr.is_named_bo = GL_FALSE; + } } r300_attr.size = input->Size; @@ -333,15 +447,15 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st ++vbuf->num_attribs; } -static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count, struct gl_buffer_object **bo, GLuint *nr_bo) +static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_buffer *vbuf = &r300->vbuf; - + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s\n", __func__); { int i, tmp; - tmp = r300->selected_vp->Base->Base.InputsRead; + tmp = r300->selected_vp->code.InputsRead; i = 0; vbuf->num_attribs = 0; while (tmp) { @@ -351,7 +465,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar ++i; } - r300TranslateAttrib(ctx, i, count, arrays[i], bo, nr_bo); + r300TranslateAttrib(ctx, i, count, arrays[i]); tmp >>= 1; ++i; @@ -361,45 +475,139 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS); if (r300->fallback) return; +} - { - int i; +static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *input[], int count) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + GLuint stride; + int ret; + int i, index; + radeon_print(RADEON_RENDER, RADEON_VERBOSE, + "%s: count %d num_attribs %d\n", + __func__, count, vbuf->num_attribs); + + for (index = 0; index < vbuf->num_attribs; index++) { + struct radeon_aos *aos = &r300->radeon.tcl.aos[index]; + i = vbuf->attribs[index].element; + + stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; + + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input[i]->Type) != 4 || +#endif + stride < 4) { - for (i = 0; i < vbuf->num_attribs; i++) { - rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i], - vbuf->attribs[i].data, vbuf->attribs[i].dwords, - vbuf->attribs[i].stride, count); + r300ConvertAttrib(ctx, count, input[i], &vbuf->attribs[index]); + } else { + if (input[i]->BufferObj->Name) { + if (stride % 4 != 0) { + assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); + r300AlignDataToDword(ctx, input[i], count, &vbuf->attribs[index]); + vbuf->attribs[index].is_named_bo = GL_FALSE; + } else { + vbuf->attribs[index].stride = input[i]->StrideB; + vbuf->attribs[index].bo_offset = (intptr_t) input[i]->Ptr; + vbuf->attribs[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + vbuf->attribs[index].is_named_bo = GL_TRUE; + } + } else { + + int size; + int local_count = count; + uint32_t *dst; + + if (input[i]->StrideB == 0) { + size = getTypeSize(input[i]->Type) * input[i]->Size; + local_count = 1; + } else { + size = getTypeSize(input[i]->Type) * input[i]->Size * local_count; + } + + radeonAllocDmaRegion(&r300->radeon, &vbuf->attribs[index].bo, &vbuf->attribs[index].bo_offset, size, 32); + assert(vbuf->attribs[index].bo->ptr != NULL); + dst = (uint32_t *)ADD_POINTERS(vbuf->attribs[index].bo->ptr, vbuf->attribs[index].bo_offset); + switch (vbuf->attribs[index].dwords) { + case 1: radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + case 3: radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + case 4: radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + default: assert(0); break; + } + + } } - r300->radeon.tcl.aos_count = vbuf->num_attribs; + aos->count = vbuf->attribs[index].stride == 0 ? 1 : count; + aos->stride = vbuf->attribs[index].stride / sizeof(float); + aos->components = vbuf->attribs[index].dwords; + aos->bo = vbuf->attribs[index].bo; + aos->offset = vbuf->attribs[index].bo_offset; + + if (vbuf->attribs[index].is_named_bo) { + radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[index].bo, RADEON_GEM_DOMAIN_GTT, 0); + } } + + r300->radeon.tcl.aos_count = vbuf->num_attribs; + ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, first_elem(&r300->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); + r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, ret); + } -static void r300FreeData(GLcontext *ctx, struct gl_buffer_object **bo, GLuint nr_bo) +static void r300FreeData(GLcontext *ctx) { + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s\n", __func__); + r300ContextPtr r300 = R300_CONTEXT(ctx); { - struct r300_vertex_buffer *vbuf = &R300_CONTEXT(ctx)->vbuf; int i; - for (i = 0; i < vbuf->num_attribs; i++) { - if (vbuf->attribs[i].free_needed) - _mesa_free(vbuf->attribs[i].data); + for (i = 0; i < r300->vbuf.num_attribs; i++) { + if (!r300->vbuf.attribs[i].is_named_bo) { + radeon_bo_unref(r300->vbuf.attribs[i].bo); + } + r300->radeon.tcl.aos[i].bo = NULL; } } { - struct r300_index_buffer *ind_buf = &R300_CONTEXT(ctx)->ind_buf; - if (ind_buf->free_needed) - _mesa_free(ind_buf->ptr); + if (r300->ind_buf.bo != NULL) { + radeon_bo_unref(r300->ind_buf.bo); + } } +} - { - int i; +static GLuint r300PredictTryDrawPrimsSize(GLcontext *ctx, GLuint nr_prims) +{ + struct r300_context *r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + GLboolean flushed; + GLuint dwords; + GLuint state_size; + + dwords = 2*CACHE_FLUSH_BUFSZ; + dwords += PRE_EMIT_STATE_BUFSZ; + dwords += (AOS_BUFSZ(vbuf->num_attribs) + + SCISSORS_BUFSZ*2 + + FIREAOS_BUFSZ )*nr_prims; + + state_size = radeonCountStateEmitSize(&r300->radeon); + flushed = rcommonEnsureCmdBufSpace(&r300->radeon, + dwords + state_size, + __FUNCTION__); + if (flushed) + dwords += radeonCountStateEmitSize(&r300->radeon); + else + dwords += state_size; - for (i = 0; i < nr_bo; ++i) { - ctx->Driver.UnmapBuffer(ctx, 0, bo[i]); - } - } + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords); + return dwords; } static GLboolean r300TryDrawPrims(GLcontext *ctx, @@ -411,8 +619,10 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, GLuint max_index ) { struct r300_context *r300 = R300_CONTEXT(ctx); - struct gl_buffer_object *bo[VERT_ATTRIB_MAX+1]; - GLuint i, nr_bo = 0; + GLuint i; + + radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: %u (%d-%d) cs begin at %d\n", + __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw ); if (ctx->NewState) _mesa_update_state( ctx ); @@ -424,23 +634,27 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx)); - r300FixupIndexBuffer(ctx, ib, bo, &nr_bo); - - /* ensure we have the cmd buf space in advance to cover - * the state + DMA AOS pointers */ - rcommonEnsureCmdBufSpace(&r300->radeon, - r300->radeon.hw.max_state_size + (50*sizeof(int)), - __FUNCTION__); - - r300SetVertexFormat(ctx, arrays, max_index + 1, bo, &nr_bo); + r300SetVertexFormat(ctx, arrays, max_index + 1); if (r300->fallback) return GL_FALSE; - r300SetupVAP(ctx, r300->selected_vp->Base->Base.InputsRead, r300->selected_vp->Base->Base.OutputsWritten); + r300SetupVAP(ctx, r300->selected_vp->code.InputsRead, r300->selected_vp->code.OutputsWritten); r300UpdateShaderStates(r300); + /* ensure we have the cmd buf space in advance to cover + * the state + DMA AOS pointers */ + GLuint emit_end = r300PredictTryDrawPrimsSize(ctx, nr_prims) + + r300->radeon.cmdbuf.cs->cdw; + + r300SetupIndexBuffer(ctx, ib); + + r300AllocDmaRegions(ctx, arrays, max_index + 1); + + if (r300->fallback) + return GL_FALSE; + r300EmitCacheFlush(r300); radeonEmitState(&r300->radeon); @@ -450,9 +664,14 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300EmitCacheFlush(r300); - radeonReleaseArrays(ctx, ~0); + r300FreeData(ctx); - r300FreeData(ctx, bo, nr_bo); + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: %u (%d-%d) cs ending at %d\n", + __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw ); + + if (emit_end < r300->radeon.cmdbuf.cs->cdw) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", r300->radeon.cmdbuf.cs->cdw - emit_end); return GL_TRUE; } @@ -462,28 +681,26 @@ static void r300DrawPrims(GLcontext *ctx, const struct _mesa_prim *prim, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index) { - struct split_limits limits; GLboolean retval; - if (ib) - limits.max_verts = 0xffffffff; - else - limits.max_verts = 65535; - - limits.max_indices = 65535; - limits.max_vb_size = 1024*1024; + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } if (min_index) { + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, + "%s: Rebasing primitives. %p nr_prims %d min_index %u max_index %u\n", + __func__, prim, nr_prims, min_index, max_index); vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims ); return; } - if ((ib && ib->count > 65535)) { - vbo_split_prims (ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims, &limits); - return; - } /* Make an attempt at drawing */ retval = r300TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index feb3370f37..07e6223087 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -124,41 +124,6 @@ GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes) return ret; } -GLboolean r300EmitArrays(GLcontext * ctx) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_vertex_buffer *vbuf = &r300->vbuf; - GLuint InputsRead, OutputsWritten; - - r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten); - - r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS); - if (r300->fallback & R300_RASTER_FALLBACK_MASK) - return GL_FALSE; - - { - struct vertex_buffer *mesa_vb = &TNL_CONTEXT(ctx)->vb; - GLuint attr, i; - - for (i = 0; i < vbuf->num_attribs; i++) { - attr = vbuf->attribs[i].element; - rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i], mesa_vb->AttribPtr[attr]->data, - mesa_vb->AttribPtr[attr]->size, mesa_vb->AttribPtr[attr]->stride, mesa_vb->Count); - } - - r300->radeon.tcl.aos_count = vbuf->num_attribs; - - /* Fill index buffer info */ - r300->ind_buf.ptr = mesa_vb->Elts; - r300->ind_buf.is_32bit = GL_TRUE; - r300->ind_buf.free_needed = GL_FALSE; - } - - r300SetupVAP(ctx, InputsRead, OutputsWritten); - - return GL_TRUE; -} - void r300EmitCacheFlush(r300ContextPtr rmesa) { BATCH_LOCALS(&rmesa->radeon); diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 3f8c60ffae..8e57e354d1 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -104,7 +104,7 @@ static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet) return cmd.u; } -static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn, +static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn, unsigned short count) { drm_r300_cmd_header_t cmd; @@ -216,8 +216,6 @@ void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags) } } -extern GLboolean r300EmitArrays(GLcontext * ctx); - extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c deleted file mode 100644 index 55c1cfe631..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ /dev/null @@ -1,451 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "r300_fragprog.h" - -#include "shader/prog_parameter.h" - -#include "r300_context.h" -#include "r300_fragprog_swizzle.h" - -static void reset_srcreg(struct prog_src_register* reg) -{ - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; -} - -static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) -{ - gl_state_index fail_value_tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 - }; - struct prog_src_register reg = { 0, }; - - fail_value_tokens[2] = tmu; - reg.File = PROGRAM_STATE_VAR; - reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); - reg.Swizzle = SWIZZLE_WWWW; - return reg; -} - -/** - * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - premultiply texture coordinates for RECT - * - extract operand swizzles - * - introduce a temporary register when write masks are needed - * - * \todo If/when r5xx uses the radeon_program architecture, this can probably - * be reused. - */ -GLboolean r300_transform_TEX( - struct radeon_transform_context *t, - struct prog_instruction* orig_inst, void* data) -{ - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - struct prog_instruction inst = *orig_inst; - struct prog_instruction* tgt; - GLboolean destredirect = GL_FALSE; - - if (inst.Opcode != OPCODE_TEX && - inst.Opcode != OPCODE_TXB && - inst.Opcode != OPCODE_TXP && - inst.Opcode != OPCODE_KIL) - return GL_FALSE; - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = inst.DstReg; - if (comparefunc == GL_ALWAYS) { - tgt->SrcReg[0].File = PROGRAM_BUILTIN; - tgt->SrcReg[0].Swizzle = SWIZZLE_1111; - } else { - tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); - } - return GL_TRUE; - } - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = radeonFindFreeTemporary(t); - inst.DstReg.WriteMask = WRITEMASK_XYZW; - } - - - /* Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. - */ - if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { - gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, - 0 - }; - - int tempreg = radeonFindFreeTemporary(t); - int factor_index; - - tokens[2] = inst.TexSrcUnit; - factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens); - - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MUL; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tempreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - tgt->SrcReg[1].File = PROGRAM_STATE_VAR; - tgt->SrcReg[1].Index = factor_index; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tempreg; - } - - if (inst.Opcode != OPCODE_KIL) { - if (inst.DstReg.File != PROGRAM_TEMPORARY || - inst.DstReg.WriteMask != WRITEMASK_XYZW) { - int tempreg = radeonFindFreeTemporary(t); - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; - inst.DstReg.WriteMask = WRITEMASK_XYZW; - destredirect = GL_TRUE; - } else if (inst.SaturateMode) { - destredirect = GL_TRUE; - } - } - - if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { - int tmpreg = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(t->Program, 1); - tgt->Opcode = OPCODE_MOV; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tmpreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tmpreg; - } - - tgt = radeonAppendInstructions(t->Program, 1); - _mesa_copy_instructions(tgt, &inst, 1); - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; - int rcptemp = radeonFindFreeTemporary(t); - int pass, fail; - - tgt = radeonAppendInstructions(t->Program, 3); - - tgt[0].Opcode = OPCODE_RCP; - tgt[0].DstReg.File = PROGRAM_TEMPORARY; - tgt[0].DstReg.Index = rcptemp; - tgt[0].DstReg.WriteMask = WRITEMASK_W; - tgt[0].SrcReg[0] = inst.SrcReg[0]; - tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; - - tgt[1].Opcode = OPCODE_MAD; - tgt[1].DstReg = inst.DstReg; - tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; - tgt[1].SrcReg[0] = inst.SrcReg[0]; - tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[1].Index = rcptemp; - tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; - tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[2].Index = inst.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; - else - tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; - - tgt[2].Opcode = OPCODE_CMP; - tgt[2].DstReg = orig_inst->DstReg; - tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; - tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; - - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } - - tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; - tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; - tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); - } else if (destredirect) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = orig_inst->DstReg; - tgt->SaturateMode = inst.SaturateMode; - tgt->SrcReg[0].File = PROGRAM_TEMPORARY; - tgt->SrcReg[0].Index = inst.DstReg.Index; - } - - return GL_TRUE; -} - -/* just some random things... */ -void r300FragmentProgramDump(union rX00_fragment_program_code *c) -{ - struct r300_fragment_program_code *code = &c->r300; - int n, i, j; - static int pc = 0; - - fprintf(stderr, "pc=%d*************************************\n", pc++); - - fprintf(stderr, "Hardware program\n"); - fprintf(stderr, "----------------\n"); - - for (n = 0; n < (code->cur_node + 1); n++) { - fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " - "alu_end: %d, tex_end: %d, flags: %08x\n", n, - code->node[n].alu_offset, - code->node[n].tex_offset, - code->node[n].alu_end, code->node[n].tex_end, - code->node[n].flags); - - if (n > 0 || code->first_node_has_tex) { - fprintf(stderr, " TEX:\n"); - for (i = code->node[n].tex_offset; - i <= code->node[n].tex_offset + code->node[n].tex_end; - ++i) { - const char *instr; - - switch ((code->tex. - inst[i] >> R300_TEX_INST_SHIFT) & - 15) { - case R300_TEX_OP_LD: - instr = "TEX"; - break; - case R300_TEX_OP_KIL: - instr = "KIL"; - break; - case R300_TEX_OP_TXP: - instr = "TXP"; - break; - case R300_TEX_OP_TXB: - instr = "TXB"; - break; - default: - instr = "UNKNOWN"; - } - - fprintf(stderr, - " %s t%i, %c%i, texture[%i] (%08x)\n", - instr, - (code->tex. - inst[i] >> R300_DST_ADDR_SHIFT) & 31, - 't', - (code->tex. - inst[i] >> R300_SRC_ADDR_SHIFT) & 31, - (code->tex. - inst[i] & R300_TEX_ID_MASK) >> - R300_TEX_ID_SHIFT, - code->tex.inst[i]); - } - } - - for (i = code->node[n].alu_offset; - i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) { - char srcc[3][10], dstc[20]; - char srca[3][10], dsta[20]; - char argc[3][20]; - char arga[3][20]; - char flags[5], tmp[10]; - - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].inst1 >> (j * 6); - int rega = code->alu.inst[i].inst3 >> (j * 6); - - sprintf(srcc[j], "%c%i", - (regc & 32) ? 'c' : 't', regc & 31); - sprintf(srca[j], "%c%i", - (rega & 32) ? 'c' : 't', rega & 31); - } - - dstc[0] = 0; - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(dstc, "t%i.%s ", - (code->alu.inst[i]. - inst1 >> R300_ALU_DSTC_SHIFT) & 31, - flags); - } - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(tmp, "o%i.%s", - (code->alu.inst[i]. - inst1 >> R300_ALU_DSTC_SHIFT) & 31, - flags); - strcat(dstc, tmp); - } - - dsta[0] = 0; - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { - sprintf(dsta, "t%i.w ", - (code->alu.inst[i]. - inst3 >> R300_ALU_DSTA_SHIFT) & 31); - } - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { - sprintf(tmp, "o%i.w ", - (code->alu.inst[i]. - inst3 >> R300_ALU_DSTA_SHIFT) & 31); - strcat(dsta, tmp); - } - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { - strcat(dsta, "Z"); - } - - fprintf(stderr, - "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" - " w: %3s %3s %3s -> %-20s (%08x)\n", i, - srcc[0], srcc[1], srcc[2], dstc, - code->alu.inst[i].inst1, srca[0], srca[1], - srca[2], dsta, code->alu.inst[i].inst3); - - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].inst0 >> (j * 7); - int rega = code->alu.inst[i].inst2 >> (j * 7); - int d; - char buf[20]; - - d = regc & 31; - if (d < 12) { - switch (d % 4) { - case R300_ALU_ARGC_SRC0C_XYZ: - sprintf(buf, "%s.xyz", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_XXX: - sprintf(buf, "%s.xxx", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_YYY: - sprintf(buf, "%s.yyy", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_ZZZ: - sprintf(buf, "%s.zzz", - srcc[d / 4]); - break; - } - } else if (d < 15) { - sprintf(buf, "%s.www", srca[d - 12]); - } else if (d == 20) { - sprintf(buf, "0.0"); - } else if (d == 21) { - sprintf(buf, "1.0"); - } else if (d == 22) { - sprintf(buf, "0.5"); - } else if (d >= 23 && d < 32) { - d -= 23; - switch (d / 3) { - case 0: - sprintf(buf, "%s.yzx", - srcc[d % 3]); - break; - case 1: - sprintf(buf, "%s.zxy", - srcc[d % 3]); - break; - case 2: - sprintf(buf, "%s.Wzy", - srcc[d % 3]); - break; - } - } else { - sprintf(buf, "%i", d); - } - - sprintf(argc[j], "%s%s%s%s", - (regc & 32) ? "-" : "", - (regc & 64) ? "|" : "", - buf, (regc & 64) ? "|" : ""); - - d = rega & 31; - if (d < 9) { - sprintf(buf, "%s.%c", srcc[d / 3], - 'x' + (char)(d % 3)); - } else if (d < 12) { - sprintf(buf, "%s.w", srca[d - 9]); - } else if (d == 16) { - sprintf(buf, "0.0"); - } else if (d == 17) { - sprintf(buf, "1.0"); - } else if (d == 18) { - sprintf(buf, "0.5"); - } else { - sprintf(buf, "%i", d); - } - - sprintf(arga[j], "%s%s%s%s", - (rega & 32) ? "-" : "", - (rega & 64) ? "|" : "", - buf, (rega & 64) ? "|" : ""); - } - - fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" - " w: %8s %8s %8s op: %08x\n", - argc[0], argc[1], argc[2], - code->alu.inst[i].inst0, arga[0], arga[1], - arga[2], code->alu.inst[i].inst2); - } - } -} diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h deleted file mode 100644 index 5ce6f33cee..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/* - * Authors: - * Ben Skeggs <darktama@iinet.net.au> - * Jerome Glisse <j.glisse@gmail.com> - */ -#ifndef __R300_FRAGPROG_H_ -#define __R300_FRAGPROG_H_ - -#include "shader/program.h" -#include "shader/prog_instruction.h" - -#include "r300_context.h" -#include "radeon_program.h" - -#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 -#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 - -#if 1 - -/** - * Fragment program helper macros - */ - -/* Produce unshifted source selectors */ -#define FP_TMP(idx) (idx) -#define FP_CONST(idx) ((idx) | (1 << 5)) - -/* Produce source/dest selector dword */ -#define FP_SELC_MASK_NO 0 -#define FP_SELC_MASK_X 1 -#define FP_SELC_MASK_Y 2 -#define FP_SELC_MASK_XY 3 -#define FP_SELC_MASK_Z 4 -#define FP_SELC_MASK_XZ 5 -#define FP_SELC_MASK_YZ 6 -#define FP_SELC_MASK_XYZ 7 - -#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTC_SHIFT) | \ - (FP_SELC_MASK_##regmask << 23) | \ - (FP_SELC_MASK_##outmask << 26) | \ - ((src0) << R300_ALU_SRC0C_SHIFT) | \ - ((src1) << R300_ALU_SRC1C_SHIFT) | \ - ((src2) << R300_ALU_SRC2C_SHIFT)) - -#define FP_SELA_MASK_NO 0 -#define FP_SELA_MASK_W 1 - -#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTA_SHIFT) | \ - (FP_SELA_MASK_##regmask << 23) | \ - (FP_SELA_MASK_##outmask << 24) | \ - ((src0) << R300_ALU_SRC0A_SHIFT) | \ - ((src1) << R300_ALU_SRC1A_SHIFT) | \ - ((src2) << R300_ALU_SRC2A_SHIFT)) - -/* Produce unshifted argument selectors */ -#define FP_ARGC(source) R300_ALU_ARGC_##source -#define FP_ARGA(source) R300_ALU_ARGA_##source -#define FP_ABS(arg) ((arg) | (1 << 6)) -#define FP_NEG(arg) ((arg) ^ (1 << 5)) - -/* Produce instruction dword */ -#define FP_INSTRC(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTC_##opcode | \ - ((arg0) << R300_ALU_ARG0C_SHIFT) | \ - ((arg1) << R300_ALU_ARG1C_SHIFT) | \ - ((arg2) << R300_ALU_ARG2C_SHIFT)) - -#define FP_INSTRA(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTA_##opcode | \ - ((arg0) << R300_ALU_ARG0A_SHIFT) | \ - ((arg1) << R300_ALU_ARG1A_SHIFT) | \ - ((arg2) << R300_ALU_ARG2A_SHIFT)) - -#endif - -extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); - -extern void r300FragmentProgramDump(union rX00_fragment_program_code *c); - -extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); - -#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index f5c4c0f4a0..469c278b51 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -42,21 +42,51 @@ #include "shader/prog_parameter.h" #include "shader/prog_print.h" +#include "compiler/radeon_compiler.h" + #include "r300_state.h" -#include "r300_fragprog.h" -#include "r300_fragprog_swizzle.h" -#include "r500_fragprog.h" -#include "radeon_program.h" -#include "radeon_program_alu.h" -static void nqssadce_init(struct nqssadce_state* s) +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; + } +} + +static GLuint build_func(GLuint comparefunc) { - s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; + return comparefunc - GL_NEVER; } /** + * Collect all external state that is relevant for compiling the given + * fragment program. + */ +static void build_state( + r300ContextPtr r300, + struct gl_fragment_program *fp, + struct r300_fragment_program_external_state *state) +{ + int unit; + + _mesa_bzero(state, sizeof(*state)); + + for(unit = 0; unit < 16; ++unit) { + if (fp->Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; + + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); + } + } +} + + +/** * Transform the program to support fragment.position. * * Introduce a small fragment at the start of the program that will be @@ -65,104 +95,26 @@ static void nqssadce_init(struct nqssadce_state* s) * to read from a newly allocated temporary. * */ -static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp) { - GLuint InputsRead = compiler->fp->Base->InputsRead; + int i; - if (!(InputsRead & FRAG_BIT_WPOS)) { - compiler->fp->wpos_attr = FRAG_ATTRIB_MAX; + if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) { + fp->wpos_attr = FRAG_ATTRIB_MAX; return; } - static gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 - }; - struct prog_instruction *fpi; - GLuint window_index; - int i = 0; - for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) { - if (!(InputsRead & (1 << i))) { - InputsRead &= ~(1 << FRAG_ATTRIB_WPOS); - InputsRead |= 1 << i; - compiler->fp->Base->InputsRead = InputsRead; - compiler->fp->wpos_attr = i; + if (!(compiler->Base.Program.InputsRead & (1 << i))) { + fp->wpos_attr = i; break; } } - GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); - - _mesa_insert_instructions(compiler->program, 0, 3); - fpi = compiler->program->Instructions; - i = 0; - - /* perspective divide */ - fpi[i].Opcode = OPCODE_RCP; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_W; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; - i++; - - fpi[i].Opcode = OPCODE_MUL; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); - - fpi[i].Opcode = OPCODE_MAD; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[0].Index = tempregi; - fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[1].Index = window_index; - fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[2].Index = window_index; - fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - for (; i < compiler->program->NumInstructions; ++i) { - int reg; - for (reg = 0; reg < 3; reg++) { - if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && - fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { - fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[reg].Index = tempregi; - } - } - } + rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr); } - /** * Rewrite fragment.fogcoord to use a texture coordinate slot. * Note that fogcoord is forced into an X001 pattern, and this enforcement @@ -170,205 +122,117 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) * * See also the counterpart rewriting for vertex programs. */ -static void rewriteFog(struct r300_fragment_program_compiler *compiler) +static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp) { - struct r300_fragment_program *fp = compiler->fp; - GLuint InputsRead; + struct prog_src_register src; int i; - InputsRead = fp->Base->InputsRead; - - if (!(InputsRead & FRAG_BIT_FOGC)) { + if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) { fp->fog_attr = FRAG_ATTRIB_MAX; return; } for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) { - if (!(InputsRead & (1 << i))) { - InputsRead &= ~(1 << FRAG_ATTRIB_FOGC); - InputsRead |= 1 << i; - fp->Base->InputsRead = InputsRead; + if (!(compiler->Base.Program.InputsRead & (1 << i))) { fp->fog_attr = i; break; } } - { - struct prog_instruction *inst; - - inst = compiler->program->Instructions; - while (inst->Opcode != OPCODE_END) { - const int src_regs = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < src_regs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) { - inst->SrcReg[i].Index = fp->fog_attr; - inst->SrcReg[i].Swizzle = combine_swizzles( - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE), - inst->SrcReg[i].Swizzle); - } - } - ++inst; - } - } + memset(&src, 0, sizeof(src)); + src.File = PROGRAM_INPUT; + src.Index = fp->fog_attr; + src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); + rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src); } -static GLuint build_dtm(GLuint depthmode) -{ - switch(depthmode) { - default: - case GL_LUMINANCE: return 0; - case GL_INTENSITY: return 1; - case GL_ALPHA: return 2; - } -} - -static GLuint build_func(GLuint comparefunc) -{ - return comparefunc - GL_NEVER; -} /** - * Collect all external state that is relevant for compiling the given - * fragment program. + * Reserve hardware temporary registers for the program inputs. + * + * @note This allocation is performed explicitly, because the order of inputs + * is determined by the RS hardware. */ -static void build_state( - r300ContextPtr r300, - struct gl_fragment_program *fp, - struct r300_fragment_program_external_state *state) +static void allocate_hw_inputs( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata) { - int unit; - - _mesa_bzero(state, sizeof(*state)); - - for(unit = 0; unit < 16; ++unit) { - if (fp->Base.ShadowSamplers & (1 << unit)) { - struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - - state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); - state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); - } + GLuint InputsRead = c->Base.Program.InputsRead; + int i; + GLuint hwindex = 0; + + /* Primary colour */ + if (InputsRead & FRAG_BIT_COL0) + allocate(mydata, FRAG_ATTRIB_COL0, hwindex++); + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) + allocate(mydata, FRAG_ATTRIB_COL1, hwindex++); + InputsRead &= ~FRAG_BIT_COL1; + + /* Texcoords */ + for (i = 0; i < 8; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) + allocate(mydata, FRAG_ATTRIB_TEX0+i, hwindex++); } -} + InputsRead &= ~FRAG_BITS_TEX_ANY; -static void rewrite_depth_out(struct gl_program *prog) -{ - struct prog_instruction *inst; + /* Fogcoords treated as a texcoord */ + if (InputsRead & FRAG_BIT_FOGC) + allocate(mydata, FRAG_ATTRIB_FOGC, hwindex++); + InputsRead &= ~FRAG_BIT_FOGC; - for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) { - if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH) - continue; + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) + allocate(mydata, FRAG_ATTRIB_WPOS, hwindex++); + InputsRead &= ~FRAG_BIT_WPOS; - if (inst->DstReg.WriteMask & WRITEMASK_Z) { - inst->DstReg.WriteMask = WRITEMASK_W; - } else { - inst->DstReg.WriteMask = 0; - continue; - } - - switch (inst->Opcode) { - case OPCODE_FRC: - case OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - break; - case OPCODE_CMP: - case OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); - break; - default: - // Scalar instructions needn't be reswizzled - break; - } - } + /* Anything else */ + if (InputsRead) + rc_error(&c->Base, "Don't know how to handle inputs 0x%x\n", InputsRead); } -void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp) + +static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_program_cont *cont, struct r300_fragment_program *fp) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_fragment_program_compiler compiler; - compiler.r300 = r300; - compiler.fp = fp; - compiler.code = &fp->code; - compiler.program = fp->Base; + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & RADEON_PIXEL) ? GL_TRUE : GL_FALSE; - if (RADEON_DEBUG & DEBUG_PIXEL) { - fflush(stdout); + compiler.code = &fp->code; + compiler.state = fp->state; + compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE; + compiler.OutputDepth = FRAG_RESULT_DEPTH; + compiler.OutputColor = FRAG_RESULT_COLOR; + compiler.AllocateHwInputs = &allocate_hw_inputs; + + if (compiler.Base.Debug) { + fflush(stderr); _mesa_printf("Fragment Program: Initial program:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); + _mesa_print_program(&cont->Base.Base); + fflush(stderr); } - insert_WPOS_trailer(&compiler); - - rewriteFog(&compiler); - - rewrite_depth_out(compiler.program); - - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_program_transformation transformations[] = { - { &r500_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformDeriv, 0 }, - { &radeonTransformTrigScale, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 4, transformations); - } else { - struct radeon_program_transformation transformations[] = { - { &r300_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformTrigSimple, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 3, transformations); - } - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: After native rewrite:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } + rc_mesa_to_rc_program(&compiler.Base, &cont->Base.Base); - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r500FPIsNativeSwizzle, - .BuildSwizzle = &r500FPBuildSwizzle - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } else { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } + insert_WPOS_trailer(&compiler, fp); - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } + rewriteFog(&compiler, fp); - if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler)) - fp->error = GL_TRUE; + r3xx_compile_fragment_program(&compiler); + fp->error = compiler.Base.Error; - fp->translated = GL_TRUE; + fp->InputsRead = compiler.Base.Program.InputsRead; - if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300->vtbl.FragmentProgramDump(&fp->code); + rc_destroy(&compiler.Base); } -struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx) +struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_fragment_program_cont *fp_list; @@ -389,11 +253,11 @@ struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx) fp = _mesa_calloc(sizeof(struct r300_fragment_program)); fp->state = state; - fp->translated = GL_FALSE; - fp->Base = _mesa_clone_program(ctx, &ctx->FragmentProgram._Current->Base); fp->next = fp_list->progs; fp_list->progs = fp; + translate_fragment_program(ctx, fp_list, fp); + return r300->selected_fp = fp; } diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h index 5e103ee408..3d64c08cee 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h @@ -32,8 +32,6 @@ #include "r300_context.h" -extern void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp); - -struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx); +struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx); #endif diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index ddabd53992..5cb04e2bb6 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/imports.h" #include "main/macros.h" #include "main/context.h" +#include "main/simple_list.h" #include "swrast/swrast.h" #include "radeon_common.h" @@ -55,7 +56,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_vertprog.h" #include "radeon_reg.h" #include "r300_emit.h" -#include "r300_fragprog.h" #include "r300_context.h" #include "vblank.h" @@ -66,6 +66,66 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define CLEARBUFFER_DEPTH 0x2 #define CLEARBUFFER_STENCIL 0x4 +#if 1 + +/** + * Fragment program helper macros + */ + +/* Produce unshifted source selectors */ +#define FP_TMP(idx) (idx) +#define FP_CONST(idx) ((idx) | (1 << 5)) + +/* Produce source/dest selector dword */ +#define FP_SELC_MASK_NO 0 +#define FP_SELC_MASK_X 1 +#define FP_SELC_MASK_Y 2 +#define FP_SELC_MASK_XY 3 +#define FP_SELC_MASK_Z 4 +#define FP_SELC_MASK_XZ 5 +#define FP_SELC_MASK_YZ 6 +#define FP_SELC_MASK_XYZ 7 + +#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTC_SHIFT) | \ + (FP_SELC_MASK_##regmask << 23) | \ + (FP_SELC_MASK_##outmask << 26) | \ + ((src0) << R300_ALU_SRC0C_SHIFT) | \ + ((src1) << R300_ALU_SRC1C_SHIFT) | \ + ((src2) << R300_ALU_SRC2C_SHIFT)) + +#define FP_SELA_MASK_NO 0 +#define FP_SELA_MASK_W 1 + +#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTA_SHIFT) | \ + (FP_SELA_MASK_##regmask << 23) | \ + (FP_SELA_MASK_##outmask << 24) | \ + ((src0) << R300_ALU_SRC0A_SHIFT) | \ + ((src1) << R300_ALU_SRC1A_SHIFT) | \ + ((src2) << R300_ALU_SRC2A_SHIFT)) + +/* Produce unshifted argument selectors */ +#define FP_ARGC(source) R300_ALU_ARGC_##source +#define FP_ARGA(source) R300_ALU_ARGA_##source +#define FP_ABS(arg) ((arg) | (1 << 6)) +#define FP_NEG(arg) ((arg) ^ (1 << 5)) + +/* Produce instruction dword */ +#define FP_INSTRC(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTC_##opcode | \ + ((arg0) << R300_ALU_ARG0C_SHIFT) | \ + ((arg1) << R300_ALU_ARG1C_SHIFT) | \ + ((arg2) << R300_ALU_ARG2C_SHIFT)) + +#define FP_INSTRA(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTA_##opcode | \ + ((arg0) << R300_ALU_ARG0A_SHIFT) | \ + ((arg1) << R300_ALU_ARG1A_SHIFT) | \ + ((arg2) << R300_ALU_ARG2A_SHIFT)) + +#endif + static void r300EmitClearState(GLcontext * ctx); static void r300ClearBuffer(r300ContextPtr r300, int flags, @@ -78,7 +138,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, GLuint cbpitch = 0; r300ContextPtr rmesa = r300; - if (RADEON_DEBUG & DEBUG_IOCTL) + if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n", __FUNCTION__, rrb, dPriv->x, dPriv->y, dPriv->w, dPriv->h); @@ -109,18 +169,21 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, } #if 1 if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) { - assert(rrbd != 0); - cbpitch = (rrbd->pitch / rrbd->cpp); + uint32_t zbpitch = (rrbd->pitch / rrbd->cpp); if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ - cbpitch |= R300_DEPTHMACROTILE_ENABLE; + zbpitch |= R300_DEPTHMACROTILE_ENABLE; } if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ - cbpitch |= R300_DEPTHMICROTILE_TILED; + zbpitch |= R300_DEPTHMICROTILE_TILED; } BEGIN_BATCH_NO_AUTOSTATE(6); OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch); + OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1); + if (!r300->radeon.radeonScreen->kernel_mm) + OUT_BATCH(zbpitch); + else + OUT_BATCH_RELOC(zbpitch, rrbd->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); } #endif @@ -444,7 +507,15 @@ static void r300EmitClearState(GLcontext * ctx) R500_ALU_RGBA_A_SWIZ_0; r500fp.cmd[7] = 0; - emit_r500fp(ctx, &r500fp); + if (r300->radeon.radeonScreen->kernel_mm) { + emit_r500fp(ctx, &r500fp); + } else { + int dwords = r500fp.check(ctx,&r500fp); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(r500fp.cmd, dwords); + END_BATCH(); + } + } BEGIN_BATCH(2); @@ -488,6 +559,7 @@ static void r300EmitClearState(GLcontext * ctx) struct radeon_state_atom vpu; uint32_t _cmd[10]; R300_STATECHANGE(r300, pvs); + R300_STATECHANGE(r300, vap_flush); R300_STATECHANGE(r300, vpi); BEGIN_BATCH(4); @@ -508,12 +580,12 @@ static void r300EmitClearState(GLcontext * ctx) 0, 0xf, PVS_DST_REG_OUT); vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[4] = 0x0; vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, @@ -521,17 +593,27 @@ static void r300EmitClearState(GLcontext * ctx) vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, - - VSF_FLAG_NONE); + NEGATE_NONE); vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[8] = 0x0; - r300->vap_flush_needed = GL_TRUE; - emit_vpu(ctx, &vpu); + if (r300->radeon.radeonScreen->kernel_mm) { + int dwords = r300->hw.vap_flush.check(ctx,&r300->hw.vap_flush); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(r300->hw.vap_flush.cmd, dwords); + END_BATCH(); + emit_vpu(ctx, &vpu); + } else { + int dwords = vpu.check(ctx,&vpu); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(vpu.cmd, dwords); + END_BATCH(); + } + } } @@ -546,7 +628,7 @@ static int r300KernelClear(GLcontext *ctx, GLuint flags) /* Make sure it fits there. */ radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs); - + if (flags & BUFFER_BIT_COLOR0) { rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0); radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, @@ -623,7 +705,7 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) int i, ret; struct gl_framebuffer *fb = ctx->DrawBuffer; - if (RADEON_DEBUG & DEBUG_IOCTL) + if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "r300Clear\n"); if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { @@ -685,14 +767,13 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) } if (swrast_mask) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, swrast_mask); _swrast_Clear(ctx, swrast_mask); } } - void r300InitIoctlFuncs(struct dd_function_table *functions) { functions->Clear = r300Clear; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 357c600af9..b9ccd098dc 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1022,15 +1022,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \ R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD) /** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */ # define R300_RE_SHADE_MODEL_FLAT ( \ R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \ R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \ - R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD) /* Specifies red & green components of fill color -- S312 format -- Backwards comp. */ #define R300_GA_SOLID_RG 0x427c @@ -1128,6 +1126,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* SU Depth Offset value */ #define R300_SU_DEPTH_OFFSET 0x42c4 +#define R300_SU_REG_DEST 0x42c8 +# define R300_RASTER_PIPE_SELECT_0 (1 << 0) +# define R300_RASTER_PIPE_SELECT_1 (1 << 1) +# define R300_RASTER_PIPE_SELECT_2 (1 << 2) +# define R300_RASTER_PIPE_SELECT_3 (1 << 3) +# define R300_RASTER_PIPE_SELECT_ALL 0xf + /* BEGIN: Rasterization / Interpolators - many guesses */ @@ -2014,6 +2019,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_FG_ALPHA_VALUE 0x4be0 # define R500_FG_ALPHA_VALUE_MASK 0x0000ffff +#define RV530_FG_ZBREG_DEST 0x4be8 +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0) + /* gap */ /* Fragment program parameters in 7.16 floating point */ @@ -2303,6 +2313,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_Z_WRITE_ENABLE (1 << 2) # define R300_Z_SIGNED_COMPARE (1 << 3) # define R300_STENCIL_FRONT_BACK (1 << 4) +# define R400_ZSIGNED_MAGNITUDE (1 << 5) +# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6) #define R300_ZB_ZSTENCILCNTL 0x4f04 /* functions */ @@ -2667,6 +2679,24 @@ enum { PVS_SRC_ADDR_MODE_1_SHIFT = 32, }; + +#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ + (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ + | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ + | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ + | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ + | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) + +#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ + (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ + | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ + | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ + | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ + | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ + | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) + /*\}*/ /* BEGIN: Packet 3 commands */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 08d67b73ed..3cd38753b8 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -64,6 +64,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "vbo/vbo.h" +#include "vbo/vbo_split.h" #include "tnl/tnl.h" #include "tnl/t_vp_build.h" #include "radeon_reg.h" @@ -172,64 +173,45 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) return num_verts - verts_off; } -static void r300EmitElts(GLcontext * ctx, unsigned long n_elts) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - void *out; - GLuint size; - - size = ((rmesa->ind_buf.is_32bit ? 4 : 2) * n_elts + 3) & ~3; - - radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo, - &rmesa->radeon.tcl.elt_dma_offset, size, 4); - radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); - out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset; - memcpy(out, rmesa->ind_buf.ptr, size); - radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo); -} - -static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) +static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type, int offset) { BATCH_LOCALS(&rmesa->radeon); + int size; + + /* offset is in indices */ + BEGIN_BATCH(10); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); + if (rmesa->ind_buf.is_32bit) { + /* convert to bytes */ + offset *= 4; + size = vertex_count; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + /* convert to bytes */ + offset *= 2; + size = (vertex_count + 1) >> 1; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type); + } - r300_emit_scissor(rmesa->radeon.glCtx); - if (vertex_count > 0) { - int size; - - BEGIN_BATCH(10); - OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); - if (rmesa->ind_buf.is_32bit) { - size = vertex_count; - OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - ((vertex_count + 0) << 16) | type | - R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - } else { - size = (vertex_count + 1) >> 1; - OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - ((vertex_count + 0) << 16) | type); - } - - if (!rmesa->radeon.radeonScreen->kernel_mm) { - OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); - OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | - (R300_VAP_PORT_IDX0 >> 2)); - OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset, - rmesa->radeon.tcl.elt_dma_bo, - rmesa->radeon.tcl.elt_dma_offset, - RADEON_GEM_DOMAIN_GTT, 0, 0); - OUT_BATCH(size); - } else { - OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); - OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | - (R300_VAP_PORT_IDX0 >> 2)); - OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset); - OUT_BATCH(size); - radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, - rmesa->radeon.tcl.elt_dma_bo, - RADEON_GEM_DOMAIN_GTT, 0, 0); - } - END_BATCH(); + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset + offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(size); + } else { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH(rmesa->ind_buf.bo_offset + offset); + OUT_BATCH(size); + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0); } + END_BATCH(); } static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) @@ -239,7 +221,7 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; int i; - if (RADEON_DEBUG & DEBUG_VERTS) + if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, offset); @@ -340,7 +322,7 @@ static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) { BATCH_LOCALS(&rmesa->radeon); - r300_emit_scissor(rmesa->radeon.glCtx); + r300_emit_scissor(rmesa->radeon.glCtx); BEGIN_BATCH(3); OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); @@ -359,14 +341,16 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) if (type < 0 || num_verts <= 0) return; - /* Make space for at least 128 dwords. - * This is supposed to ensure that we can get all rendering - * commands into a single command buffer. - */ - rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__); + if (rmesa->ind_buf.bo) { + GLuint first, incr, offset = 0; + + if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) && + num_verts > 65500) { + WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim); + return; + } + - if (rmesa->ind_buf.ptr) { - r300EmitElts(ctx, num_verts); r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0); if (rmesa->radeon.radeonScreen->kernel_mm) { BEGIN_BATCH_NO_AUTOSTATE(2); @@ -374,45 +358,56 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) OUT_BATCH(rmesa->radeon.tcl.aos[0].count); END_BATCH(); } - r300FireEB(rmesa, num_verts, type); - } else { - r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start); - r300FireAOS(rmesa, num_verts, type); - } - COMMIT_BATCH(); -} -static void r300RunRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - int i; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - - r300UpdateShaders(rmesa); - r300EmitArrays(ctx); + r300_emit_scissor(rmesa->radeon.glCtx); + while (num_verts > 0) { + int nr; + int align; + + nr = MIN2(num_verts, 65535); + nr -= (nr - first) % incr; + + /* get alignment for IB correct */ + if (nr != num_verts) { + do { + align = nr * (rmesa->ind_buf.is_32bit ? 4 : 2); + if (align % 4) + nr -= incr; + } while(align % 4); + if (nr <= 0) { + WARN_ONCE("did the impossible happen? we never aligned nr to dword\n"); + return; + } + + } + r300FireEB(rmesa, nr, type, offset); - r300UpdateShaderStates(rmesa); + num_verts -= nr; + offset += nr; + } - r300EmitCacheFlush(rmesa); - radeonEmitState(&rmesa->radeon); + } else { + GLuint first, incr, offset = 0; - for (i = 0; i < vb->PrimitiveCount; i++) { - GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); - GLuint start = vb->Primitive[i].start; - GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; - r300RunRenderPrimitive(ctx, start, end, prim); + if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) && + num_verts > 65535) { + WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim); + return; + } + r300_emit_scissor(rmesa->radeon.glCtx); + while (num_verts > 0) { + int nr; + nr = MIN2(num_verts, 65535); + nr -= (nr - first) % incr; + r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start + offset); + r300FireAOS(rmesa, nr, type); + num_verts -= nr; + offset += nr; + } } - - r300EmitCacheFlush(rmesa); - - radeonReleaseArrays(ctx, ~0); + COMMIT_BATCH(); } - static const char *getFallbackString(uint32_t bit) { switch (bit) { @@ -449,10 +444,10 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) r300ContextPtr rmesa = R300_CONTEXT(ctx); uint32_t old_fallback = rmesa->fallback; static uint32_t fallback_warn = 0; - + if (mode) { if ((fallback_warn & bit) == 0) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) _mesa_fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(bit)); fallback_warn |= bit; } @@ -470,7 +465,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) /* update only if we change from no raster fallbacks to some raster fallbacks */ if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) && ((bit & R300_RASTER_FALLBACK_MASK) > 0)) { - + radeon_firevertices(&rmesa->radeon); rmesa->radeon.swtcl.RenderIndex = ~0; _swsetup_Wakeup( ctx ); @@ -480,7 +475,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) /* update only if we have disabled all tcl fallbacks */ if (rmesa->options.hw_tcl_enabled) { - if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { + if ((old_fallback & R300_TCL_FALLBACK_MASK) == bit) { R300_STATECHANGE(rmesa, vap_cntl_status); rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS; } @@ -489,7 +484,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) /* update only if we have disabled all raster fallbacks */ if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { _swrast_flush( ctx ); - + tnl->Driver.Render.Start = r300RenderStart; tnl->Driver.Render.Finish = r300RenderFinish; tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; @@ -497,38 +492,10 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) tnl->Driver.Render.BuildVertices = _tnl_build_vertices; tnl->Driver.Render.CopyPV = _tnl_copy_pv; tnl->Driver.Render.Interp = _tnl_interp; - + _tnl_invalidate_vertex_state( ctx, ~0 ); _tnl_invalidate_vertices( ctx, ~0 ); } } - -} - -static GLboolean r300RunNonTCLRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (rmesa->fallback & R300_RASTER_FALLBACK_MASK) - return GL_TRUE; - if (rmesa->options.hw_tcl_enabled == GL_FALSE) - return GL_TRUE; - - r300RunRender(ctx, stage); - - return GL_FALSE; } - -const struct tnl_pipeline_stage _r300_render_stage = { - "r300 Hardware Rasterization", - NULL, - NULL, - NULL, - NULL, - r300RunNonTCLRender -}; diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 62228a3786..a4f9db13ec 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -38,7 +38,7 @@ static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont while (fp) { tmp = fp->next; - _mesa_reference_program(ctx, &fp->Base, NULL); + rc_constants_destroy(&fp->code.constants); _mesa_free(fp); fp = tmp; } @@ -50,6 +50,7 @@ static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *c while (vp) { tmp = vp->next; + rc_constants_destroy(&vp->code.constants); _mesa_reference_vertprog(ctx, &vp->Base, NULL); _mesa_free(vp); vp = tmp; @@ -122,15 +123,11 @@ static GLboolean r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { if (target == GL_FRAGMENT_PROGRAM_ARB) { - struct r300_fragment_program *fp = r300SelectFragmentShader(ctx); - if (!fp->translated) - r300TranslateFragmentShader(ctx, fp); + struct r300_fragment_program *fp = r300SelectAndTranslateFragmentShader(ctx); return !fp->error; } else { - struct r300_vertex_program *vp = r300SelectVertexShader(ctx); - if (!vp->translated) - r300TranslateVertexShader(vp); + struct r300_vertex_program *vp = r300SelectAndTranslateVertexShader(ctx); return !vp->error; } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 12fbf281d9..3060f49aaf 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -62,8 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_emit.h" #include "r300_tex.h" #include "r300_fragprog_common.h" -#include "r300_fragprog.h" -#include "r500_fragprog.h" #include "r300_render.h" #include "r300_vertprog.h" @@ -373,6 +371,7 @@ static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) p = (GLint) plane - (GLint) GL_CLIP_PLANE0; ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + R300_STATECHANGE( rmesa, vap_flush ); R300_STATECHANGE( rmesa, vpucp[p] ); rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; @@ -458,7 +457,7 @@ static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - return ctx->FragmentProgram._Current && r300->selected_fp->writes_depth; + return ctx->FragmentProgram._Current && r300->selected_fp->code.writes_depth; } static void r300SetEarlyZState(GLcontext * ctx) @@ -473,6 +472,8 @@ static void r300SetEarlyZState(GLcontext * ctx) topZ = R300_ZTOP_DISABLE; else if (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->UsesKill) topZ = R300_ZTOP_DISABLE; + else if (r300->radeon.query.current) + topZ = R300_ZTOP_DISABLE; if (topZ != r300->hw.zstencil_format.cmd[2]) { /* Note: This completely reemits the stencil format. @@ -589,7 +590,9 @@ static void r300SetDepthState(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, zs); - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE|R300_STENCIL_FRONT_BACK; + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= (R300_STENCIL_ENABLE | + R300_STENCIL_FRONT_BACK | + R500_STENCIL_REFMASK_FRONT_BACK); r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); if (ctx->Depth.Test) { @@ -603,11 +606,16 @@ static void r300SetDepthState(GLcontext * ctx) static void r300CatchStencilFallback(GLcontext *ctx) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); const unsigned back = ctx->Stencil._BackFace; - if (ctx->Stencil._Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] - || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back] - || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) { + if (rmesa->radeon.radeonScreen->kernel_mm && + (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)) { + r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE); + } else if (ctx->Stencil._Enabled && + (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] + || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back] + || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) { r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_TRUE); } else { r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE); @@ -914,11 +922,24 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (flag << R300_S_BACK_FUNC_SHIFT); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R500_STENCIL_REFMASK_FRONT_BACK; + R300_STATECHANGE(rmesa, zsb); + refmask = ((ctx->Stencil.Ref[back] & 0xff) << R300_STENCILREF_SHIFT) + | ((ctx->Stencil.ValueMask[back] & 0xff) << R300_STENCILMASK_SHIFT); + + rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] &= + ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) | + (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT)); + rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |= refmask; + } } static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) { r300ContextPtr rmesa = R300_CONTEXT(ctx); + const unsigned back = ctx->Stencil._BackFace; r300CatchStencilFallback(ctx); @@ -930,6 +951,13 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) (ctx->Stencil. WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT; + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + R300_STATECHANGE(rmesa, zsb); + rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |= + (ctx->Stencil. + WriteMask[back] & R300_STENCILREF_MASK) << + R300_STENCILWRITEMASK_SHIFT; + } } static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, @@ -1046,53 +1074,6 @@ void r300UpdateViewportOffset(GLcontext * ctx) radeonUpdateScissor(ctx); } -static void -r300FetchStateParameter(GLcontext * ctx, - const gl_state_index state[STATE_LENGTH], - GLfloat * value) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - - switch (state[0]) { - case STATE_INTERNAL: - switch (state[1]) { - case STATE_R300_WINDOW_DIMENSION: { - __DRIdrawablePrivate * drawable = radeon_get_drawable(&r300->radeon); - value[0] = drawable->w * 0.5f; /* width*0.5 */ - value[1] = drawable->h * 0.5f; /* height*0.5 */ - value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ - value[3] = 1.0F; /* not used */ - break; - } - - case STATE_R300_TEXRECT_FACTOR:{ - struct gl_texture_object *t = - ctx->Texture.Unit[state[2]].CurrentTex[TEXTURE_RECT_INDEX]; - - if (t && t->Image[0][t->BaseLevel]) { - struct gl_texture_image *image = - t->Image[0][t->BaseLevel]; - value[0] = 1.0 / image->Width2; - value[1] = 1.0 / image->Height2; - } else { - value[0] = 1.0; - value[1] = 1.0; - } - value[2] = 1.0; - value[3] = 1.0; - break; - } - - default: - break; - } - break; - - default: - break; - } -} - /** * Update R300's own internal state parameters. * For now just STATE_R300_WINDOW_DIMENSION @@ -1101,7 +1082,6 @@ static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) { r300ContextPtr rmesa = R300_CONTEXT(ctx); struct gl_program_parameter_list *paramList; - GLuint i; if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))) return; @@ -1109,21 +1089,12 @@ static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) if (!ctx->FragmentProgram._Current || !rmesa->selected_fp) return; - paramList = rmesa->selected_fp->Base->Parameters; + paramList = ctx->FragmentProgram._Current->Base.Parameters; if (!paramList) return; _mesa_load_state_parameters(ctx, paramList); - - for (i = 0; i < paramList->NumParameters; i++) { - if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { - r300FetchStateParameter(ctx, - paramList->Parameters[i]. - StateIndexes, - paramList->ParameterValues[i]); - } - } } /* ============================================================= @@ -1230,7 +1201,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r300_fragment_program_code *code = &r300->selected_fp->code.r300; + struct r300_fragment_program_code *code = &r300->selected_fp->code.code.r300; R300_STATECHANGE(r300, fpt); @@ -1272,7 +1243,7 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r500_fragment_program_code *code = &r300->selected_fp->code.r500; + struct r500_fragment_program_code *code = &r300->selected_fp->code.code.r500; /* find all the texture instructions and relocate the texture units */ for (i = 0; i < code->inst_end + 1; i++) { @@ -1312,6 +1283,7 @@ static GLuint translate_lod_bias(GLfloat bias) return (((GLuint)b) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK; } + static void r300SetupTextures(GLcontext * ctx) { int i, mtu; @@ -1334,7 +1306,7 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.txe.cmd[R300_TXE_ENABLE] = 0x0; mtu = r300->radeon.glCtx->Const.MaxTextureUnits; - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "mtu=%d\n", mtu); if (mtu > R300_MAX_TEXTURE_UNITS) { @@ -1359,7 +1331,7 @@ static void r300SetupTextures(GLcontext * ctx) t->pp_txformat & 0xff); } - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "Activating texture unit %d\n", i); @@ -1404,6 +1376,28 @@ static void r300SetupTextures(GLcontext * ctx) } } + /* R3xx and R4xx chips require that the texture unit corresponding to + * KIL instructions is really enabled. + * + * We do some fakery here and in the state atom emit logic to enable + * the texture without tripping up the CS checker in the kernel. + */ + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) { + last_hw_tmu++; + + r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; + + r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.size.cmd[R300_TEX_VALUE_0] = 0; /* 1x1 texture */ + r300->hw.tex.format.cmd[R300_TEX_VALUE_0] = 0; /* A8 format */ + r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0] = 0; + } + } + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1); r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = @@ -1421,19 +1415,9 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); - if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) { - // The KILL operation requires the first texture unit - // to be enabled. - r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; - r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; - r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = - cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1); - } - } r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings); - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu); } @@ -1460,11 +1444,11 @@ static void r300SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - InputsRead = r300->selected_fp->Base->InputsRead; + InputsRead = r300->selected_fp->InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1554,11 +1538,11 @@ static void r500SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - InputsRead = r300->selected_fp->Base->InputsRead; + InputsRead = r300->selected_fp->InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1703,7 +1687,7 @@ void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr(cap), state ? "GL_TRUE" : "GL_FALSE"); @@ -1780,7 +1764,7 @@ static void r300ResetHwState(r300ContextPtr r300) has_tcl = r300->options.hw_tcl_enabled; - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "%s\n", __FUNCTION__); radeon_firevertices(&r300->radeon); @@ -1997,9 +1981,7 @@ void r300UpdateShaders(r300ContextPtr rmesa) { struct r300_fragment_program *fp; - fp = r300SelectFragmentShader(ctx); - if (!fp->translated) - r300TranslateFragmentShader(ctx, fp); + fp = r300SelectAndTranslateFragmentShader(ctx); r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error); } @@ -2024,9 +2006,7 @@ void r300UpdateShaders(r300ContextPtr rmesa) } } - vp = r300SelectVertexShader(ctx); - if (!vp->translated) - r300TranslateVertexShader(vp); + vp = r300SelectAndTranslateVertexShader(ctx); r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error); } @@ -2035,24 +2015,61 @@ void r300UpdateShaders(r300ContextPtr rmesa) rmesa->radeon.NewGLState = 0; } -static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, - struct gl_program *program, struct prog_src_register srcreg) +static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index, GLfloat * buffer) { static const GLfloat dummy[4] = { 0, 0, 0, 0 }; + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct rc_constant * rcc = &rmesa->selected_fp->code.constants.Constants[index]; + + switch(rcc->Type) { + case RC_CONSTANT_EXTERNAL: + return ctx->FragmentProgram._Current->Base.Parameters->ParameterValues[rcc->u.External]; + case RC_CONSTANT_IMMEDIATE: + return rcc->u.Immediate; + case RC_CONSTANT_STATE: + switch(rcc->u.State[0]) { + case RC_STATE_SHADOW_AMBIENT: { + const int unit = (int) rcc->u.State[1]; + const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current; + if (texObj) { + buffer[0] = + buffer[1] = + buffer[2] = + buffer[3] = texObj->CompareFailValue; + } + return buffer; + } - switch(srcreg.File) { - case PROGRAM_LOCAL_PARAM: - return program->LocalParams[srcreg.Index]; - case PROGRAM_ENV_PARAM: - return ctx->FragmentProgram.Parameters[srcreg.Index]; - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - return program->Parameters->ParameterValues[srcreg.Index]; - default: - _mesa_problem(ctx, "get_fragmentprogram_constant: Unknown\n"); - return dummy; + case RC_STATE_R300_WINDOW_DIMENSION: { + __DRIdrawablePrivate * drawable = radeon_get_drawable(&rmesa->radeon); + buffer[0] = drawable->w * 0.5f; /* width*0.5 */ + buffer[1] = drawable->h * 0.5f; /* height*0.5 */ + buffer[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ + buffer[3] = 1.0F; /* not used */ + return buffer; + } + + case RC_STATE_R300_TEXRECT_FACTOR: { + struct gl_texture_object *t = + ctx->Texture.Unit[rcc->u.State[1]].CurrentTex[TEXTURE_RECT_INDEX]; + + if (t && t->Image[0][t->BaseLevel]) { + struct gl_texture_image *image = + t->Image[0][t->BaseLevel]; + buffer[0] = 1.0 / image->Width2; + buffer[1] = 1.0 / image->Height2; + } else { + buffer[0] = 1.0; + buffer[1] = 1.0; + } + buffer[2] = 1.0; + buffer[3] = 1.0; + return buffer; + } + } } + + return dummy; } @@ -2061,9 +2078,9 @@ static void r300SetupPixelShader(GLcontext *ctx) r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_fragment_program *fp = rmesa->selected_fp; struct r300_fragment_program_code *code; - int i, k; + int i; - code = &fp->code.r300; + code = &fp->code.code.r300; R300_STATECHANGE(rmesa, fpi[0]); R300_STATECHANGE(rmesa, fpi[1]); @@ -2074,38 +2091,24 @@ static void r300SetupPixelShader(GLcontext *ctx) rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length); rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length); for (i = 0; i < code->alu.length; i++) { - rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0; - rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1; - rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst2; - rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst3; + rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_inst; + rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_addr; + rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_inst; + rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_addr; } R300_STATECHANGE(rmesa, fp); - rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->cur_node | (code->first_node_has_tex << 3); - rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->max_temp_idx; - rmesa->hw.fp.cmd[R300_FP_CNTL2] = - (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | - ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | - (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | - ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); - /* I just want to say, the way these nodes are stored.. weird.. */ - for (i = 0, k = (4 - (code->cur_node + 1)); i < 4; i++, k++) { - if (i < (code->cur_node + 1)) { - rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = - (code->node[i].alu_offset << R300_ALU_START_SHIFT) | - (code->node[i].alu_end << R300_ALU_SIZE_SHIFT) | - (code->node[i].tex_offset << R300_TEX_START_SHIFT) | - (code->node[i].tex_end << R300_TEX_SIZE_SHIFT) | - code->node[i].flags; - } else { - rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; - } - } + rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->config; + rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->pixsize; + rmesa->hw.fp.cmd[R300_FP_CNTL2] = code->code_offset; + for (i = 0; i < 4; i++) + rmesa->hw.fp.cmd[R300_FP_NODE0 + i] = code->code_addr[i]; R300_STATECHANGE(rmesa, fpp); - rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4); - for (i = 0; i < code->const_nr; i++) { - const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]); + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, fp->code.constants.Count * 4); + for (i = 0; i < fp->code.constants.Count; i++) { + GLfloat buffer[4]; + const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); @@ -2137,19 +2140,19 @@ static void r500SetupPixelShader(GLcontext *ctx) ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; - code = &fp->code.r500; + code = &fp->code.code.r500; R300_STATECHANGE(rmesa, fp); rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx; rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] = - R500_US_CODE_START_ADDR(code->inst_offset) | + R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end); rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] = - R500_US_CODE_RANGE_ADDR(code->inst_offset) | + R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end); rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] = - R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */ + R500_US_CODE_OFFSET_ADDR(0); R300_STATECHANGE(rmesa, r500fp); /* Emit our shader... */ @@ -2165,14 +2168,15 @@ static void r500SetupPixelShader(GLcontext *ctx) bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6); R300_STATECHANGE(rmesa, r500fp_const); - for (i = 0; i < code->const_nr; i++) { - const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]); + for (i = 0; i < fp->code.constants.Count; i++) { + GLfloat buffer[4]; + const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]); } - bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->code.constants.Count * 4); } void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten) @@ -2273,6 +2277,15 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) _mesa_update_draw_buffer_bounds(ctx); R300_STATECHANGE(r300, cb); + R300_STATECHANGE(r300, zb); + } + + if (new_state & (_NEW_LIGHT)) { + R300_STATECHANGE(r300, shade2); + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) + r300->hw.shade2.cmd[1] |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; + else + r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; } r300->radeon.NewGLState |= new_state; @@ -2345,13 +2358,9 @@ void r300InitShaderFunctions(r300ContextPtr r300) r300->vtbl.SetupRSUnit = r500SetupRSUnit; r300->vtbl.SetupPixelShader = r500SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; - r300->vtbl.BuildFragmentProgramHwCode = r500BuildFragmentProgramHwCode; - r300->vtbl.FragmentProgramDump = r500FragmentProgramDump; } else { r300->vtbl.SetupRSUnit = r300SetupRSUnit; r300->vtbl.SetupPixelShader = r300SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; - r300->vtbl.BuildFragmentProgramHwCode = r300BuildFragmentProgramHwCode; - r300->vtbl.FragmentProgramDump = r300FragmentProgramDump; } } diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 56ed519cf4..ee2c71e1a7 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -39,12 +39,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_emit.h" #include "r300_tex.h" #include "r300_render.h" +#include "main/simple_list.h" #define EMIT_ATTR( ATTR, STYLE ) \ do { \ - rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ - rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ - rmesa->radeon.swtcl.vertex_attr_count++; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ } while (0) #define EMIT_PAD( N ) \ @@ -76,12 +77,16 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ GLuint InputsRead = 0; GLuint OutputsWritten = 0; int num_attrs = 0; - GLuint fp_reads = rmesa->selected_fp->Base->InputsRead; + GLuint fp_reads = rmesa->selected_fp->InputsRead; struct vertex_attribute *attrs = rmesa->vbuf.attribs; + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0; rmesa->radeon.swtcl.vertex_attr_count = 0; + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); + /* We always want non Ndc coords format */ VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; @@ -225,6 +230,7 @@ static void r300PrepareVertices(GLcontext *ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint InputsRead, OutputsWritten; + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten); r300SetupVAP(ctx, InputsRead, OutputsWritten); @@ -238,6 +244,33 @@ static void r300PrepareVertices(GLcontext *ctx) rmesa->radeon.swtcl.vertex_size /= 4; } +static void r300_predict_emit_size( r300ContextPtr rmesa ) +{ + if (!rmesa->radeon.swtcl.emit_prediction) { + const int vertex_size = 7; + const int prim_size = 3; + const int cache_flush_size = 4; + const int pre_emit_state = 4; + const int scissor_size = 3; + const int state_size = radeonCountStateEmitSize(&rmesa->radeon); + + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, + state_size + pre_emit_state + scissor_size + + vertex_size + prim_size + cache_flush_size * 2, + __FUNCTION__)) + rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon); + else + rmesa->radeon.swtcl.emit_prediction = state_size; + + rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw + + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state; + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, + "%s, size %d\n", + __func__, rmesa->radeon.cmdbuf.cs->cdw + + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state); + } +} + static GLuint reduced_prim[] = { GL_POINTS, @@ -271,11 +304,21 @@ static void r300RasterPrimitive( GLcontext *ctx, GLuint prim ); #define HAVE_POLYGONS 1 #define HAVE_ELTS 1 +static void* r300_alloc_verts(r300ContextPtr rmesa, GLuint n, GLuint size) +{ + void *rv; + do { + r300_predict_emit_size( rmesa ); + rv = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ); + } while (!rv); + return rv; +} + #undef LOCAL_VARS #undef ALLOC_VERTS #define CTX_ARG r300ContextPtr rmesa #define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size -#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ) +#define ALLOC_VERTS( n, size ) r300_alloc_verts(rmesa, n, size); #define LOCAL_VARS \ r300ContextPtr rmesa = R300_CONTEXT(ctx); \ const char *r300verts = (char *)rmesa->radeon.swtcl.verts; @@ -461,6 +504,7 @@ static void r300ChooseRenderState( GLcontext *ctx ) r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint index = 0; GLuint flags = ctx->_TriangleCaps; + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT; @@ -485,9 +529,9 @@ static void r300ChooseRenderState( GLcontext *ctx ) } } - void r300RenderStart(GLcontext *ctx) { + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); r300ContextPtr rmesa = R300_CONTEXT( ctx ); r300ChooseRenderState(ctx); @@ -500,7 +544,6 @@ void r300RenderStart(GLcontext *ctx) r300UpdateShaderStates(rmesa); - r300EmitCacheFlush(rmesa); /* investigate if we can put back flush optimisation if needed */ if (rmesa->radeon.dma.flush != NULL) { @@ -515,6 +558,7 @@ void r300RenderFinish(GLcontext *ctx) static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) { r300ContextPtr rmesa = R300_CONTEXT(ctx); + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); if (rmesa->radeon.swtcl.hw_primitive != hwprim) { R300_NEWPRIM( rmesa ); @@ -527,6 +571,7 @@ void r300RenderPrimitive(GLcontext *ctx, GLenum prim) r300ContextPtr rmesa = R300_CONTEXT(ctx); rmesa->radeon.swtcl.render_primitive = prim; + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) return; @@ -536,6 +581,8 @@ void r300RenderPrimitive(GLcontext *ctx, GLenum prim) void r300ResetLineStipple(GLcontext *ctx) { + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); } void r300InitSwtcl(GLcontext *ctx) @@ -543,11 +590,13 @@ void r300InitSwtcl(GLcontext *ctx) TNLcontext *tnl = TNL_CONTEXT(ctx); r300ContextPtr rmesa = R300_CONTEXT(ctx); static int firsttime = 1; + radeon_print(RADEON_SWRENDER, RADEON_NORMAL, "%s\n", __func__); if (firsttime) { init_rast_tab(); firsttime = 0; } + rmesa->radeon.swtcl.emit_prediction = 0; tnl->Driver.Render.Start = r300RenderStart; tnl->Driver.Render.Finish = r300RenderFinish; @@ -580,8 +629,8 @@ static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct r { BATCH_LOCALS(&rmesa->radeon); - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", + radeon_print(RADEON_SWRENDER, RADEON_TRACE, + "%s: vertex_size %d, offset 0x%x \n", __FUNCTION__, vertex_size, offset); BEGIN_BATCH(7); @@ -596,6 +645,8 @@ static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vert { BATCH_LOCALS(&rmesa->radeon); int type, num_verts; + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); type = r300PrimitiveType(rmesa, primitive); num_verts = r300NumVerts(rmesa, vertex_nr, primitive); @@ -608,21 +659,26 @@ static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vert void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) { + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); r300ContextPtr rmesa = R300_CONTEXT(ctx); - rcommonEnsureCmdBufSpace(&rmesa->radeon, - rmesa->radeon.hw.max_state_size + (12*sizeof(int)), - __FUNCTION__); + r300EmitCacheFlush(rmesa); + radeonEmitState(&rmesa->radeon); r300_emit_scissor(ctx); r300EmitVertexAOS(rmesa, rmesa->radeon.swtcl.vertex_size, - rmesa->radeon.dma.current, + first_elem(&rmesa->radeon.dma.reserved)->bo, current_offset); r300EmitVbufPrim(rmesa, rmesa->radeon.swtcl.hw_primitive, rmesa->radeon.swtcl.numverts); r300EmitCacheFlush(rmesa); + if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", + rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction ); + rmesa->radeon.swtcl.emit_prediction = 0; COMMIT_BATCH(); } diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 0af5bb4f46..433e5a87d4 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -137,7 +137,7 @@ static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloa | R300_TX_MIN_FILTER_ANISO | R300_TX_MIN_FILTER_MIP_LINEAR | aniso_filter(anisotropy); - if (RADEON_DEBUG & DEBUG_TEXTURE) + if (RADEON_DEBUG & RADEON_TEXTURE) fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy); return; } @@ -197,7 +197,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, { radeonTexObj* t = radeon_tex_obj(texObj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { fprintf(stderr, "%s( %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr(pname)); } @@ -260,7 +260,7 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) r300ContextPtr rmesa = R300_CONTEXT(ctx); radeonTexObj* t = radeon_tex_obj(texObj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, _mesa_lookup_enum_by_nr(texObj->Target)); @@ -302,7 +302,7 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, t, _mesa_lookup_enum_by_nr(target)); } diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 6f489ace7b..f030451b28 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/teximage.h" #include "main/texobj.h" #include "main/enums.h" +#include "main/simple_list.h" #include "r300_context.h" #include "r300_state.h" @@ -323,7 +324,7 @@ GLboolean r300ValidateBuffers(GLcontext * ctx) RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); } - ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); if (ret) return GL_FALSE; return GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index de32013032..2f7b67c143 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -40,927 +40,125 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/prog_statevars.h" #include "tnl/tnl.h" -#include "radeon_nqssadce.h" +#include "compiler/radeon_compiler.h" +#include "compiler/radeon_nqssadce.h" #include "r300_context.h" +#include "r300_fragprog_common.h" #include "r300_state.h" -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ - t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ - (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ - t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ - -/* - * Take an already-setup and valid source then swizzle it appropriately to - * obtain a constant ZERO or ONE source. +/** + * Write parameter array for the given vertex program into dst. + * Return the total number of components written. */ -#define __CONST(x, y) \ - (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_src_class(src[x].File), \ - VSF_FLAG_NONE) | (src[x].RelAddr << 4)) - -#define FREE_TEMPS() \ - do { \ - int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \ - if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \ - WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \ - vp->error = GL_TRUE; \ - } \ - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ - } while (0) - -static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst) +static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst) { - int pi; - float *dst_o = dst; - struct gl_program_parameter_list *paramList; + int i; - if (vp->IsNVProgram) { + if (vp->Base->IsNVProgram) { _mesa_load_tracked_matrices(ctx); - - for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) { - *dst++ = ctx->VertexProgram.Parameters[pi][0]; - *dst++ = ctx->VertexProgram.Parameters[pi][1]; - *dst++ = ctx->VertexProgram.Parameters[pi][2]; - *dst++ = ctx->VertexProgram.Parameters[pi][3]; + } else { + if (vp->Base->Base.Parameters) { + _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters); } - return dst - dst_o; } - if (!vp->Base.Parameters) - return 0; - - _mesa_load_state_parameters(ctx, vp->Base.Parameters); - - if (vp->Base.Parameters->NumParameters * 4 > - VSF_MAX_FRAGMENT_LENGTH) { + if (vp->code.constants.Count * 4 > VSF_MAX_FRAGMENT_LENGTH) { + /* Should have checked this earlier... */ fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); _mesa_exit(-1); } - paramList = vp->Base.Parameters; - for (pi = 0; pi < paramList->NumParameters; pi++) { - switch (paramList->Parameters[pi].Type) { - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); - case PROGRAM_CONSTANT: - *dst++ = paramList->ParameterValues[pi][0]; - *dst++ = paramList->ParameterValues[pi][1]; - *dst++ = paramList->ParameterValues[pi][2]; - *dst++ = paramList->ParameterValues[pi][3]; - break; - default: - _mesa_problem(NULL, "Bad param type in %s", - __FUNCTION__); - } - - } - - return dst - dst_o; -} - -static unsigned long t_dst_mask(GLuint mask) -{ - /* WRITEMASK_* is equivalent to VSF_FLAG_* */ - return mask & VSF_FLAG_ALL; -} - -static unsigned long t_dst_class(gl_register_file file) -{ - - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_DST_REG_TEMPORARY; - case PROGRAM_OUTPUT: - return PVS_DST_REG_OUT; - case PROGRAM_ADDRESS: - return PVS_DST_REG_A0; - /* - case PROGRAM_INPUT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; - } -} + for(i = 0; i < vp->code.constants.Count; ++i) { + const float * src = 0; + const struct rc_constant * constant = &vp->code.constants.Constants[i]; -static unsigned long t_dst_index(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT) - return vp->outputs[dst->Index]; + switch(constant->Type) { + case RC_CONSTANT_EXTERNAL: + if (vp->Base->IsNVProgram) { + src = ctx->VertexProgram.Parameters[constant->u.External]; + } else { + src = vp->Base->Base.Parameters->ParameterValues[constant->u.External]; + } + break; - return dst->Index; -} + case RC_CONSTANT_IMMEDIATE: + src = constant->u.Immediate; + break; + } -static unsigned long t_src_class(gl_register_file file) -{ - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_SRC_REG_TEMPORARY; - case PROGRAM_INPUT: - return PVS_SRC_REG_INPUT; - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - case PROGRAM_STATE_VAR: - return PVS_SRC_REG_CONSTANT; - /* - case PROGRAM_OUTPUT: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; + dst[4*i] = src[0]; + dst[4*i + 1] = src[1]; + dst[4*i + 2] = src[2]; + dst[4*i + 3] = src[3]; } -} -static INLINE unsigned long t_swizzle(GLubyte swizzle) -{ -/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ - return swizzle; + return 4 * vp->code.constants.Count; } -#if 0 -static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) +static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads) { + GLbitfield outputs = 0; int i; - if (vp == NULL) { - fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, - caller); - return; - } - - fprintf(stderr, "%s:<", caller); - for (i = 0; i < VERT_ATTRIB_MAX; i++) - fprintf(stderr, "%d ", vp->inputs[i]); - fprintf(stderr, ">\n"); - -} -#endif - -static unsigned long t_src_index(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - if (src->File == PROGRAM_INPUT) { - assert(vp->inputs[src->Index] != -1); - return vp->inputs[src->Index]; - } else { - if (src->Index < 0) { - fprintf(stderr, - "negative offsets for indirect addressing do not work.\n"); - return 0; - } - return src->Index; - } -} - -/* these two functions should probably be merged... */ - -static unsigned long t_src(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->Negate uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 1)), - t_swizzle(GET_SWZ(src->Swizzle, 2)), - t_swizzle(GET_SWZ(src->Swizzle, 3)), - t_src_class(src->File), - src->Negate) | (src->RelAddr << 4); -} +#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if (fpreads & (1 << (fp_attr))) \ + outputs |= (1 << (vp_result)); \ + } while (0) -static unsigned long t_src_scalar(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->Negate uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_src_class(src->File), - src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src->RelAddr << 4); -} + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); -static GLboolean valid_dst(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { - assert(dst->Index == 0); + for (i = 0; i <= 7; ++i) { + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); } - return GL_TRUE; -} - -static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = 0; - - return inst; -} - -static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = - PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, - t_src_class(src[1].File), - src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - PVS_SRC_SELECT_FORCE_1, - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(*u_temp_i, - PVS_SRC_SELECT_X, - PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, - PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, - /* Not 100% sure about this */ - (!src[0]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE - /*VSF_FLAG_ALL */ ); - inst[3] = __CONST(0, SWIZZLE_ZERO); - (*u_temp_i)--; - - return inst; -} - -static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - - inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - /* NOTE: Users swizzling might not work. */ - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - return inst; -} - -static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - GL_FALSE, - GL_TRUE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = t_src(vp, &src[2]); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = t_src_scalar(vp, &src[1]); - - return inst; -} - -static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} +#undef ADD_OUTPUT -static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} + if ((fpreads & (1 << FRAG_ATTRIB_COL0)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0))) + outputs |= 1 << VERT_RESULT_BFC0; + if ((fpreads & (1 << FRAG_ATTRIB_COL1)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1))) + outputs |= 1 << VERT_RESULT_BFC1; -static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} + outputs |= 1 << VERT_RESULT_HPOS; + if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + outputs |= 1 << VERT_RESULT_PSIZ; -static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; + return outputs; } -static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - -#if 0 - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = 0; -#else - inst[0] = - PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ONE); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); -#endif - - return inst; -} -static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - */ - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = - PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE); - - (*u_temp_i)--; - - return inst; -} - -static void t_inputs_outputs(struct r300_vertex_program *vp) +static void t_inputs_outputs(struct r300_vertex_program_compiler * c) { int i; int cur_reg; GLuint OutputsWritten, InputsRead; - OutputsWritten = vp->Base->Base.OutputsWritten; - InputsRead = vp->Base->Base.InputsRead; + OutputsWritten = c->Base.Program.OutputsWritten; + InputsRead = c->Base.Program.InputsRead; cur_reg = -1; for (i = 0; i < VERT_ATTRIB_MAX; i++) { if (InputsRead & (1 << i)) - vp->inputs[i] = ++cur_reg; + c->code->inputs[i] = ++cur_reg; else - vp->inputs[i] = -1; + c->code->inputs[i] = -1; } cur_reg = 0; for (i = 0; i < VERT_RESULT_MAX; i++) - vp->outputs[i] = -1; + c->code->outputs[i] = -1; assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { - vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + c->code->outputs[VERT_RESULT_HPOS] = cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { - vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++; } /* If we're writing back facing colors we need to send @@ -970,668 +168,130 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) * get written into appropriate output vectors. */ if (OutputsWritten & (1 << VERT_RESULT_COL0)) { - vp->outputs[VERT_RESULT_COL0] = cur_reg++; + c->code->outputs[VERT_RESULT_COL0] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_COL1)) { - vp->outputs[VERT_RESULT_COL1] = cur_reg++; + c->code->outputs[VERT_RESULT_COL1] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { - vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + c->code->outputs[VERT_RESULT_BFC0] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { - vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + c->code->outputs[VERT_RESULT_BFC1] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { cur_reg++; } for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { if (OutputsWritten & (1 << i)) { - vp->outputs[i] = cur_reg++; + c->code->outputs[i] = cur_reg++; } } if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { - vp->outputs[VERT_RESULT_FOGC] = cur_reg++; - } -} - -void r300TranslateVertexShader(struct r300_vertex_program *vp) -{ - struct prog_instruction *vpi = vp->Base->Base.Instructions; - int i; - GLuint *inst; - unsigned long num_operands; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? */ - int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; - struct prog_src_register src[3]; - - vp->pos_end = 0; /* Not supported yet */ - vp->hw_code.length = 0; - vp->translated = GL_TRUE; - vp->error = GL_FALSE; - - t_inputs_outputs(vp); - - for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END; - vpi++, inst += 4) { - - FREE_TEMPS(); - - if (!valid_dst(vp, &vpi->DstReg)) { - /* redirect result to unused temp */ - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = u_temp_i; - } - - num_operands = _mesa_num_inst_src_regs(vpi->Opcode); - - /* copy the sources (src) from mesa into a local variable... is this needed? */ - for (i = 0; i < num_operands; i++) { - src[i] = vpi->SrcReg[i]; - } - - if (num_operands == 3) { /* TODO: scalars */ - if (CMP_SRCS(src[1], src[2]) - || CMP_SRCS(src[0], src[2])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(vp, &src[2]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[2].File), - VSF_FLAG_NONE) | (src[2]. - RelAddr << - 4); - inst[2] = __CONST(2, SWIZZLE_ZERO); - inst[3] = __CONST(2, SWIZZLE_ZERO); - inst += 4; - - src[2].File = PROGRAM_TEMPORARY; - src[2].Index = u_temp_i; - src[2].RelAddr = 0; - u_temp_i--; - } - } - - if (num_operands >= 2) { - if (CMP_SRCS(src[1], src[0])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[0].File), - VSF_FLAG_NONE) | (src[0]. - RelAddr << - 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - src[0].File = PROGRAM_TEMPORARY; - src[0].Index = u_temp_i; - src[0].RelAddr = 0; - u_temp_i--; - } - } - - switch (vpi->Opcode) { - case OPCODE_ABS: - inst = r300TranslateOpcodeABS(vp, vpi, inst, src); - break; - case OPCODE_ADD: - inst = r300TranslateOpcodeADD(vp, vpi, inst, src); - break; - case OPCODE_ARL: - inst = r300TranslateOpcodeARL(vp, vpi, inst, src); - break; - case OPCODE_DP3: - inst = r300TranslateOpcodeDP3(vp, vpi, inst, src); - break; - case OPCODE_DP4: - inst = r300TranslateOpcodeDP4(vp, vpi, inst, src); - break; - case OPCODE_DPH: - inst = r300TranslateOpcodeDPH(vp, vpi, inst, src); - break; - case OPCODE_DST: - inst = r300TranslateOpcodeDST(vp, vpi, inst, src); - break; - case OPCODE_EX2: - inst = r300TranslateOpcodeEX2(vp, vpi, inst, src); - break; - case OPCODE_EXP: - inst = r300TranslateOpcodeEXP(vp, vpi, inst, src); - break; - case OPCODE_FLR: - inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - case OPCODE_FRC: - inst = r300TranslateOpcodeFRC(vp, vpi, inst, src); - break; - case OPCODE_LG2: - inst = r300TranslateOpcodeLG2(vp, vpi, inst, src); - break; - case OPCODE_LIT: - inst = r300TranslateOpcodeLIT(vp, vpi, inst, src); - break; - case OPCODE_LOG: - inst = r300TranslateOpcodeLOG(vp, vpi, inst, src); - break; - case OPCODE_MAD: - inst = r300TranslateOpcodeMAD(vp, vpi, inst, src); - break; - case OPCODE_MAX: - inst = r300TranslateOpcodeMAX(vp, vpi, inst, src); - break; - case OPCODE_MIN: - inst = r300TranslateOpcodeMIN(vp, vpi, inst, src); - break; - case OPCODE_MOV: - inst = r300TranslateOpcodeMOV(vp, vpi, inst, src); - break; - case OPCODE_MUL: - inst = r300TranslateOpcodeMUL(vp, vpi, inst, src); - break; - case OPCODE_POW: - inst = r300TranslateOpcodePOW(vp, vpi, inst, src); - break; - case OPCODE_RCP: - inst = r300TranslateOpcodeRCP(vp, vpi, inst, src); - break; - case OPCODE_RSQ: - inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src); - break; - case OPCODE_SGE: - inst = r300TranslateOpcodeSGE(vp, vpi, inst, src); - break; - case OPCODE_SLT: - inst = r300TranslateOpcodeSLT(vp, vpi, inst, src); - break; - case OPCODE_SUB: - inst = r300TranslateOpcodeSUB(vp, vpi, inst, src); - break; - case OPCODE_SWZ: - inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src); - break; - case OPCODE_XPD: - inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - default: - vp->error = GL_TRUE; - break; - } - } - - vp->hw_code.length = (inst - vp->hw_code.body.d); - if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) { - vp->error = GL_TRUE; + c->code->outputs[VERT_RESULT_FOGC] = cur_reg++; } } -static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) -{ - struct prog_instruction *vpi; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); - - vpi = &prog->Instructions[prog->NumInstructions - 3]; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_HPOS; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_END; -} - -static void pos_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - GLuint tempregi = prog->NumTemporaries; - - prog->NumTemporaries++; - - for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = tempregi; - } - } - - insert_wpos(prog, tempregi, tex_id); - - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - /** - * The fogcoord attribute is special in that only the first component - * is relevant, and the remaining components are always fixed (when read - * from by the fragment program) to yield an X001 pattern. + * The NV_vertex_program spec mandates that all registers be + * initialized to zero. We do this here unconditionally. * - * We need to enforce this either in the vertex program or in the fragment - * program, and this code chooses not to enforce it in the vertex program. - * This is slightly cheaper, as long as the fragment program does not use - * weird swizzles. - * - * And it seems that usually, weird swizzles are not used, so... - * - * See also the counterpart rewriting for fragment programs. + * \note We rely on dead-code elimination in the compiler. */ -static void fog_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - - vpi = prog->Instructions; - while (vpi->Opcode != OPCODE_END) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_X; - } - - ++vpi; - } - - prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - -static int translateABS(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_MAX; - inst->SrcReg[1] = inst->SrcReg[0]; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateDP3(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - return 0; -} - -static int translateDPH(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); - - return 0; -} - -static int translateFLR(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - struct prog_dst_register dst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - dst = inst->DstReg; - - inst->Opcode = OPCODE_FRC; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - ++inst; - - inst->Opcode = OPCODE_ADD; - inst->DstReg = dst; - inst->SrcReg[0] = (inst-1)->SrcReg[0]; - inst->SrcReg[1].File = PROGRAM_TEMPORARY; - inst->SrcReg[1].Index = tmp_idx; - inst->SrcReg[1].Negate = NEGATE_XYZW; - - return 1; -} - -static int translateSUB(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_ADD; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateSWZ(struct gl_program *prog, int pos) -{ - prog->Instructions[pos].Opcode = OPCODE_MOV; - - return 0; -} - -static int translateXPD(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - - *(inst+1) = *inst; - - inst->Opcode = OPCODE_MUL; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - ++inst; - - inst->Opcode = OPCODE_MAD; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - inst->SrcReg[2].File = PROGRAM_TEMPORARY; - inst->SrcReg[2].Index = tmp_idx; - - return 1; -} - -static void translateInsts(struct gl_program *prog) +static void initialize_NV_registers(struct radeon_compiler * compiler) { - struct prog_instruction *inst; - int i; + unsigned int reg; + struct rc_instruction * inst; - for (i = 0; i < prog->NumInstructions; ++i) { - inst = &prog->Instructions[i]; - - switch (inst->Opcode) { - case OPCODE_ABS: - i += translateABS(prog, i); - break; - case OPCODE_DP3: - i += translateDP3(prog, i); - break; - case OPCODE_DPH: - i += translateDPH(prog, i); - break; - case OPCODE_FLR: - i += translateFLR(prog, i); - break; - case OPCODE_SUB: - i += translateSUB(prog, i); - break; - case OPCODE_SWZ: - i += translateSWZ(prog, i); - break; - case OPCODE_XPD: - i += translateXPD(prog, i); - break; - default: - break; - } + for(reg = 0; reg < 12; ++reg) { + inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = reg; + inst->I.SrcReg[0].File = PROGRAM_BUILTIN; + inst->I.SrcReg[0].Swizzle = SWIZZLE_0000; } -} - -#define ADD_OUTPUT(fp_attr, vp_result) \ - do { \ - if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \ - OutputsAdded |= 1 << (vp_result); \ - count++; \ - } \ - } while (0) - -static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - GLuint OutputsAdded, FpReads; - int i, count; - - OutputsAdded = 0; - count = 0; - FpReads = r300->selected_fp->Base->InputsRead; - - ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); - ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); - - for (i = 0; i < 7; ++i) { - ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); - } - - /* Some outputs may be artificially added, to match the inputs of the fragment program. - * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by - * vertex program are undefined, so just use MOV [vertex_result], CONST[0] - */ - if (count > 0) { - struct prog_instruction *inst; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, count); - inst = &prog->Instructions[prog->NumInstructions - 1 - count]; - for (i = 0; i < VERT_RESULT_MAX; ++i) { - if (OutputsAdded & (1 << i)) { - inst->Opcode = OPCODE_MOV; - - inst->DstReg.File = PROGRAM_OUTPUT; - inst->DstReg.Index = i; - inst->DstReg.WriteMask = WRITEMASK_XYZW; - inst->DstReg.CondMask = COND_TR; - - inst->SrcReg[0].File = PROGRAM_CONSTANT; - inst->SrcReg[0].Index = 0; - inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++inst; - } - } - - prog->OutputsWritten |= OutputsAdded; - } -} - -#undef ADD_OUTPUT - -static void nqssadceInit(struct nqssadce_state* s) -{ - r300ContextPtr r300 = R300_CONTEXT(s->Ctx); - GLuint fp_reads; - - fp_reads = r300->selected_fp->Base->InputsRead; - { - if (fp_reads & FRAG_BIT_COL0) { - s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; - } - - if (fp_reads & FRAG_BIT_COL1) { - s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; - } - } - - { - int i; - for (i = 0; i < 8; ++i) { - if (fp_reads & FRAG_BIT_TEX(i)) { - s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; - } - } - } - - s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; - if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) - s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; -} - -static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) -{ - (void) opcode; - (void) reg; - - return GL_TRUE; + inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); + inst->I.Opcode = OPCODE_ARL; + inst->I.DstReg.File = PROGRAM_ADDRESS; + inst->I.DstReg.Index = 0; + inst->I.DstReg.WriteMask = WRITEMASK_X; + inst->I.SrcReg[0].File = PROGRAM_BUILTIN; + inst->I.SrcReg[0].Swizzle = SWIZZLE_0000; } static struct r300_vertex_program *build_program(GLcontext *ctx, struct r300_vertex_program_key *wanted_key, const struct gl_vertex_program *mesa_vp) { - r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_program *vp; - struct gl_program *prog; + struct r300_vertex_program_compiler compiler; vp = _mesa_calloc(sizeof(*vp)); vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - prog = &vp->Base->Base; + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE; - if (RADEON_DEBUG & DEBUG_VERTS) { + compiler.code = &vp->code; + compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads); + compiler.SetHwInputOutput = &t_inputs_outputs; + + if (compiler.Base.Debug) { fprintf(stderr, "Initial vertex program:\n"); - _mesa_print_program(prog); - fflush(stdout); + _mesa_print_program(&vp->Base->Base); + fflush(stderr); } - if (vp->Base->IsPositionInvariant) { + if (mesa_vp->IsPositionInvariant) { _mesa_insert_mvp_code(ctx, vp->Base); } - if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { - pos_as_texcoord(&vp->Base->Base, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0); - } + rc_mesa_to_rc_program(&compiler.Base, &vp->Base->Base); - if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { - fog_as_texcoord(&vp->Base->Base, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0); - } + if (mesa_vp->IsNVProgram) + initialize_NV_registers(&compiler.Base); - addArtificialOutputs(ctx, prog); + rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X); - translateInsts(prog); - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after native rewrite:\n"); - _mesa_print_program(prog); - fflush(stdout); + if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) { + rc_copy_output(&compiler.Base, + VERT_RESULT_HPOS, + vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0); } - { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadceInit, - .IsNativeSwizzle = &swizzleIsNative, - .BuildSwizzle = NULL - }; - radeonNqssaDce(ctx, prog, &nqssadce); - - /* We need this step for reusing temporary registers */ - _mesa_optimize_program(ctx, prog); - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after NQSSADCE:\n"); - _mesa_print_program(prog); - fflush(stdout); - } + if (vp->key.FogAttr != FRAG_ATTRIB_MAX) { + rc_move_output(&compiler.Base, + VERT_RESULT_FOGC, + vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X); } - assert(prog->NumInstructions); - { - struct prog_instruction *inst; - int max, i, tmp; - - inst = prog->Instructions; - max = -1; - while (inst->Opcode != OPCODE_END) { - tmp = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < tmp; ++i) { - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { - if ((int) inst->SrcReg[i].Index > max) { - max = inst->SrcReg[i].Index; - } - } - } + r3xx_compile_vertex_program(&compiler); + vp->error = compiler.Base.Error; - if (_mesa_num_inst_dst_regs(inst->Opcode)) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - if ((int) inst->DstReg.Index > max) { - max = inst->DstReg.Index; - } - } - } - ++inst; - } + vp->Base->Base.InputsRead = vp->code.InputsRead; + vp->Base->Base.OutputsWritten = vp->code.OutputsWritten; - /* We actually want highest index of used temporary register, - * not the number of temporaries used. - * These values aren't always the same. - */ - vp->num_temporaries = max + 1; - } + rc_destroy(&compiler.Base); return vp; } -struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) +struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_program_key wanted_key = { 0 }; @@ -1639,7 +299,21 @@ struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) struct r300_vertex_program *vp; vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; - wanted_key.FpReads = r300->selected_fp->Base->InputsRead; + + if (!r300->selected_fp) { + /* This can happen when GetProgramiv is called to check + * whether the program runs natively. + * + * To be honest, this is not a very good solution, + * but solving the problem of reporting good values + * for those queries is tough anyway considering that + * we recompile vertex programs based on the precise + * fragment program that is in use. + */ + r300SelectAndTranslateFragmentShader(ctx); + } + + wanted_key.FpReads = r300->selected_fp->InputsRead; wanted_key.FogAttr = r300->selected_fp->fog_attr; wanted_key.WPosAttr = r300->selected_fp->wpos_attr; @@ -1664,12 +338,14 @@ struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ } while(0) -static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code) +static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code) { int i; assert((code->length > 0) && (code->length % 4 == 0)); + R300_STATECHANGE( r300, vap_flush ); + switch ((dest >> 8) & 0xf) { case 0: R300_STATECHANGE(r300, vpi); @@ -1707,16 +383,17 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + R300_STATECHANGE(rmesa, vap_flush); R300_STATECHANGE(rmesa, vpp); - param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code)); - inst_count = (prog->hw_code.length / 4) - 1; + r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code)); + inst_count = (prog->code.length / 4) - 1; - r300VapCntl(rmesa, _mesa_bitcount(prog->Base->Base.InputsRead), - _mesa_bitcount(prog->Base->Base.OutputsWritten), prog->num_temporaries); + r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead), + _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries); R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h index 2dab11c337..ccec896be4 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.h +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -3,39 +3,9 @@ #include "r300_reg.h" -#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ - (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ - | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ - | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ - | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ - | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ - | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) - -#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ - (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ - | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ - | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ - | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ - | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ - | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ - | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) - -#if 1 - -#define VSF_FLAG_X 1 -#define VSF_FLAG_Y 2 -#define VSF_FLAG_Z 4 -#define VSF_FLAG_W 8 -#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z) -#define VSF_FLAG_ALL 0xf -#define VSF_FLAG_NONE 0 - -#endif void r300SetupVertexProgram(r300ContextPtr rmesa); -struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx); - -void r300TranslateVertexShader(struct r300_vertex_program *vp); +struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx); #endif diff --git a/src/mesa/drivers/dri/r300/radeon_buffer_objects.c b/src/mesa/drivers/dri/r300/radeon_buffer_objects.c new file mode 120000 index 0000000000..f6a5f66470 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_buffer_objects.c @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_buffer_objects.h b/src/mesa/drivers/dri/r300/radeon_buffer_objects.h new file mode 120000 index 0000000000..2f134fd17b --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_buffer_objects.h @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h index 250570f6b8..da4812d323 100644 --- a/src/mesa/drivers/dri/r300/radeon_context.h +++ b/src/mesa/drivers/dri/r300/radeon_context.h @@ -51,26 +51,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_screen.h" -#if R200_MERGED -extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode); - -#define FALLBACK( radeon, bit, mode ) do { \ - if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ - __FUNCTION__, bit, mode ); \ - radeonFallback( (radeon)->glCtx, bit, mode ); \ -} while (0) -#else #define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%s\n", __LINE__, __FILE__); -#endif /* TCL fallbacks */ extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode); -#if R200_MERGED -#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode ) -#else #define TCL_FALLBACK( ctx, bit, mode ) ; -#endif #endif /* __RADEON_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r300/radeon_debug.c b/src/mesa/drivers/dri/r300/radeon_debug.c new file mode 120000 index 0000000000..c98c2e074c --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_debug.c @@ -0,0 +1 @@ +../radeon/radeon_debug.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_debug.h b/src/mesa/drivers/dri/r300/radeon_debug.h new file mode 120000 index 0000000000..bd8aa28e89 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_debug.h @@ -0,0 +1 @@ +../radeon/radeon_debug.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c deleted file mode 100644 index da5e7aefce..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_program.h" - -#include "shader/prog_print.h" - - -/** - * Transform the given clause in the following way: - * 1. Replace it with an empty clause - * 2. For every instruction in the original clause, try the given - * transformations in order. - * 3. If one of the transformations returns GL_TRUE, assume that it - * has emitted the appropriate instruction(s) into the new clause; - * otherwise, copy the instruction verbatim. - * - * \note The transformation is currently not recursive; in other words, - * instructions emitted by transformations are not transformed. - * - * \note The transform is called 'local' because it can only look at - * one instruction at a time. - */ -void radeonLocalTransform( - GLcontext *Ctx, - struct gl_program *program, - int num_transformations, - struct radeon_program_transformation* transformations) -{ - struct radeon_transform_context ctx; - int ip; - - ctx.Ctx = Ctx; - ctx.Program = program; - ctx.OldInstructions = program->Instructions; - ctx.OldNumInstructions = program->NumInstructions; - - program->Instructions = 0; - program->NumInstructions = 0; - - for(ip = 0; ip < ctx.OldNumInstructions; ++ip) { - struct prog_instruction *instr = ctx.OldInstructions + ip; - int i; - - for(i = 0; i < num_transformations; ++i) { - struct radeon_program_transformation* t = transformations + i; - - if (t->function(&ctx, instr, t->userData)) - break; - } - - if (i >= num_transformations) { - struct prog_instruction* dest = radeonAppendInstructions(program, 1); - _mesa_copy_instructions(dest, instr, 1); - } - } - - _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions); -} - - -static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count) -{ - GLuint i; - for (i = 0; i < count; i++) { - const struct prog_instruction *inst = insts + i; - const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); - GLuint k; - - for (k = 0; k < n; k++) { - if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) - used[inst->SrcReg[k].Index] = GL_TRUE; - } - } -} - -GLint radeonFindFreeTemporary(struct radeon_transform_context *t) -{ - GLboolean used[MAX_PROGRAM_TEMPS]; - GLuint i; - - _mesa_memset(used, 0, sizeof(used)); - scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions); - scan_instructions(used, t->OldInstructions, t->OldNumInstructions); - - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { - if (!used[i]) - return i; - } - - return -1; -} - - -/** - * Append the given number of instructions to the program and return a - * pointer to the first new instruction. - */ -struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count) -{ - int oldnum = program->NumInstructions; - _mesa_insert_instructions(program, oldnum, count); - return program->Instructions + oldnum; -} diff --git a/src/mesa/drivers/dri/r300/radeon_queryobj.c b/src/mesa/drivers/dri/r300/radeon_queryobj.c new file mode 120000 index 0000000000..1d6ebc1c48 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_queryobj.c @@ -0,0 +1 @@ +../radeon/radeon_queryobj.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_queryobj.h b/src/mesa/drivers/dri/r300/radeon_queryobj.h new file mode 120000 index 0000000000..8f6f842b0a --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_queryobj.h @@ -0,0 +1 @@ +../radeon/radeon_queryobj.h
\ No newline at end of file |