From b81a7dc2d8ba09b48d5022cf9ff65f2fad890e11 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Thu, 30 Oct 2008 23:52:59 +0100 Subject: gallivm: replace the temp parameters of the JIT function with alloca'ed temps. This avoids useless writes of temporary results. --- src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 6 ++-- src/gallium/auxiliary/gallivm/storagesoa.cpp | 44 +++++++++++++++++++-------- src/gallium/auxiliary/gallivm/storagesoa.h | 10 +++--- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 11 ++----- 4 files changed, 41 insertions(+), 30 deletions(-) (limited to 'src/gallium/auxiliary/gallivm') diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index 3a2f2878a3..93a9748bdb 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -179,8 +179,7 @@ struct gallivm_cpu_engine * gallivm_global_cpu_engine() typedef void (*vertex_shader_runner)(void *ainputs, void *dests, - float (*aconsts)[4], - void *temps); + float (*aconsts)[4]); #define MAX_TGSI_VERTICES 4 /*! @@ -223,8 +222,7 @@ int gallivm_cpu_vs_exec(struct gallivm_prog *prog, /* run shader */ runner(machine->Inputs, machine->Outputs, - (float (*)[4]) constants, - machine->Temps); + (float (*)[4]) constants); /* Unswizzle all output results */ diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index 4fc075cf6d..e1e5cabcf5 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -48,13 +48,11 @@ using namespace llvm; StorageSoa::StorageSoa(llvm::BasicBlock *block, llvm::Value *input, llvm::Value *output, - llvm::Value *consts, - llvm::Value *temps) + llvm::Value *consts) : m_block(block), m_input(input), m_output(output), m_consts(consts), - m_temps(temps), m_immediates(0), m_idx(0) { @@ -169,7 +167,7 @@ std::vector StorageSoa::constElement(llvm::IRBuilder<>* m_builder, { llvm::Value* res; std::vector res2(4); - llvm::Value *xChannel, *yChannel, *zChannel, *wChannel; + llvm::Value *xChannel; xChannel = elementPointer(m_consts, idx, 0); @@ -195,14 +193,15 @@ std::vector StorageSoa::outputElement(llvm::Value *idx) return res; } -std::vector StorageSoa::tempElement(llvm::Value *idx) +std::vector StorageSoa::tempElement(llvm::IRBuilder<>* m_builder, int idx) { std::vector res(4); + llvm::Value *temp = m_temps[idx]; - res[0] = element(m_temps, idx, 0); - res[1] = element(m_temps, idx, 1); - res[2] = element(m_temps, idx, 2); - res[3] = element(m_temps, idx, 3); + res[0] = element(temp, constantInt(0), 0); + res[1] = element(temp, constantInt(0), 1); + res[2] = element(temp, constantInt(0), 2); + res[3] = element(temp, constantInt(0), 3); return res; } @@ -326,7 +325,7 @@ std::vector StorageSoa::load(enum tgsi_file_type type, int idx, in val = outputElement(realIndex); break; case TGSI_FILE_TEMPORARY: - val = tempElement(realIndex); + val = tempElement(m_builder, idx); break; case TGSI_FILE_CONSTANT: val = constElement(m_builder, realIndex); @@ -355,19 +354,39 @@ std::vector StorageSoa::load(enum tgsi_file_type type, int idx, in return res; } +llvm::Value * StorageSoa::allocaTemp(llvm::IRBuilder<>* m_builder) +{ + VectorType *vector = VectorType::get(Type::FloatTy, 4); + ArrayType *vecArray = ArrayType::get(vector, 4); + AllocaInst *alloca = new AllocaInst(vecArray, "temp", + m_builder->GetInsertBlock()); + + return alloca; +} + + void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector &val, - int mask) + int mask, llvm::IRBuilder<>* m_builder) { llvm::Value *out = 0; + llvm::Value *realIndex = 0; switch(type) { case TGSI_FILE_OUTPUT: out = m_output; + realIndex = constantInt(idx); break; case TGSI_FILE_TEMPORARY: - out = m_temps; + // if that temp doesn't already exist, alloca it + if (m_temps.find(idx) == m_temps.end()) + m_temps[idx] = allocaTemp(m_builder); + + out = m_temps[idx]; + + realIndex = constantInt(0); break; case TGSI_FILE_INPUT: out = m_input; + realIndex = constantInt(idx); break; case TGSI_FILE_ADDRESS: { llvm::Value *addr = m_addresses[idx]; @@ -385,7 +404,6 @@ void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector load(enum tgsi_file_type type, int idx, int swizzle, llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0); void store(enum tgsi_file_type type, int idx, const std::vector &val, - int mask); + int mask, llvm::IRBuilder<>* m_builder); void addImmediate(float *vec); void declareImmediates(); @@ -84,7 +83,7 @@ private: llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc); std::vector constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx); std::vector outputElement(llvm::Value *indIdx); - std::vector tempElement(llvm::Value *indIdx); + std::vector tempElement(llvm::IRBuilder<>* m_builder, int idx); std::vector immediateElement(llvm::Value *indIdx); private: llvm::BasicBlock *m_block; @@ -92,12 +91,13 @@ private: llvm::Value *m_input; llvm::Value *m_output; llvm::Value *m_consts; - llvm::Value *m_temps; + std::map m_temps; llvm::GlobalVariable *m_immediates; std::map m_addresses; std::vector > m_immediatesToFlush; + llvm::Value * allocaTemp(llvm::IRBuilder<>* m_builder); mutable std::map m_constInts; mutable char m_name[32]; diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 1191a6cae9..c11b88af9e 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -53,7 +53,6 @@ static inline FunctionType *vertexShaderFunctionType() // [4 x <4 x float>] inputs, // [4 x <4 x float>] output, // [4 x [1 x float]] consts, - // [4 x <4 x float>] temps std::vector funcArgs; VectorType *vectorType = VectorType::get(Type::FloatTy, 4); @@ -67,7 +66,6 @@ static inline FunctionType *vertexShaderFunctionType() funcArgs.push_back(vectorArrayPtr);//inputs funcArgs.push_back(vectorArrayPtr);//output funcArgs.push_back(constsArrayPtr);//consts - funcArgs.push_back(vectorArrayPtr);//temps FunctionType *functionType = FunctionType::get( /*Result=*/Type::VoidTy, @@ -246,7 +244,6 @@ translate_instruction(llvm::Module *module, val = storage->constElement(src->SrcRegister.Index, indIdx); } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { val = storage->inputElement(src->SrcRegister.Index, indIdx); - // FIXME we should not be generating elements for temporaries, this creates useless memory writes } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { val = storage->tempElement(src->SrcRegister.Index); } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { @@ -677,7 +674,6 @@ translate_instruction(llvm::Module *module, if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - // FIXME we should not be generating elements for temporaries, this creates useless memory writes } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { @@ -1027,7 +1023,8 @@ translate_instructionir(llvm::Module *module, for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; storage->store((enum tgsi_file_type)dst->DstRegister.File, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + dst->DstRegister.Index, out, dst->DstRegister.WriteMask, + instr->getIRBuilder() ); } } @@ -1122,8 +1119,6 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, output->setName("outputs"); Value *consts = args++; consts->setName("consts"); - Value *temps = args++; - temps->setName("temps"); BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); @@ -1132,7 +1127,7 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, fi = tgsi_default_full_instruction(); fd = tgsi_default_full_declaration(); - StorageSoa storage(label_entry, input, output, consts, temps); + StorageSoa storage(label_entry, input, output, consts); InstructionsSoa instr(mod, shader, label_entry, &storage); while(!tgsi_parse_end_of_tokens(&parse)) { -- cgit v1.2.3