summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/llvmpipe/lp_state_fs.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_state_fs.c')
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c170
1 files changed, 148 insertions, 22 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index b73ca2d41e..3a669ba859 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -84,8 +84,8 @@
#include "lp_screen.h"
#include "lp_context.h"
#include "lp_buffer.h"
+#include "lp_setup.h"
#include "lp_state.h"
-#include "lp_quad.h"
#include "lp_tex_sample.h"
#include "lp_debug.h"
@@ -187,7 +187,93 @@ generate_depth(LLVMBuilderRef builder,
/**
+ * Generate the code to do inside/outside triangle testing for the
+ * four pixels in a 2x2 quad. This will set the four elements of the
+ * quad mask vector to 0 or ~0.
+ * \param i which quad of the quad group to test, in [0,3]
+ */
+static void
+generate_tri_edge_mask(LLVMBuilderRef builder,
+ unsigned i,
+ LLVMValueRef *mask, /* ivec4, out */
+ LLVMValueRef c0, /* int32 */
+ LLVMValueRef c1, /* int32 */
+ LLVMValueRef c2, /* int32 */
+ LLVMValueRef step0_ptr, /* ivec4 */
+ LLVMValueRef step1_ptr, /* ivec4 */
+ LLVMValueRef step2_ptr) /* ivec4 */
+{
+ /*
+ c0_vec = splat(c0)
+ c1_vec = splat(c1)
+ c2_vec = splat(c2)
+ m0_vec = step0_ptr[i] > c0_vec
+ m1_vec = step1_ptr[i] > c1_vec
+ m2_vec = step2_ptr[i] > c2_vec
+ mask = m0_vec & m1_vec & m2_vec
+ */
+ struct lp_type i32_type;
+ LLVMTypeRef i32vec4_type;
+
+ LLVMValueRef index;
+ LLVMValueRef c0_vec, c1_vec, c2_vec;
+ LLVMValueRef step0_vec, step1_vec, step2_vec;
+ LLVMValueRef m0_vec, m1_vec, m2_vec;
+ LLVMValueRef m;
+
+ assert(i < 4);
+
+ /* int32 vector type */
+ memset(&i32_type, 0, sizeof i32_type);
+ i32_type.floating = FALSE; /* values are integers */
+ i32_type.sign = TRUE; /* values are signed */
+ i32_type.norm = FALSE; /* values are not normalized */
+ i32_type.width = 32; /* 32-bit int values */
+ i32_type.length = 4; /* 4 elements per vector */
+
+ i32vec4_type = lp_build_int32_vec4_type();
+
+ /* c0_vec = {c0, c0, c0, c0}
+ * Note that we emit this code four times but LLVM optimizes away
+ * three instances of it.
+ */
+ c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
+ c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
+ c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
+
+ lp_build_name(c0_vec, "edgeconst0vec");
+ lp_build_name(c1_vec, "edgeconst1vec");
+ lp_build_name(c2_vec, "edgeconst2vec");
+
+ index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
+ step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
+ step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
+
+ lp_build_name(step0_vec, "step0vec");
+ lp_build_name(step1_vec, "step1vec");
+ lp_build_name(step2_vec, "step2vec");
+
+ m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
+ m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
+ m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);
+
+ m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
+ m = LLVMBuildAnd(builder, m, m2_vec, "");
+
+ lp_build_name(m, "inoutmaskvec");
+
+ *mask = m;
+
+ /*
+ * if mask = {0,0,0,0} skip quad
+ */
+}
+
+
+/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
+ * \param i which quad in the tile, in range [0,3]
*/
static void
generate_fs(struct llvmpipe_context *lp,
@@ -201,7 +287,13 @@ generate_fs(struct llvmpipe_context *lp,
struct lp_build_sampler_soa *sampler,
LLVMValueRef *pmask,
LLVMValueRef *color,
- LLVMValueRef depth_ptr)
+ LLVMValueRef depth_ptr,
+ LLVMValueRef c0,
+ LLVMValueRef c1,
+ LLVMValueRef c2,
+ LLVMValueRef step0_ptr,
+ LLVMValueRef step1_ptr,
+ LLVMValueRef step2_ptr)
{
const struct tgsi_token *tokens = shader->base.tokens;
LLVMTypeRef elem_type;
@@ -216,6 +308,8 @@ generate_fs(struct llvmpipe_context *lp,
unsigned attrib;
unsigned chan;
+ assert(i < 4);
+
elem_type = lp_build_elem_type(type);
vec_type = lp_build_vec_type(type);
int_vec_type = lp_build_int_vec_type(type);
@@ -235,8 +329,14 @@ generate_fs(struct llvmpipe_context *lp,
}
lp_build_flow_scope_declare(flow, &z);
+ /* do triangle edge testing */
+ generate_tri_edge_mask(builder, i, pmask,
+ c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
+
+ /* 'mask' will control execution based on quad's pixel alive/killed state */
lp_build_mask_begin(&mask, flow, type, *pmask);
+
early_depth_test =
key->depth.enabled &&
!key->alpha.enabled &&
@@ -369,6 +469,9 @@ generate_blend(const struct pipe_blend_state *blend,
/**
* Generate the runtime callable function for the whole fragment pipeline.
+ * Note that the function which we generate operates on a block of 16
+ * pixels at at time. The block contains 2x2 quads. Each quad contains
+ * 2x2 pixels.
*/
static struct lp_fragment_shader_variant *
generate_fragment(struct llvmpipe_context *lp,
@@ -384,17 +487,18 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMTypeRef fs_int_vec_type;
LLVMTypeRef blend_vec_type;
LLVMTypeRef blend_int_vec_type;
- LLVMTypeRef arg_types[9];
+ LLVMTypeRef arg_types[14];
LLVMTypeRef func_type;
+ LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type();
LLVMValueRef context_ptr;
LLVMValueRef x;
LLVMValueRef y;
LLVMValueRef a0_ptr;
LLVMValueRef dadx_ptr;
LLVMValueRef dady_ptr;
- LLVMValueRef mask_ptr;
LLVMValueRef color_ptr;
LLVMValueRef depth_ptr;
+ LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef x0;
@@ -476,8 +580,8 @@ generate_fragment(struct llvmpipe_context *lp,
fs_type.sign = TRUE; /* values are signed */
fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
fs_type.width = 32; /* 32-bit float */
- fs_type.length = 4; /* 4 element per vector */
- num_fs = 4;
+ fs_type.length = 4; /* 4 elements per vector */
+ num_fs = 4; /* number of quads per block */
memset(&blend_type, 0, sizeof blend_type);
blend_type.floating = FALSE; /* values are integers */
@@ -504,9 +608,17 @@ generate_fragment(struct llvmpipe_context *lp,
arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */
arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */
arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */
- arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */
- arg_types[7] = LLVMPointerType(blend_vec_type, 0); /* color */
- arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
+ arg_types[6] = LLVMPointerType(blend_vec_type, 0); /* color */
+ arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
+ arg_types[8] = LLVMInt32Type(); /* c0 */
+ arg_types[9] = LLVMInt32Type(); /* c1 */
+ arg_types[10] = LLVMInt32Type(); /* c2 */
+ /* Note: the step arrays are built as int32[16] but we interpret
+ * them here as int32_vec4[4].
+ */
+ arg_types[11] = LLVMPointerType(int32_vec4_type, 0);/* step0 */
+ arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step1 */
+ arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step2 */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
@@ -522,9 +634,14 @@ generate_fragment(struct llvmpipe_context *lp,
a0_ptr = LLVMGetParam(variant->function, 3);
dadx_ptr = LLVMGetParam(variant->function, 4);
dady_ptr = LLVMGetParam(variant->function, 5);
- mask_ptr = LLVMGetParam(variant->function, 6);
- color_ptr = LLVMGetParam(variant->function, 7);
- depth_ptr = LLVMGetParam(variant->function, 8);
+ color_ptr = LLVMGetParam(variant->function, 6);
+ depth_ptr = LLVMGetParam(variant->function, 7);
+ c0 = LLVMGetParam(variant->function, 8);
+ c1 = LLVMGetParam(variant->function, 9);
+ c2 = LLVMGetParam(variant->function, 10);
+ step0_ptr = LLVMGetParam(variant->function, 11);
+ step1_ptr = LLVMGetParam(variant->function, 12);
+ step2_ptr = LLVMGetParam(variant->function, 13);
lp_build_name(context_ptr, "context");
lp_build_name(x, "x");
@@ -532,9 +649,14 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(a0_ptr, "a0");
lp_build_name(dadx_ptr, "dadx");
lp_build_name(dady_ptr, "dady");
- lp_build_name(mask_ptr, "mask");
lp_build_name(color_ptr, "color");
lp_build_name(depth_ptr, "depth");
+ lp_build_name(c0, "c0");
+ lp_build_name(c1, "c1");
+ lp_build_name(c2, "c2");
+ lp_build_name(step0_ptr, "step0");
+ lp_build_name(step1_ptr, "step1");
+ lp_build_name(step2_ptr, "step2");
/*
* Function body
@@ -548,20 +670,20 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type,
a0_ptr, dadx_ptr, dady_ptr,
- x0, y0, 2, 0);
+ x0, y0);
/* code generated texture sampling */
sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
+ /* loop over quads in the block */
for(i = 0; i < num_fs; ++i) {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
LLVMValueRef out_color[NUM_CHANNELS];
LLVMValueRef depth_ptr_i;
if(i != 0)
- lp_build_interp_soa_update(&interp);
+ lp_build_interp_soa_update(&interp, i);
- fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), "");
depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
generate_fs(lp, shader, key,
@@ -571,9 +693,11 @@ generate_fragment(struct llvmpipe_context *lp,
i,
&interp,
sampler,
- &fs_mask[i],
+ &fs_mask[i], /* output */
out_color,
- depth_ptr_i);
+ depth_ptr_i,
+ c0, c1, c2,
+ step0_ptr, step1_ptr, step2_ptr);
for(chan = 0; chan < NUM_CHANNELS; ++chan)
fs_out_color[chan][i] = out_color[chan];
@@ -724,15 +848,14 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
+ if(llvmpipe->constants[shader].buffer == buffer)
+ return;
+
draw_flush(llvmpipe->draw);
/* note: reference counting */
pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer);
- if(shader == PIPE_SHADER_FRAGMENT) {
- llvmpipe->jit_context.constants = data;
- }
-
if(shader == PIPE_SHADER_VERTEX) {
draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX,
data, size);
@@ -814,4 +937,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
variant = generate_fragment(lp, shader, &key);
shader->current = variant;
+
+ lp_setup_set_fs_function(lp->setup,
+ shader->current->jit_function);
}