summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/common/dri_util.c121
-rw-r--r--src/mesa/drivers/dri/r200/Makefile3
-rw-r--r--src/mesa/drivers/dri/r200/r200_context.h2
-rw-r--r--src/mesa/drivers/dri/r200/r200_ioctl.h8
-rw-r--r--src/mesa/drivers/dri/r200/r200_state_init.c459
-rw-r--r--src/mesa/drivers/dri/r200/r200_swtcl.c2
-rw-r--r--src/mesa/drivers/dri/r200/r200_tcl.c76
-rw-r--r--src/mesa/drivers/dri/r300/r300_cmdbuf.c316
-rw-r--r--src/mesa/drivers/dri/r300/r300_cmdbuf.h6
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c3
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.h2
-rw-r--r--src/mesa/drivers/dri/r300/r300_draw.c177
-rw-r--r--src/mesa/drivers/dri/r300/r300_ioctl.c26
-rw-r--r--src/mesa/drivers/dri/r300/r300_render.c6
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.c1
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.c3
-rw-r--r--src/mesa/drivers/dri/r600/r700_render.c59
-rw-r--r--src/mesa/drivers/dri/radeon/Makefile3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_cmdbuf.h4
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common.c150
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common.h1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_dma.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_ioctl.h12
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_state_init.c269
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_swtcl.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tcl.c83
26 files changed, 1202 insertions, 594 deletions
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
index e48e10d7c0..d7bcd565d7 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -82,6 +82,46 @@ driIntersectArea( drm_clip_rect_t rect1, drm_clip_rect_t rect2 )
return (rect1.x2 - rect1.x1) * (rect1.y2 - rect1.y1);
}
+static int driFreeDrawable(__DRIcontext *pcp)
+{
+ __DRIdrawable *pdp;
+ __DRIdrawable *prp;
+
+ if (pcp == NULL)
+ return GL_FALSE;
+
+ pdp = pcp->driDrawablePriv;
+ prp = pcp->driReadablePriv;
+
+ /* already unbound */
+ if (!pdp && !prp)
+ return GL_TRUE;
+
+ if (pdp->refcount == 0) {
+ /* ERROR!!! */
+ return GL_FALSE;
+ }
+
+ dri_put_drawable(pdp);
+
+ if (prp != pdp) {
+ if (prp->refcount == 0) {
+ /* ERROR!!! */
+ return GL_FALSE;
+ }
+
+ dri_put_drawable(prp);
+ }
+
+
+ /* XXX this is disabled so that if we call SwapBuffers on an unbound
+ * window we can determine the last context bound to the window and
+ * use that context's lock. (BrianP, 2-Dec-2000)
+ */
+ pcp->driDrawablePriv = pcp->driReadablePriv = NULL;
+ return GL_TRUE;
+}
+
/*****************************************************************/
/** \name Context (un)binding functions */
/*****************************************************************/
@@ -106,8 +146,6 @@ driIntersectArea( drm_clip_rect_t rect1, drm_clip_rect_t rect2 )
static int driUnbindContext(__DRIcontext *pcp)
{
__DRIscreen *psp;
- __DRIdrawable *pdp;
- __DRIdrawable *prp;
/*
** Assume error checking is done properly in glXMakeCurrent before
@@ -118,38 +156,10 @@ static int driUnbindContext(__DRIcontext *pcp)
return GL_FALSE;
psp = pcp->driScreenPriv;
- pdp = pcp->driDrawablePriv;
- prp = pcp->driReadablePriv;
- /* already unbound */
- if (!pdp && !prp)
- return GL_TRUE;
- /* Let driver unbind drawable from context */
+ /* Let driver unbind drawable from context */
(*psp->DriverAPI.UnbindContext)(pcp);
- if (pdp->refcount == 0) {
- /* ERROR!!! */
- return GL_FALSE;
- }
-
- dri_put_drawable(pdp);
-
- if (prp != pdp) {
- if (prp->refcount == 0) {
- /* ERROR!!! */
- return GL_FALSE;
- }
-
- dri_put_drawable(prp);
- }
-
-
- /* XXX this is disabled so that if we call SwapBuffers on an unbound
- * window we can determine the last context bound to the window and
- * use that context's lock. (BrianP, 2-Dec-2000)
- */
- pcp->driDrawablePriv = pcp->driReadablePriv = NULL;
-
#if 0
/* Unbind the drawable */
pdp->driContextPriv = &psp->dummyContextPriv;
@@ -171,17 +181,44 @@ static int driBindContext(__DRIcontext *pcp,
/* Bind the drawable to the context */
- if (pcp) {
- pcp->driDrawablePriv = pdp;
- pcp->driReadablePriv = prp;
- if (pdp) {
- pdp->driContextPriv = pcp;
- dri_get_drawable(pdp);
- }
- if ( prp && pdp != prp ) {
- dri_get_drawable(prp);
+ if (pcp) {
+
+ if (pcp->driDrawablePriv != pdp
+ || pcp->driReadablePriv != prp)
+ {
+ /* first increment ref count for new drawables */
+
+ if (pdp)
+ {
+ pdp->driContextPriv = pcp;
+ dri_get_drawable(pdp);
+ }
+
+ if (prp && prp != pdp)
+ {
+ dri_get_drawable(prp);
+ }
+
+ /* free old drawables */
+
+ if (pcp->driReadablePriv
+ && pcp->driReadablePriv != pcp->driDrawablePriv)
+ {
+ dri_put_drawable(pcp->driReadablePriv);
+ }
+
+ if (pcp->driDrawablePriv)
+ {
+ dri_put_drawable(pcp->driDrawablePriv);
+ }
+
+ /* assign new drawables to context */
+
+ pcp->driDrawablePriv = pdp;
+ pcp->driReadablePriv = prp;
+
+ }
}
- }
/*
** Now that we have a context associated with this drawable, we can
@@ -542,6 +579,7 @@ static void
driDestroyContext(__DRIcontext *pcp)
{
if (pcp) {
+ driFreeDrawable(pcp);
(*pcp->driScreenPriv->DriverAPI.DestroyContext)(pcp);
_mesa_free(pcp);
}
@@ -579,6 +617,7 @@ driCreateNewContext(__DRIscreen *psp, const __DRIconfig *config,
pcp->driScreenPriv = psp;
pcp->driDrawablePriv = NULL;
+ pcp->driReadablePriv = NULL;
/* When the first context is created for a screen, initialize a "dummy"
* context.
diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
index 2114ce55c1..25ecbef609 100644
--- a/src/mesa/drivers/dri/r200/Makefile
+++ b/src/mesa/drivers/dri/r200/Makefile
@@ -54,7 +54,8 @@ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
X86_SOURCES =
-DRIVER_DEFINES = -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R200
+DRIVER_DEFINES = -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R200 \
+ -Wall
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h
index 6267293817..18360890eb 100644
--- a/src/mesa/drivers/dri/r200/r200_context.h
+++ b/src/mesa/drivers/dri/r200/r200_context.h
@@ -526,8 +526,6 @@ struct r200_state {
struct r200_tcl_info {
GLuint hw_primitive;
- GLuint *Elts;
-
int elt_used;
};
diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h
index 2a4b8a11f4..f6419f5a2c 100644
--- a/src/mesa/drivers/dri/r200/r200_ioctl.h
+++ b/src/mesa/drivers/dri/r200/r200_ioctl.h
@@ -125,10 +125,12 @@ static INLINE int R200_DB_STATECHANGE(
* are available, you will also be adding an rmesa->state.max_state_size because
* r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
*/
-#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int))
-#define VERT_AOS_BUFSZ (5 * sizeof(int))
+#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2))
+#define VERT_AOS_BUFSZ (5)
#define ELTS_BUFSZ(nr) (12 + nr * 2)
-#define VBUF_BUFSZ (3 * sizeof(int))
+#define VBUF_BUFSZ (3)
+#define SCISSOR_BUFSZ (8)
+#define INDEX_BUFSZ (8+2)
static inline uint32_t cmdpacket3(int cmd_type)
{
diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c
index 78ad5baebb..9b443bd0ea 100644
--- a/src/mesa/drivers/dri/r200/r200_state_init.c
+++ b/src/mesa/drivers/dri/r200/r200_state_init.c
@@ -226,57 +226,73 @@ static int cmdscl2( int offset, int stride, int count )
return h.i;
}
-#define CHECK( NM, FLAG ) \
+/**
+ * Check functions are used to check if state is active.
+ * If it is active check function returns maximum emit size.
+ */
+#define CHECK( NM, FLAG, ADD ) \
static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
{ \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
(void) rmesa; \
- return (FLAG) ? atom->cmd_size : 0; \
+ return (FLAG) ? atom->cmd_size + (ADD) : 0; \
}
-#define TCL_CHECK( NM, FLAG ) \
+#define TCL_CHECK( NM, FLAG, ADD ) \
static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
{ \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
- return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \
+ return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
}
-#define TCL_OR_VP_CHECK( NM, FLAG ) \
+#define TCL_OR_VP_CHECK( NM, FLAG, ADD ) \
static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
{ \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
- return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0; \
+ return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
}
-#define VP_CHECK( NM, FLAG ) \
+#define VP_CHECK( NM, FLAG, ADD ) \
static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
{ \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
(void) atom; \
- return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \
+ return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
}
-CHECK( always, GL_TRUE )
-CHECK( never, GL_FALSE )
-CHECK( tex_any, ctx->Texture._EnabledUnits )
-CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) );
-CHECK( tex_pair, (rmesa->state.texture.unit[atom->idx].unitneeded | rmesa->state.texture.unit[atom->idx & ~1].unitneeded) )
-CHECK( tex, rmesa->state.texture.unit[atom->idx].unitneeded )
-CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled )
- CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled) )
-CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) )
-CHECK( afs, ctx->ATIFragmentShader._Enabled )
-CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT )
-TCL_CHECK( tcl_fog, ctx->Fog.Enabled )
-TCL_CHECK( tcl, GL_TRUE )
-TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded )
-TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
-TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled )
-TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))) )
-TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE )
-VP_CHECK( tcl_vp, GL_TRUE )
-VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64 )
-VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96 )
+CHECK( always, GL_TRUE, 0 )
+CHECK( always_add4, GL_TRUE, 4 )
+CHECK( never, GL_FALSE, 0 )
+CHECK( tex_any, ctx->Texture._EnabledUnits, 0 )
+CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled), 0 );
+CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled, 0 )
+ CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 )
+CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)), 0 )
+CHECK( afs, ctx->ATIFragmentShader._Enabled, 0 )
+CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( tex_cube_cs, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 2 + 2*5 - CUBE_STATE_SIZE )
+TCL_CHECK( tcl_fog, ctx->Fog.Enabled, 0 )
+TCL_CHECK( tcl_fog_add4, ctx->Fog.Enabled, 4 )
+TCL_CHECK( tcl, GL_TRUE, 0 )
+TCL_CHECK( tcl_add8, GL_TRUE, 8 )
+TCL_CHECK( tcl_add4, GL_TRUE, 4 )
+TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded, 0 )
+TCL_CHECK( tcl_lighting, ctx->Light.Enabled, 0 )
+TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 0 )
+TCL_CHECK( tcl_tex_add4, rmesa->state.texture.unit[atom->idx].unitneeded, 4 )
+TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 )
+TCL_CHECK( tcl_lighting_add6, ctx->Light.Enabled, 6 )
+TCL_CHECK( tcl_light_add8, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 8 )
+TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 0 )
+TCL_OR_VP_CHECK( tcl_ucp_add4, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 4 )
+TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE, 0 )
+TCL_OR_VP_CHECK( tcl_or_vp_add2, GL_TRUE, 2 )
+VP_CHECK( tcl_vp, GL_TRUE, 0 )
+VP_CHECK( tcl_vp_add4, GL_TRUE, 4 )
+VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64, 0 )
+VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96, 0 )
+VP_CHECK( tcl_vp_size_add4, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64, 4 )
+VP_CHECK( tcl_vpp_size_add4, ctx->VertexProgram.Current->Base.NumNativeParameters > 96, 4 )
#define OUT_VEC(hdr, data) do { \
drm_radeon_cmd_header_t h; \
@@ -329,9 +345,8 @@ static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
- dwords += 6;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1));
OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18));
@@ -342,9 +357,8 @@ static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
- dwords += 8;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
OUT_VEC(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
@@ -355,9 +369,8 @@ static void ptp_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
- dwords += 8;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1);
OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1);
@@ -368,9 +381,8 @@ static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
- dwords += 4;
OUT_VECLINEAR(atom->cmd[0], atom->cmd+1);
}
@@ -378,9 +390,8 @@ static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
- dwords += 2;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_SCL(atom->cmd[0], atom->cmd+1);
END_BATCH();
@@ -391,9 +402,8 @@ static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
- dwords += 4;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_VEC(atom->cmd[0], atom->cmd+1);
END_BATCH();
@@ -406,10 +416,10 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
struct radeon_renderbuffer *rrb;
uint32_t cbpitch;
uint32_t zbpitch, depth_fmt;
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
/* output the first 7 bytes of context */
- BEGIN_BATCH_NO_AUTOSTATE(dwords+2+2);
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_BATCH_TABLE(atom->cmd, 5);
rrb = radeon_get_depthbuffer(&r200->radeon);
@@ -466,6 +476,31 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
END_BATCH();
}
+static int check_always_ctx( GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrb, *drb;
+ uint32_t dwords;
+
+ rrb = radeon_get_colorbuffer(&r200->radeon);
+ if (!rrb || !rrb->bo) {
+ return 0;
+ }
+
+ drb = radeon_get_depthbuffer(&r200->radeon);
+
+ dwords = 10;
+ if (drb)
+ dwords += 6;
+ if (rrb)
+ dwords += 8;
+ if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
+ dwords += 4;
+
+
+ return dwords;
+}
+
static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
@@ -473,7 +508,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
struct radeon_renderbuffer *rrb, *drb;
uint32_t cbpitch = 0;
uint32_t zbpitch = 0;
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
uint32_t depth_fmt;
rrb = radeon_get_colorbuffer(&r200->radeon);
@@ -511,14 +546,6 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
}
- dwords = 10;
- if (drb)
- dwords += 6;
- if (rrb)
- dwords += 8;
- if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
- dwords += 4;
-
/* output the first 7 bytes of context */
BEGIN_BATCH_NO_AUTOSTATE(dwords);
@@ -556,16 +583,46 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
END_BATCH();
}
+static int get_tex_size(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ uint32_t dwords = atom->cmd_size + 2;
+ int i = atom->idx;
+ radeonTexObj *t = r200->state.texture.unit[i].texobj;
+ if (!(t && t->mt && !t->image_override))
+ dwords -= 2;
+
+ return dwords;
+}
+
+static int check_tex_pair(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ /** XOR is bit flip operation so use it for finding pair */
+ if (!(r200->state.texture.unit[atom->idx].unitneeded | r200->state.texture.unit[atom->idx ^ 1].unitneeded))
+ return 0;
+
+ return get_tex_size(ctx, atom);
+}
+
+static int check_tex(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ if (!(r200->state.texture.unit[atom->idx].unitneeded))
+ return 0;
+
+ return get_tex_size(ctx, atom);
+}
+
+
static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
int i = atom->idx;
radeonTexObj *t = r200->state.texture.unit[i].texobj;
- if (t && t->mt && !t->image_override)
- dwords += 2;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
/* is this ok even with drm older than 1.18? */
OUT_BATCH_TABLE(atom->cmd, 10);
@@ -583,17 +640,13 @@ static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
END_BATCH();
}
-static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+static int get_tex_mm_size(GLcontext* ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
- BATCH_LOCALS(&r200->radeon);
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->cmd_size + 2;
+ int hastexture = 1;
int i = atom->idx;
radeonTexObj *t = r200->state.texture.unit[i].texobj;
- int hastexture = 1;
-
- if (!r200->state.texture.unit[i].unitneeded)
- hastexture = 0;
if (!t)
hastexture = 0;
else {
@@ -601,16 +654,46 @@ static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
hastexture = 0;
}
- if (hastexture)
- dwords += 2;
- else
- dwords -= 2;
+ if (!hastexture)
+ dwords -= 4;
+ return dwords;
+}
+
+static int check_tex_pair_mm(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ /** XOR is bit flip operation so use it for finding pair */
+ if (!(r200->state.texture.unit[atom->idx].unitneeded | r200->state.texture.unit[atom->idx ^ 1].unitneeded))
+ return 0;
+
+ return get_tex_mm_size(ctx, atom);
+}
+
+static int check_tex_mm(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ if (!(r200->state.texture.unit[atom->idx].unitneeded))
+ return 0;
+
+ return get_tex_mm_size(ctx, atom);
+}
+
+
+static void tex_emit_mm(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r200ContextPtr r200 = R200_CONTEXT(ctx);
+ BATCH_LOCALS(&r200->radeon);
+ uint32_t dwords = atom->check(ctx, atom);
+ int i = atom->idx;
+ radeonTexObj *t = r200->state.texture.unit[i].texobj;
+ if (!r200->state.texture.unit[i].unitneeded)
+ dwords -= 4;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (32 * i), 7));
OUT_BATCH_TABLE((atom->cmd + 1), 8);
- if (hastexture) {
+ if (dwords > atom->cmd_size) {
OUT_BATCH(CP_PACKET0(R200_PP_TXOFFSET_0 + (24 * i), 0));
if (t->mt && !t->image_override) {
OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
@@ -629,12 +712,15 @@ static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
- uint32_t dwords = 3;
+ uint32_t dwords = atom->check(ctx, atom);
int i = atom->idx, j;
radeonTexObj *t = r200->state.texture.unit[i].texobj;
radeon_mipmap_level *lvl;
- BEGIN_BATCH_NO_AUTOSTATE(dwords + (3 * 5));
+ if (!(t && !t->image_override))
+ dwords = 2;
+
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
/* XXX that size won't really match with image_override... */
OUT_BATCH_TABLE(atom->cmd, 2);
@@ -653,12 +739,14 @@ static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
- uint32_t dwords = 2;
+ uint32_t dwords = atom->check(ctx, atom);
int i = atom->idx, j;
radeonTexObj *t = r200->state.texture.unit[i].texobj;
radeon_mipmap_level *lvl;
+ if (!(t && !t->image_override))
+ dwords = 2;
- BEGIN_BATCH_NO_AUTOSTATE(dwords + (4 * 5));
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_BATCH_TABLE(atom->cmd, 2);
if (t && !t->image_override) {
@@ -713,14 +801,19 @@ void r200InitState( r200ContextPtr rmesa )
/* Allocate state buffers:
*/
if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
- ALLOC_STATE( ctx, always, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 );
+ ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 );
else
- ALLOC_STATE( ctx, always, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 );
+ ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 );
if (rmesa->radeon.radeonScreen->kernel_mm)
+ {
rmesa->hw.ctx.emit = ctx_emit_cs;
+ rmesa->hw.ctx.check = check_always_ctx;
+ }
else
+ {
rmesa->hw.ctx.emit = ctx_emit;
+ }
ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
@@ -732,49 +825,56 @@ void r200InitState( r200ContextPtr rmesa )
ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
- if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
- if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
- /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
- ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
- ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
- ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
- }
- else {
- ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
- ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
- ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
+ {
+ int state_size = TEX_STATE_SIZE_NEWDRM;
+ if (!rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+ state_size = TEX_STATE_SIZE_OLDDRM;
}
- ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-2", 2 );
- ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-3", 3 );
- ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-4", 4 );
- ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-5", 5 );
- ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 );
- ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
- ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
- }
- else {
- if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
- ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
- ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
- ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
- }
- else {
- ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
- ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
- ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
+ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+ /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
+ ALLOC_STATE( tex[0], tex_pair_mm, state_size, "TEX/tex-0", 0 );
+ ALLOC_STATE( tex[1], tex_pair_mm, state_size, "TEX/tex-1", 1 );
+ ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+ }
+ else {
+ ALLOC_STATE( tex[0], tex_mm, state_size, "TEX/tex-0", 0 );
+ ALLOC_STATE( tex[1], tex_mm, state_size, "TEX/tex-1", 1 );
+ ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
+ }
+ ALLOC_STATE( tex[2], tex_mm, state_size, "TEX/tex-2", 2 );
+ ALLOC_STATE( tex[3], tex_mm, state_size, "TEX/tex-3", 3 );
+ ALLOC_STATE( tex[4], tex_mm, state_size, "TEX/tex-4", 4 );
+ ALLOC_STATE( tex[5], tex_mm, state_size, "TEX/tex-5", 5 );
+ if (!rmesa->radeon.radeonScreen->kernel_mm)
+ {
+ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+ rmesa->hw.tex[0].check = check_tex_pair;
+ rmesa->hw.tex[1].check = check_tex_pair;
+ } else {
+ rmesa->hw.tex[0].check = check_tex;
+ rmesa->hw.tex[1].check = check_tex;
+ }
+ rmesa->hw.tex[2].check = check_tex;
+ rmesa->hw.tex[3].check = check_tex;
+ rmesa->hw.tex[4].check = check_tex;
+ rmesa->hw.tex[5].check = check_tex;
+ }
+ if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+ ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 );
+ ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
+ ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
+ } else {
+ ALLOC_STATE( atf, never, ATF_STATE_SIZE, "ATF/tfactor", 0 );
+ ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
+ ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
+ }
}
- ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-2", 2 );
- ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-3", 3 );
- ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-4", 4 );
- ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-5", 5 );
- ALLOC_STATE( atf, never, ATF_STATE_SIZE, "TF/tfactor", 0 );
- ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
- ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
}
for (i = 0; i < 6; i++)
if (rmesa->radeon.radeonScreen->kernel_mm)
- rmesa->hw.tex[i].emit = tex_emit_cs;
+ rmesa->hw.tex[i].emit = tex_emit_mm;
else
rmesa->hw.tex[i].emit = tex_emit;
if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) {
@@ -785,9 +885,10 @@ void r200InitState( r200ContextPtr rmesa )
ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
for (i = 0; i < 6; i++)
- if (rmesa->radeon.radeonScreen->kernel_mm)
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
rmesa->hw.cube[i].emit = cube_emit_cs;
- else
+ rmesa->hw.cube[i].check = check_tex_cube_cs;
+ } else
rmesa->hw.cube[i].emit = cube_emit;
}
else {
@@ -801,10 +902,17 @@ void r200InitState( r200ContextPtr rmesa )
if (rmesa->radeon.radeonScreen->drmSupportsVertexProgram) {
ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
- ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
- ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
- ALLOC_STATE( vpp[0], tcl_vp, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 );
- ALLOC_STATE( vpp[1], tcl_vpp_size, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 );
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE( vpi[0], tcl_vp_add4, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
+ ALLOC_STATE( vpi[1], tcl_vp_size_add4, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
+ ALLOC_STATE( vpp[0], tcl_vp_add4, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 );
+ ALLOC_STATE( vpp[1], tcl_vpp_size_add4, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 );
+ } else {
+ ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
+ ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
+ ALLOC_STATE( vpp[0], tcl_vp, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 );
+ ALLOC_STATE( vpp[1], tcl_vpp_size, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 );
+ }
}
else {
ALLOC_STATE( pvs, never, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
@@ -817,35 +925,67 @@ void r200InitState( r200ContextPtr rmesa )
ALLOC_STATE( tcl, tcl_or_vp, TCL_STATE_SIZE, "TCL/tcl", 0 );
ALLOC_STATE( msl, tcl, MSL_STATE_SIZE, "MSL/matrix-select", 0 );
ALLOC_STATE( tcg, tcl, TCG_STATE_SIZE, "TCG/texcoordgen", 0 );
- ALLOC_STATE( mtl[0], tcl_lighting, MTL_STATE_SIZE, "MTL0/material0", 0 );
- ALLOC_STATE( mtl[1], tcl_lighting, MTL_STATE_SIZE, "MTL1/material1", 1 );
- ALLOC_STATE( grd, tcl_or_vp, GRD_STATE_SIZE, "GRD/guard-band", 0 );
- ALLOC_STATE( fog, tcl_fog, FOG_STATE_SIZE, "FOG/fog", 0 );
- ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 0 );
- ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 0 );
- ALLOC_STATE( mat[R200_MTX_MV], tcl, MAT_STATE_SIZE, "MAT/modelview", 0 );
- ALLOC_STATE( mat[R200_MTX_IMV], tcl, MAT_STATE_SIZE, "MAT/it-modelview", 0 );
- ALLOC_STATE( mat[R200_MTX_MVP], tcl, MAT_STATE_SIZE, "MAT/modelproject", 0 );
- ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex, MAT_STATE_SIZE, "MAT/texmat0", 0 );
- ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex, MAT_STATE_SIZE, "MAT/texmat1", 1 );
- ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex, MAT_STATE_SIZE, "MAT/texmat2", 2 );
- ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex, MAT_STATE_SIZE, "MAT/texmat3", 3 );
- ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex, MAT_STATE_SIZE, "MAT/texmat4", 4 );
- ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex, MAT_STATE_SIZE, "MAT/texmat5", 5 );
- ALLOC_STATE( ucp[0], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-0", 0 );
- ALLOC_STATE( ucp[1], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
- ALLOC_STATE( ucp[2], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-2", 2 );
- ALLOC_STATE( ucp[3], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-3", 3 );
- ALLOC_STATE( ucp[4], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-4", 4 );
- ALLOC_STATE( ucp[5], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-5", 5 );
- ALLOC_STATE( lit[0], tcl_light, LIT_STATE_SIZE, "LIT/light-0", 0 );
- ALLOC_STATE( lit[1], tcl_light, LIT_STATE_SIZE, "LIT/light-1", 1 );
- ALLOC_STATE( lit[2], tcl_light, LIT_STATE_SIZE, "LIT/light-2", 2 );
- ALLOC_STATE( lit[3], tcl_light, LIT_STATE_SIZE, "LIT/light-3", 3 );
- ALLOC_STATE( lit[4], tcl_light, LIT_STATE_SIZE, "LIT/light-4", 4 );
- ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 );
- ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 );
- ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 );
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE( mtl[0], tcl_lighting_add6, MTL_STATE_SIZE, "MTL0/material0", 0 );
+ ALLOC_STATE( mtl[1], tcl_lighting_add6, MTL_STATE_SIZE, "MTL1/material1", 1 );
+ ALLOC_STATE( grd, tcl_or_vp_add2, GRD_STATE_SIZE, "GRD/guard-band", 0 );
+ ALLOC_STATE( fog, tcl_fog_add4, FOG_STATE_SIZE, "FOG/fog", 0 );
+ ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 0 );
+ ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 0 );
+ ALLOC_STATE( mat[R200_MTX_MV], tcl_add4, MAT_STATE_SIZE, "MAT/modelview", 0 );
+ ALLOC_STATE( mat[R200_MTX_IMV], tcl_add4, MAT_STATE_SIZE, "MAT/it-modelview", 0 );
+ ALLOC_STATE( mat[R200_MTX_MVP], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 0 );
+ ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat0", 0 );
+ ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+ ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat2", 2 );
+ ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat3", 3 );
+ ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat4", 4 );
+ ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat5", 5 );
+ ALLOC_STATE( ucp[0], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-0", 0 );
+ ALLOC_STATE( ucp[1], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+ ALLOC_STATE( ucp[2], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-2", 2 );
+ ALLOC_STATE( ucp[3], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-3", 3 );
+ ALLOC_STATE( ucp[4], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-4", 4 );
+ ALLOC_STATE( ucp[5], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-5", 5 );
+ ALLOC_STATE( lit[0], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-0", 0 );
+ ALLOC_STATE( lit[1], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-1", 1 );
+ ALLOC_STATE( lit[2], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-2", 2 );
+ ALLOC_STATE( lit[3], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-3", 3 );
+ ALLOC_STATE( lit[4], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-4", 4 );
+ ALLOC_STATE( lit[5], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-5", 5 );
+ ALLOC_STATE( lit[6], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-6", 6 );
+ ALLOC_STATE( lit[7], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-7", 7 );
+ } else {
+ ALLOC_STATE( mtl[0], tcl_lighting, MTL_STATE_SIZE, "MTL0/material0", 0 );
+ ALLOC_STATE( mtl[1], tcl_lighting, MTL_STATE_SIZE, "MTL1/material1", 1 );
+ ALLOC_STATE( grd, tcl_or_vp, GRD_STATE_SIZE, "GRD/guard-band", 0 );
+ ALLOC_STATE( fog, tcl_fog, FOG_STATE_SIZE, "FOG/fog", 0 );
+ ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 0 );
+ ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 0 );
+ ALLOC_STATE( mat[R200_MTX_MV], tcl, MAT_STATE_SIZE, "MAT/modelview", 0 );
+ ALLOC_STATE( mat[R200_MTX_IMV], tcl, MAT_STATE_SIZE, "MAT/it-modelview", 0 );
+ ALLOC_STATE( mat[R200_MTX_MVP], tcl, MAT_STATE_SIZE, "MAT/modelproject", 0 );
+ ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex, MAT_STATE_SIZE, "MAT/texmat0", 0 );
+ ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+ ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex, MAT_STATE_SIZE, "MAT/texmat2", 2 );
+ ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex, MAT_STATE_SIZE, "MAT/texmat3", 3 );
+ ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex, MAT_STATE_SIZE, "MAT/texmat4", 4 );
+ ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex, MAT_STATE_SIZE, "MAT/texmat5", 5 );
+ ALLOC_STATE( ucp[0], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-0", 0 );
+ ALLOC_STATE( ucp[1], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+ ALLOC_STATE( ucp[2], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-2", 2 );
+ ALLOC_STATE( ucp[3], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-3", 3 );
+ ALLOC_STATE( ucp[4], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-4", 4 );
+ ALLOC_STATE( ucp[5], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-5", 5 );
+ ALLOC_STATE( lit[0], tcl_light, LIT_STATE_SIZE, "LIT/light-0", 0 );
+ ALLOC_STATE( lit[1], tcl_light, LIT_STATE_SIZE, "LIT/light-1", 1 );
+ ALLOC_STATE( lit[2], tcl_light, LIT_STATE_SIZE, "LIT/light-2", 2 );
+ ALLOC_STATE( lit[3], tcl_light, LIT_STATE_SIZE, "LIT/light-3", 3 );
+ ALLOC_STATE( lit[4], tcl_light, LIT_STATE_SIZE, "LIT/light-4", 4 );
+ ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 );
+ ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 );
+ ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 );
+ }
ALLOC_STATE( pix[0], pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 );
ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 );
@@ -860,7 +1000,10 @@ void r200InitState( r200ContextPtr rmesa )
}
if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) {
ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
- ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 );
+ if (rmesa->radeon.radeonScreen->kernel_mm)
+ ALLOC_STATE( ptp, tcl_add8, PTP_STATE_SIZE, "PTP/pointparams", 0 );
+ else
+ ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 );
}
else {
ALLOC_STATE (spr, never, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
@@ -953,18 +1096,18 @@ void r200InitState( r200ContextPtr rmesa )
rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL);
rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL);
if (rmesa->radeon.radeonScreen->kernel_mm) {
- rmesa->hw.mtl[0].emit = mtl_emit;
- rmesa->hw.mtl[1].emit = mtl_emit;
-
- rmesa->hw.vpi[0].emit = veclinear_emit;
- rmesa->hw.vpi[1].emit = veclinear_emit;
- rmesa->hw.vpp[0].emit = veclinear_emit;
- rmesa->hw.vpp[1].emit = veclinear_emit;
-
- rmesa->hw.grd.emit = scl_emit;
- rmesa->hw.fog.emit = vec_emit;
- rmesa->hw.glt.emit = vec_emit;
- rmesa->hw.eye.emit = vec_emit;
+ rmesa->hw.mtl[0].emit = mtl_emit;
+ rmesa->hw.mtl[1].emit = mtl_emit;
+
+ rmesa->hw.vpi[0].emit = veclinear_emit;
+ rmesa->hw.vpi[1].emit = veclinear_emit;
+ rmesa->hw.vpp[0].emit = veclinear_emit;
+ rmesa->hw.vpp[1].emit = veclinear_emit;
+
+ rmesa->hw.grd.emit = scl_emit;
+ rmesa->hw.fog.emit = vec_emit;
+ rmesa->hw.glt.emit = vec_emit;
+ rmesa->hw.eye.emit = vec_emit;
for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++)
rmesa->hw.mat[i].emit = vec_emit;
diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
index 1b23891140..56930c7863 100644
--- a/src/mesa/drivers/dri/r200/r200_swtcl.c
+++ b/src/mesa/drivers/dri/r200/r200_swtcl.c
@@ -269,7 +269,7 @@ void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
rcommonEnsureCmdBufSpace(&rmesa->radeon,
- rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
+ radeonCountStateEmitSize(&rmesa->radeon) + (12*sizeof(int)),
__FUNCTION__);
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
index ca9a8dbf8c..7b3a9fc859 100644
--- a/src/mesa/drivers/dri/r200/r200_tcl.c
+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
@@ -51,6 +51,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_swtcl.h"
#include "r200_maos.h"
+#include "radeon_common_context.h"
+
#define HAVE_POINTS 1
@@ -109,7 +111,7 @@ static GLboolean discrete_prim[0x10] = {
#define ELT_INIT(prim, hw_prim) \
r200TclPrimitive( ctx, prim, hw_prim | R200_VF_PRIM_WALK_IND )
-#define GET_MESA_ELTS() rmesa->tcl.Elts
+#define GET_MESA_ELTS() TNL_CONTEXT(ctx)->vb.Elts
/* Don't really know how many elts will fit in what's left of cmdbuf,
@@ -156,8 +158,6 @@ static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr )
if (rmesa->radeon.dma.flush)
rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- rcommonEnsureCmdBufSpace(&rmesa->radeon, AOS_BUFSZ(rmesa->radeon.tcl.aos_count), __FUNCTION__);
-
r200EmitAOS( rmesa,
rmesa->radeon.tcl.aos_count, 0 );
@@ -187,9 +187,6 @@ static void r200EmitPrim( GLcontext *ctx,
r200TclPrimitive( ctx, prim, hwprim );
// fprintf(stderr,"Emit prim %d\n", rmesa->radeon.tcl.aos_count);
- rcommonEnsureCmdBufSpace( &rmesa->radeon,
- AOS_BUFSZ(rmesa->radeon.tcl.aos_count) +
- rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ );
r200EmitAOS( rmesa,
rmesa->radeon.tcl.aos_count,
@@ -206,6 +203,7 @@ static void r200EmitPrim( GLcontext *ctx,
r200EmitPrim( ctx, prim, hwprim, start, count ); \
(void) rmesa; } while (0)
+#define MAX_CONVERSION_SIZE 40
/* Try & join small primitives
*/
#if 0
@@ -368,6 +366,63 @@ r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
}
}
+/**
+ * Predict total emit size for next rendering operation so there is no flush in middle of rendering
+ * Prediction has to aim towards the best possible value that is worse than worst case scenario
+ */
+static GLuint r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev )
+{
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint space_required;
+ GLuint state_size;
+ GLuint nr_aos = 0;
+ int i;
+ /* predict number of aos to emit */
+ for (i = 0; i < 15; ++i)
+ {
+ if (vimap_rev[i] != 255)
+ {
+ ++nr_aos;
+ }
+ }
+
+ {
+ /* count the prediction for state size */
+ space_required = 0;
+ state_size = radeonCountStateEmitSize( &rmesa->radeon );
+ /* vtx may be changed in r200EmitArrays so account for it if not dirty */
+ if (!rmesa->hw.vtx.dirty)
+ state_size += rmesa->hw.vtx.check(rmesa->radeon.glCtx, &rmesa->hw.vtx);
+ /* predict size for elements */
+ for (i = 0; i < VB->PrimitiveCount; ++i)
+ {
+ if (!VB->Primitive[i].count)
+ continue;
+ /* If primitive.count is less than MAX_CONVERSION_SIZE
+ rendering code may decide convert to elts.
+ In that case we have to make pessimistic prediction.
+ and use larger of 2 paths. */
+ const GLuint elts = ELTS_BUFSZ(nr_aos);
+ const GLuint index = INDEX_BUFSZ;
+ const GLuint vbuf = VBUF_BUFSZ;
+ if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
+ || vbuf > index + elts)
+ space_required += vbuf;
+ else
+ space_required += index + elts;
+ space_required += AOS_BUFSZ(nr_aos);
+ }
+ space_required += SCISSOR_BUFSZ;
+ }
+ /* flush the buffer in case we need more than is left. */
+ if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required + state_size, __FUNCTION__))
+ return space_required + radeonCountStateEmitSize( &rmesa->radeon );
+ else
+ return space_required + state_size;
+}
+
/**********************************************************************/
/* Render pipeline stage */
@@ -482,10 +537,10 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
/* Do the actual work:
*/
radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ );
+ GLuint emit_end = r200EnsureEmitSize( ctx, vimap_rev )
+ + rmesa->radeon.cmdbuf.cs->cdw;
r200EmitArrays( ctx, vimap_rev );
- rmesa->tcl.Elts = VB->Elts;
-
for (i = 0 ; i < VB->PrimitiveCount ; i++)
{
GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
@@ -495,11 +550,14 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
if (!length)
continue;
- if (rmesa->tcl.Elts)
+ if (VB->Elts)
r200EmitEltPrimitive( ctx, start, start+length, prim );
else
r200EmitPrimitive( ctx, start, start+length, prim );
}
+ if ( emit_end < rmesa->radeon.cmdbuf.cs->cdw )
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
return GL_FALSE; /* finished the pipe */
}
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
index 6b33f48885..87651716fe 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -74,6 +74,21 @@ static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt)
#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
+int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int cnt;
+ int extra = 1;
+ cnt = vpu_count(atom->cmd);
+
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ extra = 5;
+ }
+
+ return cnt ? (cnt * 4) + extra : 0;
+}
+
+
void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
@@ -81,39 +96,18 @@ void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom)
drm_r300_cmd_header_t cmd;
uint32_t addr, ndw;
- if (!r300->radeon.radeonScreen->kernel_mm) {
- uint32_t dwords;
- dwords = (*atom->check) (ctx, atom);
- BEGIN_BATCH_NO_AUTOSTATE(dwords);
- OUT_BATCH_TABLE(atom->cmd, dwords);
- END_BATCH();
- return;
- }
-
cmd.u = atom->cmd[0];
addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
- ndw = cmd.vpu.count * 4;
- if (ndw) {
+ ndw = atom->check(ctx, atom);
- if (r300->vap_flush_needed) {
- BEGIN_BATCH_NO_AUTOSTATE(15 + ndw);
+ BEGIN_BATCH_NO_AUTOSTATE(ndw);
- /* flush processing vertices */
- OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0);
- OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
- OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
- OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0xffffff);
- OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
- r300->vap_flush_needed = GL_FALSE;
- } else {
- BEGIN_BATCH_NO_AUTOSTATE(5 + ndw);
- }
- OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr);
- OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR);
- OUT_BATCH_TABLE(&atom->cmd[1], ndw);
- OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
- END_BATCH();
- }
+ ndw -= 5;
+ OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr);
+ OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR);
+ OUT_BATCH_TABLE(&atom->cmd[1], ndw);
+ OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+ END_BATCH();
}
void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom)
@@ -122,16 +116,9 @@ void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom)
BATCH_LOCALS(&r300->radeon);
drm_r300_cmd_header_t cmd;
uint32_t addr, ndw, sz;
- int type, clamp, stride;
+ int type, clamp;
- if (!r300->radeon.radeonScreen->kernel_mm) {
- uint32_t dwords;
- dwords = (*atom->check) (ctx, atom);
- BEGIN_BATCH_NO_AUTOSTATE(dwords);
- OUT_BATCH_TABLE(atom->cmd, dwords);
- END_BATCH();
- return;
- }
+ ndw = atom->check(ctx, atom);
cmd.u = atom->cmd[0];
sz = cmd.r500fp.count;
@@ -142,18 +129,34 @@ void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom)
addr |= (type << 16);
addr |= (clamp << 17);
- stride = type ? 4 : 6;
-
- ndw = sz * stride;
- if (ndw) {
+ BEGIN_BATCH_NO_AUTOSTATE(ndw);
+ OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0));
+ OUT_BATCH(addr);
+ ndw-=3;
+ OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR);
+ OUT_BATCH_TABLE(&atom->cmd[1], ndw);
+ END_BATCH();
+}
- BEGIN_BATCH_NO_AUTOSTATE(3 + ndw);
- OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0));
- OUT_BATCH(addr);
- OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR);
- OUT_BATCH_TABLE(&atom->cmd[1], ndw);
- END_BATCH();
+static int check_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd);
+ int dw = 0, i;
+ if (atom->cmd[0] == CP_PACKET2) {
+ return dw;
+ }
+ for(i = 0; i < numtmus; ++i) {
+ radeonTexObj *t = r300->hw.textures[i];
+ if (!t && !r300->radeon.radeonScreen->kernel_mm) {
+ dw += 0;
+ } else if (t && t->image_override && !t->bo) {
+ if (!r300->radeon.radeonScreen->kernel_mm)
+ dw += 2;
+ } else
+ dw += 4;
}
+ return dw;
}
static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
@@ -243,6 +246,17 @@ void r300_emit_scissor(GLcontext *ctx)
OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT));
END_BATCH();
}
+static int check_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ uint32_t dw = 6 + 3 + 16;
+ if (r300->radeon.radeonScreen->kernel_mm)
+ dw += 2;
+ if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ dw -= 3 + 16;
+ }
+ return dw;
+}
static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
{
@@ -252,7 +266,7 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
uint32_t cbpitch;
uint32_t offset = r300->radeon.state.color.draw_offset;
uint32_t dw = 6;
- int i;
+ int i;
rrb = radeon_get_colorbuffer(&r300->radeon);
if (!rrb || !rrb->bo) {
@@ -334,13 +348,23 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
}
}
+static int check_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ uint32_t dw;
+ dw = 6;
+ if (r300->radeon.radeonScreen->kernel_mm)
+ dw += 2;
+ return dw;
+}
+
static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
BATCH_LOCALS(&r300->radeon);
struct radeon_renderbuffer *rrb;
uint32_t zbpitch;
- uint32_t dw;
+ uint32_t dw = atom->check(ctx, atom);
rrb = radeon_get_depthbuffer(&r300->radeon);
if (!rrb)
@@ -356,9 +380,6 @@ static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
}
}
- dw = 6;
- if (r300->radeon.radeonScreen->kernel_mm)
- dw += 2;
BEGIN_BATCH_NO_AUTOSTATE(dw);
OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
@@ -370,46 +391,6 @@ static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
END_BATCH();
}
-static void emit_gb_misc(GLcontext *ctx, struct radeon_state_atom * atom)
-{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
- BATCH_LOCALS(&r300->radeon);
- if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
- BEGIN_BATCH_NO_AUTOSTATE(4);
- OUT_BATCH(atom->cmd[0]);
- OUT_BATCH(atom->cmd[1]);
- OUT_BATCH(atom->cmd[2]);
- OUT_BATCH(atom->cmd[3]);
- END_BATCH();
- }
-}
-
-static void emit_threshold_misc(GLcontext *ctx, struct radeon_state_atom * atom)
-{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
- BATCH_LOCALS(&r300->radeon);
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- BEGIN_BATCH_NO_AUTOSTATE(3);
- OUT_BATCH(atom->cmd[0]);
- OUT_BATCH(atom->cmd[1]);
- OUT_BATCH(atom->cmd[2]);
- END_BATCH();
- }
-}
-
-static void emit_shade_misc(GLcontext *ctx, struct radeon_state_atom * atom)
-{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
- BATCH_LOCALS(&r300->radeon);
-
- if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
- BEGIN_BATCH_NO_AUTOSTATE(2);
- OUT_BATCH(atom->cmd[0]);
- OUT_BATCH(atom->cmd[1]);
- END_BATCH();
- }
-}
-
static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
@@ -427,7 +408,7 @@ static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom
format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
}
- BEGIN_BATCH_NO_AUTOSTATE(5);
+ BEGIN_BATCH_NO_AUTOSTATE(atom->cmd_size);
OUT_BATCH(atom->cmd[0]);
atom->cmd[1] &= ~0xf;
atom->cmd[1] |= format;
@@ -438,6 +419,11 @@ static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom
END_BATCH();
}
+static int check_never(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ return 0;
+}
+
static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
{
return atom->cmd_size;
@@ -454,28 +440,29 @@ static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom)
return cnt ? cnt + 1 : 0;
}
-int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
-{
- int cnt;
-
- cnt = vpu_count(atom->cmd);
- return cnt ? (cnt * 4) + 1 : 0;
-}
-
int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom)
{
int cnt;
-
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int extra = 1;
cnt = r500fp_count(atom->cmd);
- return cnt ? (cnt * 6) + 1 : 0;
+ if (r300->radeon.radeonScreen->kernel_mm)
+ extra = 3;
+
+ return cnt ? (cnt * 6) + extra : 0;
}
int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom)
{
int cnt;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int extra = 1;
+ cnt = r500fp_count(atom->cmd);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ extra = 3;
cnt = r500fp_count(atom->cmd);
- return cnt ? (cnt * 4) + 1 : 0;
+ return cnt ? (cnt * 4) + extra : 0;
}
#define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \
@@ -565,11 +552,14 @@ void r300InitCmdBuf(r300ContextPtr r300)
ALLOC_STATE(gb_enable, always, 2, 0);
r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1);
- ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
+ if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
+ } else {
+ ALLOC_STATE(gb_misc, never, R300_GB_MISC_CMDSIZE, 0);
+ }
r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 3);
- r300->hw.gb_misc.emit = emit_gb_misc;
ALLOC_STATE(gb_misc2, always, R300_GB_MISC2_CMDSIZE, 0);
- r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2);
+ r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2);
ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0);
r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1);
ALLOC_STATE(ga_point_s0, always, 5, 0);
@@ -584,9 +574,12 @@ void r300InitCmdBuf(r300ContextPtr r300)
r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1);
ALLOC_STATE(ga_line_stipple, always, 4, 0);
r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3);
- ALLOC_STATE(shade, always, 2, 0);
+ if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ ALLOC_STATE(shade, always, 2, 0);
+ } else {
+ ALLOC_STATE(shade, never, 2, 0);
+ }
r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 1);
- r300->hw.shade.emit = emit_shade_misc;
ALLOC_STATE(shade2, always, 4, 0);
r300->hw.shade2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4278, 3);
ALLOC_STATE(polygon_mode, always, 4, 0);
@@ -635,11 +628,14 @@ void r300InitCmdBuf(r300ContextPtr r300)
ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0);
r300->hw.r500fp.cmd[R300_FPI_CMD_0] =
cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0);
- r300->hw.r500fp.emit = emit_r500fp;
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.r500fp.emit = emit_r500fp;
+
ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0);
r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] =
cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0);
- r300->hw.r500fp_const.emit = emit_r500fp;
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.r500fp_const.emit = emit_r500fp;
} else {
ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3);
@@ -682,15 +678,18 @@ void r300InitCmdBuf(r300ContextPtr r300)
}
ALLOC_STATE(rop, always, 2, 0);
r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1);
- ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
+ ALLOC_STATE(cb, cb_offset, R300_CB_CMDSIZE, 0);
r300->hw.cb.emit = &emit_cb_offset;
ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9);
ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1);
- ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
- r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
- r300->hw.rb3d_discard_src_pixel_lte_threshold.emit = emit_threshold_misc;
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
+ } else {
+ ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, never, 3, 0);
+ }
+ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
r300->hw.zs.cmd[R300_ZS_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3);
@@ -700,7 +699,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4);
r300->hw.zstencil_format.emit = emit_zstencil_format;
- ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
+ ALLOC_STATE(zb, zb_offset, R300_ZB_CMDSIZE, 0);
r300->hw.zb.emit = emit_zb_offset;
ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1);
@@ -713,47 +712,72 @@ void r300InitCmdBuf(r300ContextPtr r300)
/* VPU only on TCL */
if (has_tcl) {
- int i;
+ int i;
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE(vap_flush, always, 10, 0);
+ /* flush processing vertices */
+ r300->hw.vap_flush.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
+ r300->hw.vap_flush.cmd[1] = 0;
+ r300->hw.vap_flush.cmd[2] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DSTCACHE_CTLSTAT, 1);
+ r300->hw.vap_flush.cmd[3] = R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D;
+ r300->hw.vap_flush.cmd[4] = cmdpacket0(r300->radeon.radeonScreen, RADEON_WAIT_UNTIL, 1);
+ r300->hw.vap_flush.cmd[5] = RADEON_WAIT_3D_IDLECLEAN;
+ r300->hw.vap_flush.cmd[6] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
+ r300->hw.vap_flush.cmd[7] = 0xffffff;
+ r300->hw.vap_flush.cmd[8] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
+ r300->hw.vap_flush.cmd[9] = 0;
+ } else {
+ ALLOC_STATE(vap_flush, never, 10, 0);
+ }
+
+
ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
r300->hw.vpi.cmd[0] =
- cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0);
- r300->hw.vpi.emit = emit_vpu;
+ cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpi.emit = emit_vpu;
if (is_r500) {
- ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
- r300->hw.vpp.cmd[0] =
- cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
- r300->hw.vpp.emit = emit_vpu;
-
- ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
- r300->hw.vps.cmd[0] =
- cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1);
- r300->hw.vps.emit = emit_vpu;
+ ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+ r300->hw.vpp.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpp.emit = emit_vpu;
+
+ ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+ r300->hw.vps.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vps.emit = emit_vpu;
for (i = 0; i < 6; i++) {
- ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
- r300->hw.vpucp[i].cmd[0] =
- cmdvpu(r300->radeon.radeonScreen,
- R500_PVS_UCP_START + i, 1);
- r300->hw.vpucp[i].emit = emit_vpu;
+ ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
+ r300->hw.vpucp[i].cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen,
+ R500_PVS_UCP_START + i, 1);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpucp[i].emit = emit_vpu;
}
} else {
- ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
- r300->hw.vpp.cmd[0] =
- cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
- r300->hw.vpp.emit = emit_vpu;
-
- ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
- r300->hw.vps.cmd[0] =
- cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1);
- r300->hw.vps.emit = emit_vpu;
+ ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+ r300->hw.vpp.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpp.emit = emit_vpu;
+
+ ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+ r300->hw.vps.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vps.emit = emit_vpu;
for (i = 0; i < 6; i++) {
ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
r300->hw.vpucp[i].cmd[0] =
cmdvpu(r300->radeon.radeonScreen,
- R300_PVS_UCP_START + i, 1);
- r300->hw.vpucp[i].emit = emit_vpu;
+ R300_PVS_UCP_START + i, 1);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpucp[i].emit = emit_vpu;
}
}
}
@@ -777,7 +801,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0);
- ALLOC_STATE(tex.offset, variable, 1, 0);
+ ALLOC_STATE(tex.offset, tex_offsets, 1, 0);
r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0);
r300->hw.tex.offset.emit = &emit_tex_offsets;
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
index 53bcc0eeb4..1b703e518a 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
@@ -38,6 +38,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_context.h"
+#define CACHE_FLUSH_BUFSZ (4*2)
+#define PRE_EMIT_STATE_BUFSZ (2+2)
+#define AOS_BUFSZ(nr) (3+(nr >>1)*3 + (nr&1)*2 + (nr*2))
+#define FIREAOS_BUFSZ (3)
+#define SCISSORS_BUFSZ (3)
+
extern void r300InitCmdBuf(r300ContextPtr r300);
void r300_emit_scissor(GLcontext *ctx);
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index f701c434a4..0bf002caea 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -214,11 +214,8 @@ static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmes
static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon)
{
- r300ContextPtr r300 = (r300ContextPtr)radeon;
BATCH_LOCALS(radeon);
- r300->vap_flush_needed = GL_TRUE;
-
cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
BEGIN_BATCH_NO_AUTOSTATE(2);
OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH);
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index a8fe508c4a..1dadcc0a69 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -350,6 +350,7 @@ struct r300_hw_state {
struct radeon_state_atom zb_hiz_offset; /* (4F44) */
struct radeon_state_atom zb_hiz_pitch; /* (4F54) */
+ struct radeon_state_atom vap_flush;
struct radeon_state_atom vpi; /* vp instructions */
struct radeon_state_atom vpp; /* vp parameters */
struct radeon_state_atom vps; /* vertex point size (?) */
@@ -524,7 +525,6 @@ struct r300_context {
struct r300_swtcl_info swtcl;
struct r300_vertex_buffer vbuf;
struct r300_index_buffer ind_buf;
- GLboolean vap_flush_needed;
uint32_t fallback;
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index e98dc33518..56680516c8 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -39,8 +39,10 @@
#include "r300_render.h"
#include "r300_state.h"
#include "r300_tex.h"
+#include "r300_cmdbuf.h"
#include "radeon_buffer_objects.h"
+#include "radeon_common_context.h"
#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
@@ -328,7 +330,6 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st
type = GL_FLOAT;
- r300ConvertAttrib(ctx, count, input, &r300_attr);
if (input->StrideB == 0) {
r300_attr.stride = 0;
} else {
@@ -339,41 +340,14 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st
} else {
type = input->Type;
r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4;
- if (input->BufferObj->Name) {
- if (stride % 4 != 0) {
- assert(((int) input->Ptr) % input->StrideB == 0);
- r300AlignDataToDword(ctx, input, count, &r300_attr);
- r300_attr.is_named_bo = GL_FALSE;
- } else {
- r300_attr.stride = input->StrideB;
- r300_attr.bo_offset = (GLuint) input->Ptr;
- r300_attr.bo = get_radeon_buffer_object(input->BufferObj)->bo;
- r300_attr.is_named_bo = GL_TRUE;
- }
- } else {
- int size;
- uint32_t *dst;
+ if (!input->BufferObj->Name) {
if (input->StrideB == 0) {
- size = getTypeSize(input->Type) * input->Size;
- count = 1;
r300_attr.stride = 0;
} else {
- size = getTypeSize(input->Type) * input->Size * count;
r300_attr.stride = (getTypeSize(type) * input->Size + 3) & ~3;
}
- radeonAllocDmaRegion(&r300->radeon, &r300_attr.bo, &r300_attr.bo_offset, size, 32);
- assert(r300_attr.bo->ptr != NULL);
- dst = (uint32_t *)ADD_POINTERS(r300_attr.bo->ptr, r300_attr.bo_offset);
- switch (r300_attr.dwords) {
- case 1: radeonEmitVec4(dst, input->Ptr, input->StrideB, count); break;
- case 2: radeonEmitVec8(dst, input->Ptr, input->StrideB, count); break;
- case 3: radeonEmitVec12(dst, input->Ptr, input->StrideB, count); break;
- case 4: radeonEmitVec16(dst, input->Ptr, input->StrideB, count); break;
- default: assert(0); break;
- }
-
r300_attr.is_named_bo = GL_FALSE;
}
}
@@ -468,7 +442,6 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_vertex_buffer *vbuf = &r300->vbuf;
- int ret;
{
int i, tmp;
@@ -492,29 +465,83 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar
r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS);
if (r300->fallback)
return;
+}
- {
- int i;
+static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *input[], int count)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_buffer *vbuf = &r300->vbuf;
+ GLuint stride;
+ int ret;
+ int i, index;
+
+ for (index = 0; index < vbuf->num_attribs; index++) {
+ struct radeon_aos *aos = &r300->radeon.tcl.aos[index];
+ i = vbuf->attribs[index].element;
- for (i = 0; i < vbuf->num_attribs; i++) {
- struct radeon_aos *aos = &r300->radeon.tcl.aos[i];
+ stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
- aos->count = vbuf->attribs[i].stride == 0 ? 1 : count;
- aos->stride = vbuf->attribs[i].stride / sizeof(float);
- aos->offset = vbuf->attribs[i].bo_offset;
- aos->components = vbuf->attribs[i].dwords;
- aos->bo = vbuf->attribs[i].bo;
+ if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+ getTypeSize(input[i]->Type) != 4 ||
+#endif
+ stride < 4) {
+
+ r300ConvertAttrib(ctx, count, input[i], &vbuf->attribs[index]);
+ } else {
+ if (input[i]->BufferObj->Name) {
+ if (stride % 4 != 0) {
+ assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
+ r300AlignDataToDword(ctx, input[i], count, &vbuf->attribs[index]);
+ vbuf->attribs[index].is_named_bo = GL_FALSE;
+ } else {
+ vbuf->attribs[index].stride = input[i]->StrideB;
+ vbuf->attribs[index].bo_offset = (intptr_t) input[i]->Ptr;
+ vbuf->attribs[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+ vbuf->attribs[index].is_named_bo = GL_TRUE;
+ }
+ } else {
+
+ int size;
+ int local_count = count;
+ uint32_t *dst;
+
+ if (input[i]->StrideB == 0) {
+ size = getTypeSize(input[i]->Type) * input[i]->Size;
+ local_count = 1;
+ } else {
+ size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
+ }
+
+ radeonAllocDmaRegion(&r300->radeon, &vbuf->attribs[index].bo, &vbuf->attribs[index].bo_offset, size, 32);
+ assert(vbuf->attribs[index].bo->ptr != NULL);
+ dst = (uint32_t *)ADD_POINTERS(vbuf->attribs[index].bo->ptr, vbuf->attribs[index].bo_offset);
+ switch (vbuf->attribs[index].dwords) {
+ case 1: radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+ case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+ case 3: radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+ case 4: radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+ default: assert(0); break;
+ }
- if (vbuf->attribs[i].is_named_bo) {
- radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[i].bo, RADEON_GEM_DOMAIN_GTT, 0);
}
}
- r300->radeon.tcl.aos_count = vbuf->num_attribs;
- ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, first_elem(&r300->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
- if (ret)
- r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, GL_TRUE);
+ aos->count = vbuf->attribs[index].stride == 0 ? 1 : count;
+ aos->stride = vbuf->attribs[index].stride / sizeof(float);
+ aos->components = vbuf->attribs[index].dwords;
+ aos->bo = vbuf->attribs[index].bo;
+ aos->offset = vbuf->attribs[index].bo_offset;
+
+ if (vbuf->attribs[index].is_named_bo) {
+ radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[index].bo, RADEON_GEM_DOMAIN_GTT, 0);
+ }
}
+
+ r300->radeon.tcl.aos_count = vbuf->num_attribs;
+ ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, first_elem(&r300->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+ r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, ret);
+
}
static void r300FreeData(GLcontext *ctx)
@@ -542,6 +569,34 @@ static void r300FreeData(GLcontext *ctx)
}
}
+static GLuint r300PredictTryDrawPrimsSize(GLcontext *ctx, GLuint nr_prims)
+{
+ struct r300_context *r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_buffer *vbuf = &r300->vbuf;
+ GLboolean flushed;
+ GLuint dwords;
+ GLuint state_size;
+
+ dwords = 2*CACHE_FLUSH_BUFSZ;
+ dwords += PRE_EMIT_STATE_BUFSZ;
+ dwords += (AOS_BUFSZ(vbuf->num_attribs)
+ + SCISSORS_BUFSZ
+ + FIREAOS_BUFSZ )*nr_prims;
+
+ state_size = radeonCountStateEmitSize(&r300->radeon);
+ flushed = rcommonEnsureCmdBufSpace(&r300->radeon,
+ dwords + state_size,
+ __FUNCTION__);
+ if (flushed)
+ dwords += radeonCountStateEmitSize(&r300->radeon);
+ else
+ dwords += state_size;
+
+ if (RADEON_DEBUG & DEBUG_PRIMS)
+ fprintf(stderr, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
+ return dwords;
+}
+
static GLboolean r300TryDrawPrims(GLcontext *ctx,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prim,
@@ -553,6 +608,10 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
struct r300_context *r300 = R300_CONTEXT(ctx);
GLuint i;
+ if (RADEON_DEBUG & DEBUG_PRIMS)
+ fprintf(stderr, "%s: %u (%d-%d) cs begin at %d\n",
+ __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw );
+
if (ctx->NewState)
_mesa_update_state( ctx );
@@ -563,14 +622,6 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx));
- /* ensure we have the cmd buf space in advance to cover
- * the state + DMA AOS pointers */
- rcommonEnsureCmdBufSpace(&r300->radeon,
- r300->radeon.hw.max_state_size + (60*sizeof(int)),
- __FUNCTION__);
-
- r300SetupIndexBuffer(ctx, ib);
-
r300SetVertexFormat(ctx, arrays, max_index + 1);
if (r300->fallback)
@@ -580,6 +631,18 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
r300UpdateShaderStates(r300);
+ /* ensure we have the cmd buf space in advance to cover
+ * the state + DMA AOS pointers */
+ GLuint emit_end = r300PredictTryDrawPrimsSize(ctx, nr_prims)
+ + r300->radeon.cmdbuf.cs->cdw;
+
+ r300SetupIndexBuffer(ctx, ib);
+
+ r300AllocDmaRegions(ctx, arrays, max_index + 1);
+
+ if (r300->fallback)
+ return GL_FALSE;
+
r300EmitCacheFlush(r300);
radeonEmitState(&r300->radeon);
@@ -591,6 +654,14 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
r300FreeData(ctx);
+ if (RADEON_DEBUG & DEBUG_PRIMS)
+ fprintf(stderr, "%s: %u (%d-%d) cs ending at %d\n",
+ __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw );
+
+ if (emit_end < r300->radeon.cmdbuf.cs->cdw)
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", r300->radeon.cmdbuf.cs->cdw - emit_end);
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
index 7ab6928247..3303078e39 100644
--- a/src/mesa/drivers/dri/r300/r300_ioctl.c
+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
@@ -507,7 +507,15 @@ static void r300EmitClearState(GLcontext * ctx)
R500_ALU_RGBA_A_SWIZ_0;
r500fp.cmd[7] = 0;
- emit_r500fp(ctx, &r500fp);
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ emit_r500fp(ctx, &r500fp);
+ } else {
+ int dwords = r500fp.check(ctx,&r500fp);
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(r500fp.cmd, dwords);
+ END_BATCH();
+ }
+
}
BEGIN_BATCH(2);
@@ -551,6 +559,7 @@ static void r300EmitClearState(GLcontext * ctx)
struct radeon_state_atom vpu;
uint32_t _cmd[10];
R300_STATECHANGE(r300, pvs);
+ R300_STATECHANGE(r300, vap_flush);
R300_STATECHANGE(r300, vpi);
BEGIN_BATCH(4);
@@ -592,8 +601,19 @@ static void r300EmitClearState(GLcontext * ctx)
PVS_SRC_REG_INPUT, NEGATE_NONE);
vpu.cmd[8] = 0x0;
- r300->vap_flush_needed = GL_TRUE;
- emit_vpu(ctx, &vpu);
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ int dwords = r300->hw.vap_flush.check(ctx,&r300->hw.vap_flush);
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(r300->hw.vap_flush.cmd, dwords);
+ END_BATCH();
+ emit_vpu(ctx, &vpu);
+ } else {
+ int dwords = vpu.check(ctx,&vpu);
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(vpu.cmd, dwords);
+ END_BATCH();
+ }
+
}
}
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
index 37a40f6c36..446cf40131 100644
--- a/src/mesa/drivers/dri/r300/r300_render.c
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -341,12 +341,6 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
if (type < 0 || num_verts <= 0)
return;
- /* Make space for at least 128 dwords.
- * This is supposed to ensure that we can get all rendering
- * commands into a single command buffer.
- */
- rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__);
-
if (rmesa->ind_buf.bo) {
GLuint first, incr, offset = 0;
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index ce0666b901..4fe9175b61 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -371,6 +371,7 @@ static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+ R300_STATECHANGE( rmesa, vap_flush );
R300_STATECHANGE( rmesa, vpucp[p] );
rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index c5edbd0052..862f212085 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -298,6 +298,8 @@ static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_ver
assert((code->length > 0) && (code->length % 4 == 0));
+ R300_STATECHANGE( r300, vap_flush );
+
switch ((dest >> 8) & 0xf) {
case 0:
R300_STATECHANGE(r300, vpi);
@@ -335,6 +337,7 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
+ R300_STATECHANGE(rmesa, vap_flush);
R300_STATECHANGE(rmesa, vpp);
param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index cdf3220a7f..d64e921bda 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -53,6 +53,8 @@
#include "r700_fragprog.h"
#include "r700_state.h"
+#include "radeon_common_context.h"
+
void r700WaitForIdle(context_t *context);
void r700WaitForIdleClean(context_t *context);
GLboolean r700SendTextureState(context_t *context);
@@ -298,22 +300,52 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
}
+/* start 3d, idle, cb/db flush */
+#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14
+
+static GLuint r700PredictRenderSize(GLcontext* ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct r700_vertex_program *vpc
+ = (struct r700_vertex_program *)ctx->VertexProgram._Current;
+ struct vertex_buffer *vb = &tnl->vb;
+ GLboolean flushed;
+ GLuint dwords, i;
+ GLuint state_size;
+ /* pre calculate aos count so state prediction works */
+ context->radeon.tcl.aos_count = _mesa_bitcount(vpc->mesa_program.Base.InputsRead);
+
+ dwords = PRE_EMIT_STATE_BUFSZ;
+ for (i = 0; i < vb->PrimitiveCount; i++)
+ dwords += vb->Primitive[i].count + 10;
+ state_size = radeonCountStateEmitSize(&context->radeon);
+ flushed = rcommonEnsureCmdBufSpace(&context->radeon,
+ dwords + state_size, __FUNCTION__);
+
+ if (flushed)
+ dwords += radeonCountStateEmitSize(&context->radeon);
+ else
+ dwords += state_size;
+
+ if (RADEON_DEBUG & DEBUG_PRIMS)
+ fprintf(stderr, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
+ return dwords;
+}
+
static GLboolean r700RunRender(GLcontext * ctx,
struct tnl_pipeline_stage *stage)
{
context_t *context = R700_CONTEXT(ctx);
radeonContextPtr radeon = &context->radeon;
- unsigned int i, ind_count = 0, id = 0;
+ unsigned int i, id = 0;
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *vb = &tnl->vb;
struct radeon_renderbuffer *rrb;
- for (i = 0; i < vb->PrimitiveCount; i++)
- ind_count += vb->Primitive[i].count + 10;
-
- /* just an estimate, need to properly calculate this */
- rcommonEnsureCmdBufSpace(&context->radeon,
- radeon->hw.max_state_size + ind_count, __FUNCTION__);
+ if (RADEON_DEBUG & DEBUG_PRIMS)
+ fprintf(stderr, "%s: cs begin at %d\n",
+ __func__, context->radeon.cmdbuf.cs->cdw);
/* always emit CB base to prevent
* lock ups on some chips.
@@ -327,6 +359,9 @@ static GLboolean r700RunRender(GLcontext * ctx,
r700SetupVertexProgram(ctx);
r700SetupFragmentProgram(ctx);
r600UpdateTextureState(ctx);
+
+ GLuint emit_end = r700PredictRenderSize(ctx)
+ + context->radeon.cmdbuf.cs->cdw;
r700SetupStreams(ctx);
radeonEmitState(radeon);
@@ -336,7 +371,7 @@ static GLboolean r700RunRender(GLcontext * ctx,
GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
GLuint start = vb->Primitive[i].start;
GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
- r700RunRenderPrimitive(ctx, start, end, prim);
+ r700RunRenderPrimitive(ctx, start, end, prim);
}
/* Flush render op cached for last several quads. */
@@ -354,6 +389,14 @@ static GLboolean r700RunRender(GLcontext * ctx,
radeonReleaseArrays(ctx, ~0);
+ if (RADEON_DEBUG & DEBUG_PRIMS)
+ fprintf(stderr, "%s: cs end at %d\n",
+ __func__, context->radeon.cmdbuf.cs->cdw);
+
+ if ( emit_end < context->radeon.cmdbuf.cs->cdw )
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
+
return GL_FALSE;
}
diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
index 6bf67d2ea5..8fe9d98a0b 100644
--- a/src/mesa/drivers/dri/radeon/Makefile
+++ b/src/mesa/drivers/dri/radeon/Makefile
@@ -46,7 +46,8 @@ C_SOURCES = \
$(DRIVER_SOURCES) \
$(CS_SOURCES)
-DRIVER_DEFINES = -DRADEON_COMMON=0
+DRIVER_DEFINES = -DRADEON_COMMON=0 \
+ -Wall
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
diff --git a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
index c31421c253..410df4d665 100644
--- a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
+++ b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
@@ -3,7 +3,7 @@
#include "radeon_bocs_wrapper.h"
-void rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller);
+GLboolean rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller);
int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller);
int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller);
void rcommonInitCmdBuf(radeonContextPtr rmesa);
@@ -71,7 +71,7 @@ void rcommonBeginBatch(radeonContextPtr rmesa,
*/
#define OUT_BATCH_TABLE(ptr,n) \
do { \
- radeon_cs_write_table(b_l_rmesa->cmdbuf.cs, ptr, n); \
+ radeon_cs_write_table(b_l_rmesa->cmdbuf.cs, (ptr), (n));\
} while(0)
/**
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index b5b4fed8fa..8f34fbf6bc 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -895,7 +895,7 @@ void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei he
ctx->Driver.Viewport = old_viewport;
}
-static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state)
+static void radeon_print_state_atom_prekmm(radeonContextPtr radeon, struct radeon_state_atom *state)
{
int i, j, reg;
int dwords = (*state->check) (radeon->glCtx, state);
@@ -904,6 +904,9 @@ static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state
fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size);
if (RADEON_DEBUG & DEBUG_VERBOSE) {
+ if (dwords > state->cmd_size)
+ dwords = state->cmd_size;
+
for (i = 0; i < dwords;) {
cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]);
reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo;
@@ -920,16 +923,27 @@ static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state
}
}
-static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_state_atom *state)
+static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state)
{
int i, j, reg, count;
- int dwords = (*state->check) (radeon->glCtx, state);
+ int dwords;
uint32_t packet0;
+ if (! (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) )
+ return;
+
+ if (!radeon->radeonScreen->kernel_mm) {
+ radeon_print_state_atom_prekmm(radeon, state);
+ return;
+ }
+
+ dwords = (*state->check) (radeon->glCtx, state);
fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size);
if (RADEON_DEBUG & DEBUG_VERBOSE) {
- for (i = 0; i < dwords;) {
+ if (dwords > state->cmd_size)
+ dwords = state->cmd_size;
+ for (i = 0; i < state->cmd_size;) {
packet0 = state->cmd[i];
reg = (packet0 & 0x1FFF) << 2;
count = ((packet0 & 0x3FFF0000) >> 16) + 1;
@@ -946,40 +960,84 @@ static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_s
}
}
-static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty)
+/**
+ * Count total size for next state emit.
+ **/
+GLuint radeonCountStateEmitSize(radeonContextPtr radeon)
{
- BATCH_LOCALS(radeon);
struct radeon_state_atom *atom;
+ GLuint dwords = 0;
+ /* check if we are going to emit full state */
+ if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE)
+ fprintf(stderr, "%s\n", __func__);
+
+ if (radeon->cmdbuf.cs->cdw && !radeon->hw.all_dirty) {
+ if (!radeon->hw.is_dirty)
+ return dwords;
+ foreach(atom, &radeon->hw.atomlist) {
+ if (atom->dirty) {
+ const GLuint atom_size = atom->check(radeon->glCtx, atom);
+ dwords += atom_size;
+ if (DEBUG_CMDBUF && atom_size) {
+ radeon_print_state_atom(radeon, atom);
+ }
+ }
+ }
+ } else {
+ foreach(atom, &radeon->hw.atomlist) {
+ const GLuint atom_size = atom->check(radeon->glCtx, atom);
+ dwords += atom_size;
+ if (DEBUG_CMDBUF && atom_size) {
+ radeon_print_state_atom(radeon, atom);
+ }
+
+ }
+ }
+ return dwords;
+}
+
+static INLINE void radeon_emit_atom(radeonContextPtr radeon, struct radeon_state_atom *atom)
+{
+ BATCH_LOCALS(radeon);
int dwords;
+ dwords = (*atom->check) (radeon->glCtx, atom);
+ if (dwords) {
+
+ radeon_print_state_atom(radeon, atom);
+
+ if (atom->emit) {
+ (*atom->emit)(radeon->glCtx, atom);
+ } else {
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
+ OUT_BATCH_TABLE(atom->cmd, dwords);
+ END_BATCH();
+ }
+ } else {
+ if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
+ fprintf(stderr, " skip state %s\n",
+ atom->name);
+ }
+ }
+ atom->dirty = GL_FALSE;
+
+}
+
+static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean emitAll)
+{
+ struct radeon_state_atom *atom;
+
if (radeon->vtbl.pre_emit_atoms)
radeon->vtbl.pre_emit_atoms(radeon);
/* Emit actual atoms */
- foreach(atom, &radeon->hw.atomlist) {
- if ((atom->dirty || radeon->hw.all_dirty) == dirty) {
- dwords = (*atom->check) (radeon->glCtx, atom);
- if (dwords) {
- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
- if (radeon->radeonScreen->kernel_mm)
- radeon_print_state_atom_kmm(radeon, atom);
- else
- radeon_print_state_atom(radeon, atom);
- }
- if (atom->emit) {
- (*atom->emit)(radeon->glCtx, atom);
- } else {
- BEGIN_BATCH_NO_AUTOSTATE(dwords);
- OUT_BATCH_TABLE(atom->cmd, dwords);
- END_BATCH();
- }
- atom->dirty = GL_FALSE;
- } else {
- if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
- fprintf(stderr, " skip state %s\n",
- atom->name);
- }
- }
+ if (radeon->hw.all_dirty || emitAll) {
+ foreach(atom, &radeon->hw.atomlist)
+ radeon_emit_atom( radeon, atom );
+ } else {
+ foreach(atom, &radeon->hw.atomlist) {
+ if ( atom->dirty )
+ radeon_emit_atom( radeon, atom );
}
}
@@ -1009,26 +1067,21 @@ void radeonEmitState(radeonContextPtr radeon)
if (radeon->cmdbuf.cs->cdw && !radeon->hw.is_dirty && !radeon->hw.all_dirty)
return;
- /* To avoid going across the entire set of states multiple times, just check
- * for enough space for the case of emitting all state, and inline the
- * radeonAllocCmdBuf code here without all the checks.
- */
- rcommonEnsureCmdBufSpace(radeon, radeon->hw.max_state_size, __FUNCTION__);
-
if (!radeon->cmdbuf.cs->cdw) {
if (RADEON_DEBUG & DEBUG_STATE)
fprintf(stderr, "Begin reemit state\n");
+ radeonEmitAtoms(radeon, GL_TRUE);
+ } else {
+
+ if (RADEON_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "Begin dirty state\n");
+
radeonEmitAtoms(radeon, GL_FALSE);
}
- if (RADEON_DEBUG & DEBUG_STATE)
- fprintf(stderr, "Begin dirty state\n");
-
- radeonEmitAtoms(radeon, GL_TRUE);
radeon->hw.is_dirty = GL_FALSE;
radeon->hw.all_dirty = GL_FALSE;
-
}
@@ -1172,12 +1225,16 @@ int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller)
*
* \param dwords The number of dwords we need to be free on the command buffer
*/
-void rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller)
+GLboolean rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller)
{
- if ((rmesa->cmdbuf.cs->cdw + dwords + 128) > rmesa->cmdbuf.size ||
- radeon_cs_need_flush(rmesa->cmdbuf.cs)) {
- rcommonFlushCmdBuf(rmesa, caller);
- }
+ if ((rmesa->cmdbuf.cs->cdw + dwords + 128) > rmesa->cmdbuf.size
+ || radeon_cs_need_flush(rmesa->cmdbuf.cs)) {
+ /* If we try to flush empty buffer there is too big rendering operation. */
+ assert(rmesa->cmdbuf.cs->cdw);
+ rcommonFlushCmdBuf(rmesa, caller);
+ return GL_TRUE;
+ }
+ return GL_FALSE;
}
void rcommonInitCmdBuf(radeonContextPtr rmesa)
@@ -1252,7 +1309,6 @@ void rcommonBeginBatch(radeonContextPtr rmesa, int n,
const char *function,
int line)
{
- rcommonEnsureCmdBufSpace(rmesa, n, function);
if (!rmesa->cmdbuf.cs->cdw && dostate) {
if (RADEON_DEBUG & DEBUG_IOCTL)
fprintf(stderr, "Reemit state after flush (from %s)\n", function);
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h
index cebae18b2d..a9e1ca49eb 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common.h
@@ -24,6 +24,7 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa);
void radeonFlush(GLcontext *ctx);
void radeonFinish(GLcontext * ctx);
void radeonEmitState(radeonContextPtr radeon);
+GLuint radeonCountStateEmitSize(radeonContextPtr radeon);
void radeon_clear_tris(GLcontext *ctx, GLbitfield mask);
diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c
index a1835427f1..f306befec4 100644
--- a/src/mesa/drivers/dri/radeon/radeon_dma.c
+++ b/src/mesa/drivers/dri/radeon/radeon_dma.c
@@ -438,7 +438,7 @@ restart:
if (!rmesa->dma.flush) {
/* make sure we have enough space to use this in cmdbuf */
rcommonEnsureCmdBufSpace(rmesa,
- rmesa->hw.max_state_size + (20*sizeof(int)),
+ radeonCountStateEmitSize( rmesa ) + (20*sizeof(int)),
__FUNCTION__);
/* if cmdbuf flushed DMA restart */
if (is_empty_list(&rmesa->dma.reserved))
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
index 18805d4c57..deb53ae313 100644
--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
@@ -132,16 +132,18 @@ static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa,
* r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
*/
#if RADEON_OLD_PACKETS
-#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int))
+#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2))+nr*2)
#define VERT_AOS_BUFSZ (0)
#define ELTS_BUFSZ(nr) (24 + nr * 2)
-#define VBUF_BUFSZ (6 * sizeof(int))
+#define VBUF_BUFSZ (8)
#else
-#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int))
-#define VERT_AOS_BUFSZ (5 * sizeof(int))
+#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2))
+#define VERT_AOS_BUFSZ (5)
#define ELTS_BUFSZ(nr) (16 + nr * 2)
-#define VBUF_BUFSZ (4 * sizeof(int))
+#define VBUF_BUFSZ (4)
#endif
+#define SCISSOR_BUFSZ (8)
+#define INDEX_BUFSZ (7)
static inline uint32_t cmdpacket3(int cmd_type)
diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
index 501ea0b66b..b34a4e803d 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state_init.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
@@ -197,55 +197,85 @@ static int cmdscl( int offset, int stride, int count )
return h.i;
}
-#define CHECK( NM, FLAG ) \
+#define CHECK( NM, FLAG, ADD ) \
static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
{ \
- return FLAG ? atom->cmd_size : 0; \
+ return FLAG ? atom->cmd_size + (ADD) : 0; \
}
-#define TCL_CHECK( NM, FLAG ) \
+#define TCL_CHECK( NM, FLAG, ADD ) \
static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
{ \
r100ContextPtr rmesa = R100_CONTEXT(ctx); \
- return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0; \
+ return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
}
-CHECK( always, GL_TRUE )
-CHECK( never, GL_FALSE )
-CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled )
-CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled )
+CHECK( always, GL_TRUE, 0 )
+CHECK( always_add2, GL_TRUE, 2 )
+CHECK( always_add4, GL_TRUE, 4 )
+CHECK( never, GL_FALSE, 0 )
+CHECK( tex0_mm, ctx->Texture.Unit[0]._ReallyEnabled, 3 )
+CHECK( tex1_mm, ctx->Texture.Unit[1]._ReallyEnabled, 3 )
/* need this for the cubic_map on disabled unit 2 bug, maybe r100 only? */
-CHECK( tex2, ctx->Texture._EnabledUnits )
-CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT))
-CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT))
-CHECK( cube2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT))
-CHECK( fog, ctx->Fog.Enabled )
-TCL_CHECK( tcl, GL_TRUE )
-TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled )
-TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled )
-TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled )
-TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
-TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled )
-TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled )
-TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled )
-TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled )
-TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled )
-TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled )
-TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled )
-TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled )
-TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled )
-TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1) )
-TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2) )
-TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4) )
-TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8) )
-TCL_CHECK( tcl_ucp4, (ctx->Transform.ClipPlanesEnabled & 0x10) )
-TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20) )
-TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled )
-
-CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT))
-CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT))
-CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT))
+CHECK( tex2_mm, ctx->Texture._EnabledUnits, 3 )
+CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled, 2 )
+CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled, 2 )
+CHECK( tex2, ctx->Texture._EnabledUnits, 2 )
+CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( cube2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( cube0_mm, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
+CHECK( cube1_mm, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
+CHECK( cube2_mm, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
+CHECK( fog, ctx->Fog.Enabled, 0 )
+CHECK( fog_add4, ctx->Fog.Enabled, 4 )
+TCL_CHECK( tcl, GL_TRUE, 0 )
+TCL_CHECK( tcl_add4, GL_TRUE, 4 )
+TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled, 0 )
+TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled, 0 )
+TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled, 0 )
+TCL_CHECK( tcl_tex0_add4, ctx->Texture.Unit[0]._ReallyEnabled, 4 )
+TCL_CHECK( tcl_tex1_add4, ctx->Texture.Unit[1]._ReallyEnabled, 4 )
+TCL_CHECK( tcl_tex2_add4, ctx->Texture.Unit[2]._ReallyEnabled, 4 )
+TCL_CHECK( tcl_lighting, ctx->Light.Enabled, 0 )
+TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 )
+TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled, 0 )
+TCL_CHECK( tcl_eyespace_or_lighting_add4, ctx->_NeedEyeCoords || ctx->Light.Enabled, 4 )
+TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 0 )
+TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 0 )
+TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 0 )
+TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 0 )
+TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 0 )
+TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled, 0 )
+TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 0 )
+TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 0 )
+TCL_CHECK( tcl_lit0_add6, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 6 )
+TCL_CHECK( tcl_lit1_add6, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 6 )
+TCL_CHECK( tcl_lit2_add6, ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 6 )
+TCL_CHECK( tcl_lit3_add6, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 6 )
+TCL_CHECK( tcl_lit4_add6, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 6 )
+TCL_CHECK( tcl_lit5_add6, ctx->Light.Enabled && ctx->Light.Light[5].Enabled, 6 )
+TCL_CHECK( tcl_lit6_add6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 6 )
+TCL_CHECK( tcl_lit7_add6, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 6 )
+TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1), 0 )
+TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2), 0 )
+TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4), 0 )
+TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8), 0 )
+TCL_CHECK( tcl_ucp4, (ctx->Transform.ClipPlanesEnabled & 0x10), 0 )
+TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20), 0 )
+TCL_CHECK( tcl_ucp0_add4, (ctx->Transform.ClipPlanesEnabled & 0x1), 4 )
+TCL_CHECK( tcl_ucp1_add4, (ctx->Transform.ClipPlanesEnabled & 0x2), 4 )
+TCL_CHECK( tcl_ucp2_add4, (ctx->Transform.ClipPlanesEnabled & 0x4), 4 )
+TCL_CHECK( tcl_ucp3_add4, (ctx->Transform.ClipPlanesEnabled & 0x8), 4 )
+TCL_CHECK( tcl_ucp4_add4, (ctx->Transform.ClipPlanesEnabled & 0x10), 4 )
+TCL_CHECK( tcl_ucp5_add4, (ctx->Transform.ClipPlanesEnabled & 0x20), 4 )
+TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled, 0 )
+TCL_CHECK( tcl_eyespace_or_fog_add4, ctx->_NeedEyeCoords || ctx->Fog.Enabled, 4 )
+
+CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
+CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
+CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
#define OUT_VEC(hdr, data) do { \
drm_radeon_cmd_header_t h; \
@@ -271,9 +301,8 @@ static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
BATCH_LOCALS(&r100->radeon);
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
- dwords += 2;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_SCL(atom->cmd[0], atom->cmd+1);
END_BATCH();
@@ -284,9 +313,8 @@ static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
BATCH_LOCALS(&r100->radeon);
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
- dwords += 4;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_VEC(atom->cmd[0], atom->cmd+1);
END_BATCH();
@@ -297,9 +325,8 @@ static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
BATCH_LOCALS(&r100->radeon);
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
- dwords += 6;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
@@ -313,10 +340,10 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
struct radeon_renderbuffer *rrb;
uint32_t cbpitch;
uint32_t zbpitch, depth_fmt;
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
/* output the first 7 bytes of context */
- BEGIN_BATCH_NO_AUTOSTATE(dwords + 4);
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_BATCH_TABLE(atom->cmd, 5);
rrb = radeon_get_depthbuffer(&r100->radeon);
@@ -371,6 +398,28 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
END_BATCH();
}
+static int check_always_ctx( GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r100ContextPtr r100 = R100_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrb, *drb;
+ uint32_t dwords;
+
+ rrb = radeon_get_colorbuffer(&r100->radeon);
+ if (!rrb || !rrb->bo) {
+ return 0;
+ }
+
+ drb = radeon_get_depthbuffer(&r100->radeon);
+
+ dwords = 10;
+ if (drb)
+ dwords += 6;
+ if (rrb)
+ dwords += 8;
+
+ return dwords;
+}
+
static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
@@ -378,7 +427,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
struct radeon_renderbuffer *rrb, *drb;
uint32_t cbpitch = 0;
uint32_t zbpitch = 0;
- uint32_t dwords = atom->cmd_size;
+ uint32_t dwords = atom->check(ctx, atom);
uint32_t depth_fmt;
rrb = radeon_get_colorbuffer(&r100->radeon);
@@ -418,12 +467,6 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
}
- /* output the first 7 bytes of context */
- dwords = 10;
- if (drb)
- dwords += 6;
- if (rrb)
- dwords += 8;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
/* In the CS case we need to split this up */
@@ -474,7 +517,7 @@ static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
BATCH_LOCALS(&r100->radeon);
- uint32_t dwords = 3;
+ uint32_t dwords = atom->check(ctx, atom);
int i = atom->idx, j;
radeonTexObj *t = r100->state.texture.unit[i].texobj;
radeon_mipmap_level *lvl;
@@ -488,7 +531,7 @@ static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
if (!t->mt)
return;
- BEGIN_BATCH_NO_AUTOSTATE(dwords + (5 * 3));
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_BATCH_TABLE(atom->cmd, 3);
lvl = &t->mt->levels[0];
for (j = 0; j < 5; j++) {
@@ -502,7 +545,7 @@ static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
BATCH_LOCALS(&r100->radeon);
- uint32_t dwords = 2;
+ uint32_t dwords = atom->check(ctx, atom);
int i = atom->idx, j;
radeonTexObj *t = r100->state.texture.unit[i].texobj;
radeon_mipmap_level *lvl;
@@ -523,7 +566,7 @@ static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
default:
case 0: base_reg = RADEON_PP_CUBIC_OFFSET_T0_0; break;
};
- BEGIN_BATCH_NO_AUTOSTATE(dwords + (5 * 4));
+ BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_BATCH_TABLE(atom->cmd, 2);
lvl = &t->mt->levels[0];
for (j = 0; j < 5; j++) {
@@ -665,10 +708,11 @@ void radeonInitState( r100ContextPtr rmesa )
/* Allocate state buffers:
*/
- ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 );
- if (rmesa->radeon.radeonScreen->kernel_mm)
+ ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE, "CTX/context", 0 );
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
rmesa->hw.ctx.emit = ctx_emit_cs;
- else
+ rmesa->hw.ctx.check = check_always_ctx;
+ } else
rmesa->hw.ctx.emit = ctx_emit;
ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
@@ -678,13 +722,63 @@ void radeonInitState( r100ContextPtr rmesa )
ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
ALLOC_STATE( tcl, always, TCL_STATE_SIZE, "TCL/tcl", 1 );
ALLOC_STATE( mtl, tcl_lighting, MTL_STATE_SIZE, "MTL/material", 1 );
- ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 );
- ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
- ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
- ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
- ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
- ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
- ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2);
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE( grd, always_add2, GRD_STATE_SIZE, "GRD/guard-band", 1 );
+ ALLOC_STATE( fog, fog_add4, FOG_STATE_SIZE, "FOG/fog", 1 );
+ ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 1 );
+ ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+ ALLOC_STATE_IDX( tex[0], tex0_mm, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
+ ALLOC_STATE_IDX( tex[1], tex1_mm, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
+ ALLOC_STATE_IDX( tex[2], tex2_mm, TEX_STATE_SIZE, "TEX/tex-2", 0, 2);
+ ALLOC_STATE( mat[0], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+ ALLOC_STATE( mat[1], tcl_eyespace_or_fog_add4, MAT_STATE_SIZE, "MAT/modelview", 1 );
+ ALLOC_STATE( mat[2], tcl_eyespace_or_lighting_add4, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
+ ALLOC_STATE( mat[3], tcl_tex0_add4, MAT_STATE_SIZE, "MAT/texmat0", 1 );
+ ALLOC_STATE( mat[4], tcl_tex1_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+ ALLOC_STATE( mat[5], tcl_tex2_add4, MAT_STATE_SIZE, "MAT/texmat2", 1 );
+ ALLOC_STATE( lit[0], tcl_lit0_add6, LIT_STATE_SIZE, "LIT/light-0", 1 );
+ ALLOC_STATE( lit[1], tcl_lit1_add6, LIT_STATE_SIZE, "LIT/light-1", 1 );
+ ALLOC_STATE( lit[2], tcl_lit2_add6, LIT_STATE_SIZE, "LIT/light-2", 1 );
+ ALLOC_STATE( lit[3], tcl_lit3_add6, LIT_STATE_SIZE, "LIT/light-3", 1 );
+ ALLOC_STATE( lit[4], tcl_lit4_add6, LIT_STATE_SIZE, "LIT/light-4", 1 );
+ ALLOC_STATE( lit[5], tcl_lit5_add6, LIT_STATE_SIZE, "LIT/light-5", 1 );
+ ALLOC_STATE( lit[6], tcl_lit6_add6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+ ALLOC_STATE( lit[7], tcl_lit7_add6, LIT_STATE_SIZE, "LIT/light-7", 1 );
+ ALLOC_STATE( ucp[0], tcl_ucp0_add4, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
+ ALLOC_STATE( ucp[1], tcl_ucp1_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+ ALLOC_STATE( ucp[2], tcl_ucp2_add4, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
+ ALLOC_STATE( ucp[3], tcl_ucp3_add4, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
+ ALLOC_STATE( ucp[4], tcl_ucp4_add4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
+ ALLOC_STATE( ucp[5], tcl_ucp5_add4, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
+ } else {
+ ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 );
+ ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
+ ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
+ ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+ ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
+ ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
+ ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2);
+ ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+ ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
+ ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
+ ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 );
+ ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+ ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 );
+ ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 );
+ ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 );
+ ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 );
+ ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 );
+ ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 );
+ ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
+ ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+ ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
+ ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
+ ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+ ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
+ ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
+ ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
+ ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
+ }
for (i = 0; i < 3; i++) {
if (rmesa->radeon.radeonScreen->kernel_mm)
@@ -694,14 +788,19 @@ void radeonInitState( r100ContextPtr rmesa )
}
if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
{
- ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
- ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
- ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
- for (i = 0; i < 3; i++)
- if (rmesa->radeon.radeonScreen->kernel_mm)
- rmesa->hw.cube[i].emit = cube_emit_cs;
- else
- rmesa->hw.cube[i].emit = cube_emit;
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE_IDX( cube[0], cube0_mm, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
+ ALLOC_STATE_IDX( cube[1], cube1_mm, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
+ ALLOC_STATE_IDX( cube[2], cube2_mm, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+ for (i = 0; i < 3; i++)
+ rmesa->hw.cube[i].emit = cube_emit_cs;
+ } else {
+ ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
+ ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
+ ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+ for (i = 0; i < 3; i++)
+ rmesa->hw.cube[i].emit = cube_emit;
+ }
}
else
{
@@ -709,26 +808,6 @@ void radeonInitState( r100ContextPtr rmesa )
ALLOC_STATE_IDX( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
ALLOC_STATE_IDX( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
}
- ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
- ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
- ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
- ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 );
- ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 );
- ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 );
- ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
- ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
- ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
- ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
- ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
- ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
- ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 );
- ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 );
- ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 );
- ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 );
- ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 );
- ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
- ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
- ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 );
ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 );
ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 );
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index 58b3be9391..20ce6c470b 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -285,7 +285,7 @@ void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
r100ContextPtr rmesa = R100_CONTEXT(ctx);
rcommonEnsureCmdBufSpace(&rmesa->radeon,
- rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
+ radeonCountStateEmitSize( &rmesa->radeon ) + (12*sizeof(int)),
__FUNCTION__);
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
index df6708f05e..2404f28450 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -50,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_tcl.h"
#include "radeon_swtcl.h"
#include "radeon_maos.h"
+#include "radeon_common_context.h"
@@ -149,9 +150,6 @@ static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )
if (rmesa->radeon.dma.flush)
rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->radeon.hw.max_state_size + ELTS_BUFSZ(nr) +
- AOS_BUFSZ(rmesa->radeon.tcl.aos_count), __FUNCTION__);
-
radeonEmitAOS( rmesa,
rmesa->radeon.tcl.aos_count, 0 );
@@ -176,10 +174,6 @@ static void radeonEmitPrim( GLcontext *ctx,
r100ContextPtr rmesa = R100_CONTEXT( ctx );
radeonTclPrimitive( ctx, prim, hwprim );
- rcommonEnsureCmdBufSpace( &rmesa->radeon,
- AOS_BUFSZ(rmesa->radeon.tcl.aos_count) +
- rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ );
-
radeonEmitAOS( rmesa,
rmesa->radeon.tcl.aos_count,
start );
@@ -196,6 +190,8 @@ static void radeonEmitPrim( GLcontext *ctx,
radeonEmitPrim( ctx, prim, hwprim, start, count ); \
(void) rmesa; } while (0)
+#define MAX_CONVERSION_SIZE 40
+
/* Try & join small primitives
*/
#if 0
@@ -360,6 +356,73 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
}
}
+/**
+ * Predict total emit size for next rendering operation so there is no flush in middle of rendering
+ * Prediction has to aim towards the best possible value that is worse than worst case scenario
+ */
+static GLuint radeonEnsureEmitSize( GLcontext * ctx , GLuint inputs )
+{
+ r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint space_required;
+ GLuint state_size;
+ GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */
+ int i;
+ /* list of flags that are allocating aos object */
+ const GLuint flags_to_check[] = {
+ VERT_BIT_NORMAL,
+ VERT_BIT_COLOR0,
+ VERT_BIT_COLOR1,
+ VERT_BIT_FOG
+ };
+ /* predict number of aos to emit */
+ for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i)
+ {
+ if (inputs & flags_to_check[i])
+ ++nr_aos;
+ }
+ for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)
+ {
+ if (inputs & VERT_BIT_TEX(i))
+ ++nr_aos;
+ }
+
+ {
+ /* count the prediction for state size */
+ space_required = 0;
+ state_size = radeonCountStateEmitSize( &rmesa->radeon );
+ /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
+ if (!rmesa->hw.tcl.dirty)
+ state_size += rmesa->hw.tcl.check( rmesa->radeon.glCtx, &rmesa->hw.tcl );
+ /* predict size for elements */
+ for (i = 0; i < VB->PrimitiveCount; ++i)
+ {
+ if (!VB->Primitive[i].count)
+ continue;
+ /* If primitive.count is less than MAX_CONVERSION_SIZE
+ rendering code may decide convert to elts.
+ In that case we have to make pessimistic prediction.
+ and use larger of 2 paths. */
+ const GLuint elts = ELTS_BUFSZ(nr_aos);
+ const GLuint index = INDEX_BUFSZ;
+ const GLuint vbuf = VBUF_BUFSZ;
+ if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
+ || vbuf > index + elts)
+ space_required += vbuf;
+ else
+ space_required += index + elts;
+ space_required += AOS_BUFSZ(nr_aos);
+ }
+ space_required += SCISSOR_BUFSZ;
+ }
+ /* flush the buffer in case we need more than is left. */
+ if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__))
+ return space_required + radeonCountStateEmitSize( &rmesa->radeon );
+ else
+ return space_required + state_size;
+}
+
/**********************************************************************/
/* Render pipeline stage */
/**********************************************************************/
@@ -410,6 +473,8 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
}
radeonReleaseArrays( ctx, ~0 );
+ GLuint emit_end = radeonEnsureEmitSize( ctx, inputs )
+ + rmesa->radeon.cmdbuf.cs->cdw;
radeonEmitArrays( ctx, inputs );
rmesa->tcl.Elts = VB->Elts;
@@ -429,6 +494,10 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
radeonEmitPrimitive( ctx, start, start+length, prim );
}
+ if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
+
return GL_FALSE; /* finished the pipe */
}