From e64166703a27c5b1127373b1dff3b93e617bcaea Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Sun, 11 Mar 2007 12:18:27 +1100 Subject: Renamed some of the unkXXX variables in the command buffer init ialization code. Note that there are still plenty of actual unknown variables left that should probably be deciphered. There are a number of things incomplete in the driver; the different polygon offset modes (line, point, etc), the other texture filter, texture chroma key, etc. These should probably be fixed in the future, or at least added to the TODO list. --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 54 ++++++++++++------------ src/mesa/drivers/dri/r300/r300_context.h | 19 +++++---- src/mesa/drivers/dri/r300/r300_state.c | 72 ++++++++++++++++---------------- 3 files changed, 73 insertions(+), 72 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 2c7b5aa011..d5f4c917f0 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -291,14 +291,14 @@ void r300InitCmdBuf(r300ContextPtr r300) /* Initialize state atoms */ ALLOC_STATE( vpt, always, R300_VPT_CMDSIZE, "vpt", 0 ); r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6); - ALLOC_STATE( unk2080, always, 2, "unk2080", 0 ); - r300->hw.unk2080.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1); + ALLOC_STATE( vap_cntl, always, 2, "vap_cntl", 0 ); + r300->hw.vap_cntl.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1); ALLOC_STATE( vte, always, 3, "vte", 0 ); r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2); ALLOC_STATE( unk2134, always, 3, "unk2134", 0 ); r300->hw.unk2134.cmd[0] = cmdpacket0(0x2134, 2); - ALLOC_STATE( unk2140, always, 2, "unk2140", 0 ); - r300->hw.unk2140.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1); + ALLOC_STATE( vap_cntl_status, always, 2, "vap_cntl_status", 0 ); + r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1); ALLOC_STATE( vir[0], variable, R300_VIR_CMDSIZE, "vir/0", 0 ); r300->hw.vir[0].cmd[R300_VIR_CMD_0] = cmdpacket0(R300_VAP_INPUT_ROUTE_0_0, 1); ALLOC_STATE( vir[1], variable, R300_VIR_CMDSIZE, "vir/1", 1 ); @@ -335,18 +335,18 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.lcntl.cmd[0] = cmdpacket0(R300_RE_LINE_CNT, 1); ALLOC_STATE( unk4260, always, 4, "unk4260", 0 ); r300->hw.unk4260.cmd[0] = cmdpacket0(0x4260, 3); - ALLOC_STATE( unk4274, always, 5, "unk4274", 0 ); - r300->hw.unk4274.cmd[0] = cmdpacket0(R300_RE_SHADE, 4); - ALLOC_STATE( unk4288, always, 4, "unk4288", 0 ); - r300->hw.unk4288.cmd[0] = cmdpacket0(R300_RE_POLYGON_MODE, 3); + ALLOC_STATE( shade, always, 5, "shade", 0 ); + r300->hw.shade.cmd[0] = cmdpacket0(R300_RE_SHADE, 4); + ALLOC_STATE( polygon_mode, always, 4, "unk4288", 0 ); + r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_RE_POLYGON_MODE, 3); ALLOC_STATE( fogp, always, 3, "fogp", 0 ); r300->hw.fogp.cmd[0] = cmdpacket0(R300_RE_FOG_SCALE, 2); ALLOC_STATE( unk42A0, always, 2, "unk42A0", 0 ); r300->hw.unk42A0.cmd[0] = cmdpacket0(0x42A0, 1); ALLOC_STATE( zbs, always, R300_ZBS_CMDSIZE, "zbs", 0 ); r300->hw.zbs.cmd[R300_ZBS_CMD_0] = cmdpacket0(R300_RE_ZBIAS_T_FACTOR, 4); - ALLOC_STATE( unk42B4, always, 2, "unk42B4", 0 ); - r300->hw.unk42B4.cmd[0] = cmdpacket0(R300_RE_OCCLUSION_CNTL, 1); + ALLOC_STATE( occlusion_cntl, always, 2, "occlusion_cntl", 0 ); + r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_RE_OCCLUSION_CNTL, 1); ALLOC_STATE( cul, always, R300_CUL_CMDSIZE, "cul", 0 ); r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_RE_CULL_CNTL, 1); ALLOC_STATE( unk42C0, always, 3, "unk42C0", 0 ); @@ -392,8 +392,8 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2); ALLOC_STATE( cmk, always, R300_CMK_CMDSIZE, "cmk", 0 ); r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(R300_RB3D_COLORMASK, 1); - ALLOC_STATE( unk4E10, always, 4, "unk4E10", 0 ); - r300->hw.unk4E10.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 3); + ALLOC_STATE( blend_color, always, 4, "blend_color", 0 ); + r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 3); ALLOC_STATE( cb, always, R300_CB_CMDSIZE, "cb", 0 ); r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1); r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1); @@ -405,8 +405,8 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.unk4EA0.cmd[0] = cmdpacket0(0x4EA0, 2); ALLOC_STATE( zs, always, R300_ZS_CMDSIZE, "zstencil", 0 ); r300->hw.zs.cmd[R300_ZS_CMD_0] = cmdpacket0(R300_RB3D_ZSTENCIL_CNTL_0, 3); - ALLOC_STATE( unk4F10, always, 5, "unk4F10", 0 ); - r300->hw.unk4F10.cmd[0] = cmdpacket0(R300_RB3D_ZSTENCIL_FORMAT, 4); + ALLOC_STATE( zstencil_format, always, 5, "zstencil_format", 0 ); + r300->hw.zstencil_format.cmd[0] = cmdpacket0(R300_RB3D_ZSTENCIL_FORMAT, 4); ALLOC_STATE( zb, always, R300_ZB_CMDSIZE, "zb", 0 ); r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_RB3D_DEPTHOFFSET, 2); ALLOC_STATE( unk4F28, always, 2, "unk4F28", 0 ); @@ -429,8 +429,8 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE( tex.filter, variable, mtu+1, "tex_filter", 0 ); r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER_0, 0); - ALLOC_STATE( tex.unknown1, variable, mtu+1, "tex_unknown1", 0 ); - r300->hw.tex.unknown1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, 0); + ALLOC_STATE( tex.filter_1, variable, mtu+1, "tex_filter_1", 0 ); + r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, 0); ALLOC_STATE( tex.size, variable, mtu+1, "tex_size", 0 ); r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0); @@ -444,8 +444,8 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE( tex.offset, variable, mtu+1, "tex_offset", 0 ); r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_OFFSET_0, 0); - ALLOC_STATE( tex.unknown4, variable, mtu+1, "tex_unknown4", 0 ); - r300->hw.tex.unknown4.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, 0); + ALLOC_STATE( tex.chroma_key, variable, mtu+1, "tex_chroma_key", 0 ); + r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, 0); ALLOC_STATE( tex.border_color, variable, mtu+1, "tex_border_color", 0 ); r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_BORDER_COLOR_0, 0); @@ -456,10 +456,10 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.atomlist.name = "atom-list"; insert_at_tail(&r300->hw.atomlist, &r300->hw.vpt); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2080); + insert_at_tail(&r300->hw.atomlist, &r300->hw.vap_cntl); insert_at_tail(&r300->hw.atomlist, &r300->hw.vte); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2134); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2140); + insert_at_tail(&r300->hw.atomlist, &r300->hw.vap_cntl_status); insert_at_tail(&r300->hw.atomlist, &r300->hw.vir[0]); insert_at_tail(&r300->hw.atomlist, &r300->hw.vir[1]); insert_at_tail(&r300->hw.atomlist, &r300->hw.vic); @@ -478,12 +478,12 @@ void r300InitCmdBuf(r300ContextPtr r300) insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4230); insert_at_tail(&r300->hw.atomlist, &r300->hw.lcntl); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4260); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4274); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4288); + insert_at_tail(&r300->hw.atomlist, &r300->hw.shade); + insert_at_tail(&r300->hw.atomlist, &r300->hw.polygon_mode); insert_at_tail(&r300->hw.atomlist, &r300->hw.fogp); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42A0); insert_at_tail(&r300->hw.atomlist, &r300->hw.zbs); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42B4); + insert_at_tail(&r300->hw.atomlist, &r300->hw.occlusion_cntl); insert_at_tail(&r300->hw.atomlist, &r300->hw.cul); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42C0); insert_at_tail(&r300->hw.atomlist, &r300->hw.rc); @@ -506,13 +506,13 @@ void r300InitCmdBuf(r300ContextPtr r300) insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E00); insert_at_tail(&r300->hw.atomlist, &r300->hw.bld); insert_at_tail(&r300->hw.atomlist, &r300->hw.cmk); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E10); + insert_at_tail(&r300->hw.atomlist, &r300->hw.blend_color); insert_at_tail(&r300->hw.atomlist, &r300->hw.cb); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E50); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E88); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4EA0); insert_at_tail(&r300->hw.atomlist, &r300->hw.zs); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F10); + insert_at_tail(&r300->hw.atomlist, &r300->hw.zstencil_format); insert_at_tail(&r300->hw.atomlist, &r300->hw.zb); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F28); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F30); @@ -524,12 +524,12 @@ void r300InitCmdBuf(r300ContextPtr r300) insert_at_tail(&r300->hw.atomlist, &r300->hw.vps); insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.filter); - insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.unknown1); + insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.filter_1); insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.size); insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.format); insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.pitch); insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.offset); - insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.unknown4); + insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.chroma_key); insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.border_color); r300->hw.is_dirty = GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index a339b2f9c1..43de5dfea3 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -180,6 +180,7 @@ struct r300_tex_obj { /* hardware register values */ /* Note that R200 has 8 registers per texture and R300 only 7 */ GLuint filter; + GLuint filter_1; GLuint pitch_reg; GLuint size; /* npot only */ GLuint format; @@ -432,11 +433,11 @@ struct r300_hw_state { int max_state_size; /* in dwords */ struct r300_state_atom vpt; /* viewport (1D98) */ - struct r300_state_atom unk2080; /* (2080) */ + struct r300_state_atom vap_cntl; struct r300_state_atom vof; /* VAP output format register 0x2090 */ struct r300_state_atom vte; /* (20B0) */ struct r300_state_atom unk2134; /* (2134) */ - struct r300_state_atom unk2140; /* (2140) */ + struct r300_state_atom vap_cntl_status; struct r300_state_atom vir[2]; /* vap input route (2150/21E0) */ struct r300_state_atom vic; /* vap input control (2180) */ struct r300_state_atom unk21DC; /* (21DC) */ @@ -452,13 +453,13 @@ struct r300_hw_state { struct r300_state_atom unk4230; /* (4230) */ struct r300_state_atom lcntl; /* line control */ struct r300_state_atom unk4260; /* (4260) */ - struct r300_state_atom unk4274; /* (4274) */ - struct r300_state_atom unk4288; /* (4288) */ + struct r300_state_atom shade; + struct r300_state_atom polygon_mode; struct r300_state_atom fogp; /* fog parameters (4294) */ struct r300_state_atom unk429C; /* (429C) */ struct r300_state_atom unk42A0; /* (42A0) */ struct r300_state_atom zbs; /* zbias (42A4) */ - struct r300_state_atom unk42B4; /* (42B4) */ + struct r300_state_atom occlusion_cntl; struct r300_state_atom cul; /* cull cntl (42B8) */ struct r300_state_atom unk42C0; /* (42C0) */ struct r300_state_atom rc; /* rs control (4300) */ @@ -478,13 +479,13 @@ struct r300_hw_state { struct r300_state_atom unk4E00; /* (4E00) */ struct r300_state_atom bld; /* blending (4E04) */ struct r300_state_atom cmk; /* colormask (4E0C) */ - struct r300_state_atom unk4E10; /* constant blend color + ??? (4E10) */ + struct r300_state_atom blend_color; /* constant blend color */ struct r300_state_atom cb; /* colorbuffer (4E28) */ struct r300_state_atom unk4E50; /* (4E50) */ struct r300_state_atom unk4E88; /* (4E88) */ struct r300_state_atom unk4EA0; /* (4E88) I saw it only written on RV350 hardware.. */ struct r300_state_atom zs; /* zstencil control (4F00) */ - struct r300_state_atom unk4F10; /* (4F10) */ + struct r300_state_atom zstencil_format; struct r300_state_atom zb; /* z buffer (4F20) */ struct r300_state_atom unk4F28; /* (4F28) */ struct r300_state_atom unk4F30; /* (4F30) */ @@ -501,12 +502,12 @@ struct r300_hw_state { updating the whole thing at once */ struct { struct r300_state_atom filter; - struct r300_state_atom unknown1; + struct r300_state_atom filter_1; struct r300_state_atom size; struct r300_state_atom format; struct r300_state_atom pitch; struct r300_state_atom offset; - struct r300_state_atom unknown4; + struct r300_state_atom chroma_key; struct r300_state_atom border_color; } tex; struct r300_state_atom txe; /* tex enable (4104) */ diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 0e33e51ef3..402f7508a4 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -69,14 +69,14 @@ static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) GLubyte color[4]; r300ContextPtr rmesa = R300_CONTEXT(ctx); - R300_STATECHANGE(rmesa, unk4E10); + R300_STATECHANGE(rmesa, blend_color); CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]); CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]); CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]); CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]); - rmesa->hw.unk4E10.cmd[1] = r300PackColor(4, color[3], color[0], + rmesa->hw.blend_color.cmd[1] = r300PackColor(4, color[3], color[0], color[1], color[2]); } @@ -335,17 +335,17 @@ static void update_early_z(GLcontext *ctx) */ r300ContextPtr r300 = R300_CONTEXT(ctx); - R300_STATECHANGE(r300, unk4F10); + R300_STATECHANGE(r300, zstencil_format); if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) /* disable early Z */ - r300->hw.unk4F10.cmd[2] = R300_EARLY_Z_DISABLE; + r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; else { if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) /* enable early Z */ - r300->hw.unk4F10.cmd[2] = R300_EARLY_Z_ENABLE; + r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_ENABLE; else /* disable early Z */ - r300->hw.unk4F10.cmd[2] = R300_EARLY_Z_DISABLE; + r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; } } @@ -531,11 +531,11 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state) break; case GL_POLYGON_OFFSET_FILL: - R300_STATECHANGE(r300, unk42B4); + R300_STATECHANGE(r300, occlusion_cntl); if(state){ - r300->hw.unk42B4.cmd[1] |= (3<<0); + r300->hw.occlusion_cntl.cmd[1] |= (3<<0); } else { - r300->hw.unk42B4.cmd[1] &= ~(3<<0); + r300->hw.occlusion_cntl.cmd[1] &= ~(3<<0); } break; default: @@ -589,9 +589,9 @@ static void r300UpdatePolygonMode(GLcontext *ctx) } } - if (r300->hw.unk4288.cmd[1] != hw_mode) { - R300_STATECHANGE(r300, unk4288); - r300->hw.unk4288.cmd[1] = hw_mode; + if (r300->hw.polygon_mode.cmd[1] != hw_mode) { + R300_STATECHANGE(r300, polygon_mode); + r300->hw.polygon_mode.cmd[1] = hw_mode; } } @@ -830,13 +830,13 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - R300_STATECHANGE(rmesa, unk4274); + R300_STATECHANGE(rmesa, shade); switch (mode) { case GL_FLAT: - rmesa->hw.unk4274.cmd[2] = R300_RE_SHADE_MODEL_FLAT; + rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT; break; case GL_SMOOTH: - rmesa->hw.unk4274.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH; + rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH; break; default: return; @@ -1217,12 +1217,12 @@ void r300_setup_textures(GLcontext *ctx) R300_STATECHANGE(r300, txe); R300_STATECHANGE(r300, tex.filter); - R300_STATECHANGE(r300, tex.unknown1); + R300_STATECHANGE(r300, tex.filter_1); R300_STATECHANGE(r300, tex.size); R300_STATECHANGE(r300, tex.format); R300_STATECHANGE(r300, tex.pitch); R300_STATECHANGE(r300, tex.offset); - R300_STATECHANGE(r300, tex.unknown4); + R300_STATECHANGE(r300, tex.chroma_key); R300_STATECHANGE(r300, tex.border_color); r300->hw.txe.cmd[R300_TXE_ENABLE]=0x0; @@ -1259,7 +1259,7 @@ void r300_setup_textures(GLcontext *ctx) r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + hw_tmu] = gen_fixed_filter(t->filter) | (hw_tmu << 28); /* Currently disabled! */ - r300->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80; + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80; r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->size; r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->format; r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->pitch_reg; @@ -1273,7 +1273,7 @@ void r300_setup_textures(GLcontext *ctx) WARN_ONCE("micro tiling enabled!\n"); } - r300->hw.tex.unknown4.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; + r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->pp_border_color; last_hw_tmu = hw_tmu; @@ -1283,12 +1283,12 @@ void r300_setup_textures(GLcontext *ctx) } r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER_0, last_hw_tmu + 1); - r300->hw.tex.unknown1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1); + r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1); r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1); r300->hw.tex.format.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1); r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_PITCH_0, last_hw_tmu + 1); r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1); - r300->hw.tex.unknown4.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1); + r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1); r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); @@ -1973,7 +1973,7 @@ void r300ResetHwState(r300ContextPtr r300) /* Initialize magic registers TODO : learn what they really do, or get rid of those we don't have to touch */ - r300->hw.unk2080.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */ + r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */ r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA | R300_VPORT_X_OFFSET_ENA @@ -1987,9 +1987,9 @@ void r300ResetHwState(r300ContextPtr r300) r300->hw.unk2134.cmd[1] = 0x00FFFFFF; r300->hw.unk2134.cmd[2] = 0x00000000; if (_mesa_little_endian()) - r300->hw.unk2140.cmd[1] = 0x00000000; + r300->hw.vap_cntl_status.cmd[1] = 0x00000000; else - r300->hw.unk2140.cmd[1] = 0x00000002; + r300->hw.vap_cntl_status.cmd[1] = 0x00000002; #if 0 /* Done in setup routing */ ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->packet0.count = 1; @@ -2080,15 +2080,15 @@ void r300ResetHwState(r300ContextPtr r300) r300->hw.unk4260.cmd[2] = r300PackFloat32(0.0); r300->hw.unk4260.cmd[3] = r300PackFloat32(1.0); - r300->hw.unk4274.cmd[1] = 0x00000002; + r300->hw.shade.cmd[1] = 0x00000002; r300ShadeModel(ctx, ctx->Light.ShadeModel); - r300->hw.unk4274.cmd[3] = 0x00000000; - r300->hw.unk4274.cmd[4] = 0x00000000; + r300->hw.shade.cmd[3] = 0x00000000; + r300->hw.shade.cmd[4] = 0x00000000; r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode); r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode); - r300->hw.unk4288.cmd[2] = 0x00000001; - r300->hw.unk4288.cmd[3] = 0x00000000; + r300->hw.polygon_mode.cmd[2] = 0x00000001; + r300->hw.polygon_mode.cmd[3] = 0x00000000; r300->hw.unk42A0.cmd[1] = 0x00000000; r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, ctx->Polygon.OffsetUnits); @@ -2147,8 +2147,8 @@ void r300ResetHwState(r300ContextPtr r300) #endif r300BlendColor(ctx, ctx->Color.BlendColor); - r300->hw.unk4E10.cmd[2] = 0; - r300->hw.unk4E10.cmd[3] = 0; + r300->hw.blend_color.cmd[2] = 0; + r300->hw.blend_color.cmd[3] = 0; /* Again, r300ClearBuffer uses this */ r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + @@ -2180,10 +2180,10 @@ void r300ResetHwState(r300ContextPtr r300) switch (ctx->Visual.depthBits) { case 16: - r300->hw.unk4F10.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z; + r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z; break; case 24: - r300->hw.unk4F10.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z; + r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z; break; default: fprintf(stderr, "Error: Unsupported depth %d... exiting\n", @@ -2192,10 +2192,10 @@ void r300ResetHwState(r300ContextPtr r300) } /* z compress? */ - //r300->hw.unk4F10.cmd[1] |= R300_DEPTH_FORMAT_UNK32; + //r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32; - r300->hw.unk4F10.cmd[3] = 0x00000003; - r300->hw.unk4F10.cmd[4] = 0x00000000; + r300->hw.zstencil_format.cmd[3] = 0x00000003; + r300->hw.zstencil_format.cmd[4] = 0x00000000; r300->hw.zb.cmd[R300_ZB_OFFSET] = r300->radeon.radeonScreen->depthOffset + -- cgit v1.2.3 From 69e57cf6ae0af823e903926bafa8daa46b11352c Mon Sep 17 00:00:00 2001 From: Aapo Tahkola Date: Sun, 11 Mar 2007 11:47:03 +0200 Subject: Guess another unknown register in R300 command buffer initialization. (Oliver McFadden) --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 6 +++--- src/mesa/drivers/dri/r300/r300_context.h | 2 +- src/mesa/drivers/dri/r300/r300_reg.h | 1 + src/mesa/drivers/dri/r300/r300_state.c | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index d5f4c917f0..89725447f1 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -341,8 +341,8 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_RE_POLYGON_MODE, 3); ALLOC_STATE( fogp, always, 3, "fogp", 0 ); r300->hw.fogp.cmd[0] = cmdpacket0(R300_RE_FOG_SCALE, 2); - ALLOC_STATE( unk42A0, always, 2, "unk42A0", 0 ); - r300->hw.unk42A0.cmd[0] = cmdpacket0(0x42A0, 1); + ALLOC_STATE( zbias_cntl, always, 2, "zbias_cntl", 0 ); + r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_RE_ZBIAS_CNTL, 1); ALLOC_STATE( zbs, always, R300_ZBS_CMDSIZE, "zbs", 0 ); r300->hw.zbs.cmd[R300_ZBS_CMD_0] = cmdpacket0(R300_RE_ZBIAS_T_FACTOR, 4); ALLOC_STATE( occlusion_cntl, always, 2, "occlusion_cntl", 0 ); @@ -481,7 +481,7 @@ void r300InitCmdBuf(r300ContextPtr r300) insert_at_tail(&r300->hw.atomlist, &r300->hw.shade); insert_at_tail(&r300->hw.atomlist, &r300->hw.polygon_mode); insert_at_tail(&r300->hw.atomlist, &r300->hw.fogp); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42A0); + insert_at_tail(&r300->hw.atomlist, &r300->hw.zbias_cntl); insert_at_tail(&r300->hw.atomlist, &r300->hw.zbs); insert_at_tail(&r300->hw.atomlist, &r300->hw.occlusion_cntl); insert_at_tail(&r300->hw.atomlist, &r300->hw.cul); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 43de5dfea3..bd9ed6f170 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -457,7 +457,7 @@ struct r300_hw_state { struct r300_state_atom polygon_mode; struct r300_state_atom fogp; /* fog parameters (4294) */ struct r300_state_atom unk429C; /* (429C) */ - struct r300_state_atom unk42A0; /* (42A0) */ + struct r300_state_atom zbias_cntl; struct r300_state_atom zbs; /* zbias (42A4) */ struct r300_state_atom occlusion_cntl; struct r300_state_atom cul; /* cull cntl (42B8) */ diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 69bc994cf6..7bc832c871 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -571,6 +571,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Some of the tests indicate that fgl has a fallback implementation of zbias * via pixel shaders. */ +#define R300_RE_ZBIAS_CNTL 0x42A0 /* GUESS */ #define R300_RE_ZBIAS_T_FACTOR 0x42A4 #define R300_RE_ZBIAS_T_CONSTANT 0x42A8 #define R300_RE_ZBIAS_W_FACTOR 0x42AC diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 402f7508a4..895c2ff43c 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2089,7 +2089,7 @@ void r300ResetHwState(r300ContextPtr r300) r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode); r300->hw.polygon_mode.cmd[2] = 0x00000001; r300->hw.polygon_mode.cmd[3] = 0x00000000; - r300->hw.unk42A0.cmd[1] = 0x00000000; + r300->hw.zbias_cntl.cmd[1] = 0x00000000; r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, ctx->Polygon.OffsetUnits); r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill); -- cgit v1.2.3 From 61ec23cc63a040a2edf1bc466917e85362514c89 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sun, 11 Mar 2007 22:41:26 +0800 Subject: fix for bug#10196 Compute half if LOCAL_VIEWER is enabled and the light is a directional source. --- src/mesa/drivers/dri/i965/brw_vs_tnl.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i965/brw_vs_tnl.c b/src/mesa/drivers/dri/i965/brw_vs_tnl.c index 0d61092247..c05a9b5ea1 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_tnl.c +++ b/src/mesa/drivers/dri/i965/brw_vs_tnl.c @@ -988,7 +988,14 @@ static void build_lighting( struct tnl_program *p ) */ VPpli = register_param3(p, STATE_LIGHT, i, STATE_POSITION_NORMALIZED); - half = register_param3(p, STATE_LIGHT, i, STATE_HALF); + if (p->state->light_local_viewer) { + struct ureg eye_hat = get_eye_position_normalized(p); + half = get_temp(p); + emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); + emit_normalize_vec3(p, half, half); + } else { + half = register_param3(p, STATE_LIGHT, i, STATE_HALF); + } } else { struct ureg Ppli = register_param3(p, STATE_LIGHT, i, -- cgit v1.2.3 From 88501887e48d1619e4296afa609dfe9904b81a70 Mon Sep 17 00:00:00 2001 From: Alan Swanson Date: Mon, 12 Mar 2007 09:59:28 +0100 Subject: radeon: Adapt cliprect fixes from r300. --- src/mesa/drivers/dri/radeon/radeon_context.c | 10 ++++++---- src/mesa/drivers/dri/radeon/radeon_lock.c | 1 - src/mesa/drivers/dri/radeon/radeon_state.c | 2 ++ 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index 6bc2c4aa5c..0d25951b64 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -594,12 +594,14 @@ radeonMakeCurrent( __DRIcontextPrivate *driContextPriv, driDrawableInitVBlank( driDrawPriv, newCtx->vblank_flags, &newCtx->vbl_seq ); } - - if ( (newCtx->dri.drawable != driDrawPriv) - || (newCtx->dri.readable != driReadPriv) ) { + + newCtx->dri.readable = driReadPriv; + + if ( (newCtx->dri.drawable != driDrawPriv) || + newCtx->lastStamp != driDrawPriv->lastStamp ) { newCtx->dri.drawable = driDrawPriv; - newCtx->dri.readable = driReadPriv; + radeonSetCliprects(newCtx); radeonUpdateWindow( newCtx->glCtx ); radeonUpdateViewportOffset( newCtx->glCtx ); } diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index e6ab6af456..cdf8a19fb1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -96,7 +96,6 @@ void radeonGetLock( radeonContextPtr rmesa, GLuint flags ) radeonSetCliprects( rmesa ); radeonUpdateViewportOffset( rmesa->glCtx ); driUpdateFramebufferSize(rmesa->glCtx, drawable); - rmesa->lastStamp = drawable->lastStamp; } RADEON_STATECHANGE( rmesa, ctx ); diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index e19202fa44..4de05c7697 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -1675,6 +1675,8 @@ void radeonSetCliprects( radeonContextPtr rmesa ) if (rmesa->state.scissor.enabled) radeonRecalcScissorRects( rmesa ); + + rmesa->lastStamp = drawable->lastStamp; } -- cgit v1.2.3 From 33ea9dccaf31c2dfb0961847eedfe75336d9c80f Mon Sep 17 00:00:00 2001 From: Alan Swanson Date: Mon, 12 Mar 2007 09:59:45 +0100 Subject: r200: Adapt cliprect fixes from r300. --- src/mesa/drivers/dri/r200/r200_context.c | 6 ++++-- src/mesa/drivers/dri/r200/r200_lock.c | 1 - src/mesa/drivers/dri/r200/r200_state.c | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index fc6eb93daa..75efd9a838 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -673,11 +673,13 @@ r200MakeCurrent( __DRIcontextPrivate *driContextPriv, &newCtx->vbl_seq ); } + newCtx->dri.readable = driReadPriv; + if ( newCtx->dri.drawable != driDrawPriv || - newCtx->dri.readable != driReadPriv ) { + newCtx->lastStamp != driDrawPriv->lastStamp ) { newCtx->dri.drawable = driDrawPriv; - newCtx->dri.readable = driReadPriv; + r200SetCliprects(newCtx, GL_BACK_LEFT); r200UpdateWindow( newCtx->glCtx ); r200UpdateViewportOffset( newCtx->glCtx ); } diff --git a/src/mesa/drivers/dri/r200/r200_lock.c b/src/mesa/drivers/dri/r200/r200_lock.c index bcc0c91639..745e596467 100644 --- a/src/mesa/drivers/dri/r200/r200_lock.c +++ b/src/mesa/drivers/dri/r200/r200_lock.c @@ -98,7 +98,6 @@ void r200GetLock( r200ContextPtr rmesa, GLuint flags ) r200SetCliprects( rmesa, GL_FRONT_LEFT ); r200UpdateViewportOffset( rmesa->glCtx ); driUpdateFramebufferSize(rmesa->glCtx, drawable); - rmesa->lastStamp = drawable->lastStamp; } R200_STATECHANGE( rmesa, ctx ); diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index bdb487f2b9..911a340f60 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -1889,6 +1889,8 @@ void r200SetCliprects( r200ContextPtr rmesa, GLenum mode ) if (rmesa->state.scissor.enabled) r200RecalcScissorRects( rmesa ); + + rmesa->lastStamp = drawable->lastStamp; } -- cgit v1.2.3 From d0a3400f66b8c4ace7ebef6d0a944376d5203756 Mon Sep 17 00:00:00 2001 From: Alan Swanson Date: Mon, 12 Mar 2007 10:03:01 +0100 Subject: r200: Simplify r200SetCliprects like radeonSetCliprects in radeon and r300. --- src/mesa/drivers/dri/r200/r200_context.c | 2 +- src/mesa/drivers/dri/r200/r200_lock.c | 5 +---- src/mesa/drivers/dri/r200/r200_state.c | 35 +++++++++++++++++--------------- src/mesa/drivers/dri/r200/r200_state.h | 2 +- 4 files changed, 22 insertions(+), 22 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 75efd9a838..3abcdf9e18 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -679,7 +679,7 @@ r200MakeCurrent( __DRIcontextPrivate *driContextPriv, newCtx->lastStamp != driDrawPriv->lastStamp ) { newCtx->dri.drawable = driDrawPriv; - r200SetCliprects(newCtx, GL_BACK_LEFT); + r200SetCliprects(newCtx); r200UpdateWindow( newCtx->glCtx ); r200UpdateViewportOffset( newCtx->glCtx ); } diff --git a/src/mesa/drivers/dri/r200/r200_lock.c b/src/mesa/drivers/dri/r200/r200_lock.c index 745e596467..9ffdb2b212 100644 --- a/src/mesa/drivers/dri/r200/r200_lock.c +++ b/src/mesa/drivers/dri/r200/r200_lock.c @@ -92,10 +92,7 @@ void r200GetLock( r200ContextPtr rmesa, GLuint flags ) if ( rmesa->lastStamp != drawable->lastStamp ) { r200UpdatePageFlipping( rmesa ); - if (rmesa->glCtx->DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) - r200SetCliprects( rmesa, GL_BACK_LEFT ); - else - r200SetCliprects( rmesa, GL_FRONT_LEFT ); + r200SetCliprects( rmesa ); r200UpdateViewportOffset( rmesa->glCtx ); driUpdateFramebufferSize(rmesa->glCtx, drawable); } diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 911a340f60..16726d7d55 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -1691,6 +1691,11 @@ static void r200ClearStencil( GLcontext *ctx, GLint s ) #define SUBPIXEL_X 0.125 #define SUBPIXEL_Y 0.125 + +/** + * Called when window size or position changes or viewport or depth range + * state is changed. We update the hardware viewport state here. + */ void r200UpdateWindow( GLcontext *ctx ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); @@ -1843,19 +1848,18 @@ static void r200LogicOpCode( GLcontext *ctx, GLenum opcode ) } -void r200SetCliprects( r200ContextPtr rmesa, GLenum mode ) +/* + * Set up the cliprects for either front or back-buffer drawing. + */ +void r200SetCliprects( r200ContextPtr rmesa ) { __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; __DRIdrawablePrivate *const readable = rmesa->dri.readable; GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate; GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate; - switch ( mode ) { - case GL_FRONT_LEFT: - rmesa->numClipRects = drawable->numClipRects; - rmesa->pClipRects = drawable->pClipRects; - break; - case GL_BACK_LEFT: + if (draw_fb->_ColorDrawBufferMask[0] + == BUFFER_BIT_BACK_LEFT) { /* Can't ignore 2d windows if we are page flipping. */ if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) { @@ -1866,11 +1870,12 @@ void r200SetCliprects( r200ContextPtr rmesa, GLenum mode ) rmesa->numClipRects = drawable->numBackClipRects; rmesa->pClipRects = drawable->pBackClipRects; } - break; - default: - fprintf(stderr, "bad mode in r200SetCliprects\n"); - return; } + else { + /* front buffer (or none, or multiple buffers) */ + rmesa->numClipRects = drawable->numClipRects; + rmesa->pClipRects = drawable->pClipRects; + } if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) { _mesa_resize_framebuffer(rmesa->glCtx, draw_fb, @@ -1910,19 +1915,17 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode ) */ switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) { case BUFFER_BIT_FRONT_LEFT: - FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE ); - r200SetCliprects( rmesa, GL_FRONT_LEFT ); - break; case BUFFER_BIT_BACK_LEFT: FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE ); - r200SetCliprects( rmesa, GL_BACK_LEFT ); break; default: - /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */ + /* 0 (GL_NONE) buffers or multiple color drawing buffers */ FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE ); return; } + r200SetCliprects( rmesa ); + /* We'll set the drawing engine's offset/pitch parameters later * when we update other state. */ diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h index 98c6fbe40b..f34090b619 100644 --- a/src/mesa/drivers/dri/r200/r200_state.h +++ b/src/mesa/drivers/dri/r200/r200_state.h @@ -44,7 +44,7 @@ extern void r200InitTnlFuncs( GLcontext *ctx ); extern void r200UpdateMaterial( GLcontext *ctx ); -extern void r200SetCliprects( r200ContextPtr rmesa, GLenum mode ); +extern void r200SetCliprects( r200ContextPtr rmesa ); extern void r200RecalcScissorRects( r200ContextPtr rmesa ); extern void r200UpdateViewportOffset( GLcontext *ctx ); extern void r200UpdateWindow( GLcontext *ctx ); -- cgit v1.2.3 From 708d836e6b785764692c485e6cdf42f2b50fcf86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 12 Mar 2007 19:08:28 +0100 Subject: i915tex: Don't crash when intel_fb->color_rb[i] is NULL. This can be the case on some systems when running glxinfo. --- src/mesa/drivers/dri/i915tex/intel_buffers.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i915tex/intel_buffers.c b/src/mesa/drivers/dri/i915tex/intel_buffers.c index 45fd2fa8de..62ff54b007 100644 --- a/src/mesa/drivers/dri/i915tex/intel_buffers.c +++ b/src/mesa/drivers/dri/i915tex/intel_buffers.c @@ -327,6 +327,9 @@ intelWindowMoved(struct intel_context *intel) } for (i = 0; i < intel_fb->pf_num_pages; i++) { + if (!intel_fb->color_rb[i]) + continue; + vbl.request.sequence = intel_fb->color_rb[i]->vbl_pending; drmWaitVBlank(intel->driFd, &vbl); } @@ -336,7 +339,8 @@ intelWindowMoved(struct intel_context *intel) intel_fb->vbl_waited = intel_fb->vbl_seq; for (i = 0; i < intel_fb->pf_num_pages; i++) { - intel_fb->color_rb[i]->vbl_pending = intel_fb->vbl_waited; + if (intel_fb->color_rb[i]) + intel_fb->color_rb[i]->vbl_pending = intel_fb->vbl_waited; } } } else { -- cgit v1.2.3 From 0c750ca98de5a08c0fe4c513a741c78b7bab80e3 Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Tue, 13 Mar 2007 00:48:17 +0000 Subject: Guess another unknown register used for R300 pacification. --- src/mesa/drivers/dri/r300/r300_ioctl.c | 2 +- src/mesa/drivers/dri/r300/r300_reg.h | 4 ++++ src/mesa/drivers/dri/r300/r300_render.c | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 6fa34ee482..41114a875d 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -165,7 +165,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) e32(0x0000000a); - reg_start(0x4f18,0); + reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); e32(0x00000003); cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); } diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 7bc832c871..be357ab5c7 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1395,6 +1395,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* gap */ +#define R300_RB3D_ZCACHE_CTLSTAT 0x4F18 /* GUESS */ + +/* gap */ + #define R300_RB3D_DEPTHOFFSET 0x4F20 #define R300_RB3D_DEPTHPITCH 0x4F24 # define R300_DEPTHPITCH_MASK 0x00001FF8 /* GUESS */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 211c451f66..7a850b1001 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -346,7 +346,7 @@ GLboolean r300_run_vb_render(GLcontext *ctx, reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); e32(0x0000000a); - reg_start(0x4f18,0); + reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); e32(0x00000003); r300EmitState(rmesa); @@ -362,7 +362,7 @@ GLboolean r300_run_vb_render(GLcontext *ctx, reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); e32(0x0000000a/*0x2*/); - reg_start(0x4f18,0); + reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); e32(0x00000003/*0x1*/); #ifdef USER_BUFFERS -- cgit v1.2.3 From cd6660475646defb6311491028dfe50202709b59 Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Tue, 13 Mar 2007 01:31:38 +0000 Subject: Corrected values written to R300_RB3D_DSTCACHE_CTLSTAT to either R300_RB3D_DSTCACHE_02 or R300_RB3D_DSTCACHE_0A, rather than hexadecimal values. --- src/mesa/drivers/dri/r300/r300_ioctl.c | 2 +- src/mesa/drivers/dri/r300/r300_render.c | 4 ++-- src/mesa/drivers/dri/r300/radeon_mm.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 41114a875d..bb16cd13d4 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -162,7 +162,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); + e32(R300_RB3D_DSTCACHE_0A); reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 7a850b1001..87d370e303 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -344,7 +344,7 @@ GLboolean r300_run_vb_render(GLcontext *ctx, r300UpdateShaderStates(rmesa); reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); + e32(R300_RB3D_DSTCACHE_0A); reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); e32(0x00000003); @@ -360,7 +360,7 @@ GLboolean r300_run_vb_render(GLcontext *ctx, } reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a/*0x2*/); + e32(R300_RB3D_DSTCACHE_0A /*R300_RB3D_DSTCACHE_02*/); reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); e32(0x00000003/*0x1*/); diff --git a/src/mesa/drivers/dri/r300/radeon_mm.c b/src/mesa/drivers/dri/r300/radeon_mm.c index 32ed1f4393..065f288e4c 100644 --- a/src/mesa/drivers/dri/r300/radeon_mm.c +++ b/src/mesa/drivers/dri/r300/radeon_mm.c @@ -284,7 +284,7 @@ static void emit_lin_cp(r300ContextPtr rmesa, unsigned long dst, unsigned long s } reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); + e32(R300_RB3D_DSTCACHE_0A); reg_start(0x342c,0); e32(0x00000005); -- cgit v1.2.3 From 19fbe9732c92ff67911c28bf19c174326af05c84 Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Tue, 13 Mar 2007 06:08:36 +0000 Subject: Documented the value written for R300_TX_CNTL cache flush. --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 3 ++- src/mesa/drivers/dri/r300/r300_reg.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 89725447f1..e4511e0c21 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -174,11 +174,12 @@ static __inline__ void r300DoEmitState(r300ContextPtr r300, GLboolean dirty) dest ++; r300->cmdbuf.count_used ++; + /* Emit cache flush */ *dest = cmdpacket0(R300_TX_CNTL, 1); dest ++; r300->cmdbuf.count_used ++; - *dest = 0x0; + *dest = R300_TX_FLUSH; dest ++; r300->cmdbuf.count_used ++; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index be357ab5c7..aa6d81a063 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -497,6 +497,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Zero to flush caches. */ #define R300_TX_CNTL 0x4100 +#define R300_TX_FLUSH 0x0 /* The upper enable bits are guessed, based on fglrx reported limits. */ #define R300_TX_ENABLE 0x4104 -- cgit v1.2.3 From eb4db4c4ec7efc366b00e1b1f1fe519ca1af79d6 Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Tue, 13 Mar 2007 06:24:56 +0000 Subject: Add defines for the values written to R300_RB3D_ZCACHE_CTLSTAT. Note that just like the values written to R300_RB3D_DSTCACHE_CTLSTAT these values are really unknown; ideally more reverse engineering should be done to determine what these values mean and when they should be set. --- src/mesa/drivers/dri/r300/r300_ioctl.c | 2 +- src/mesa/drivers/dri/r300/r300_reg.h | 2 ++ src/mesa/drivers/dri/r300/r300_render.c | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index bb16cd13d4..f0741f978d 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -166,7 +166,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); - e32(0x00000003); + e32(R300_RB3D_ZCACHE_CTLSTAT_03); cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); } diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index aa6d81a063..f9accadf61 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1397,6 +1397,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* gap */ #define R300_RB3D_ZCACHE_CTLSTAT 0x4F18 /* GUESS */ +# define R300_RB3D_ZCACHE_CTLSTAT_01 0x1 +# define R300_RB3D_ZCACHE_CTLSTAT_03 0x3 /* gap */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 87d370e303..659ec3ff54 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -347,7 +347,7 @@ GLboolean r300_run_vb_render(GLcontext *ctx, e32(R300_RB3D_DSTCACHE_0A); reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); - e32(0x00000003); + e32(R300_RB3D_ZCACHE_CTLSTAT_03); r300EmitState(rmesa); @@ -363,7 +363,7 @@ GLboolean r300_run_vb_render(GLcontext *ctx, e32(R300_RB3D_DSTCACHE_0A /*R300_RB3D_DSTCACHE_02*/); reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); - e32(0x00000003/*0x1*/); + e32(R300_RB3D_ZCACHE_CTLSTAT_03 /*R300_RB3D_ZCACHE_CTLSTAT_01*/); #ifdef USER_BUFFERS r300UseArrays(ctx); -- cgit v1.2.3 From 4c18d9056b3793a8441ecbfc9c5ea5f2d43555b2 Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Tue, 13 Mar 2007 14:46:45 +0000 Subject: r300: Renamed the CACHE_CTLSTAT values to include UNKNOWN in the name; not enough information is known about them to be sure as to what the values mean. --- src/mesa/drivers/dri/r300/r300_ioctl.c | 4 ++-- src/mesa/drivers/dri/r300/r300_reg.h | 8 ++++---- src/mesa/drivers/dri/r300/r300_render.c | 8 ++++---- src/mesa/drivers/dri/r300/radeon_mm.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index f0741f978d..f674fea7c9 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -162,11 +162,11 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(R300_RB3D_DSTCACHE_0A); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); - e32(R300_RB3D_ZCACHE_CTLSTAT_03); + e32(R300_RB3D_ZCACHE_UNKNOWN_03); cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); } diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index f9accadf61..3de15752b1 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1324,8 +1324,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Set to 0A before 3D operations, set to 02 afterwards. */ #define R300_RB3D_DSTCACHE_CTLSTAT 0x4E4C -# define R300_RB3D_DSTCACHE_02 0x00000002 -# define R300_RB3D_DSTCACHE_0A 0x0000000A +# define R300_RB3D_DSTCACHE_UNKNOWN_02 0x00000002 +# define R300_RB3D_DSTCACHE_UNKNOWN_0A 0x0000000A /* gap */ /* There seems to be no "write only" setting, so use Z-test = ALWAYS @@ -1397,8 +1397,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* gap */ #define R300_RB3D_ZCACHE_CTLSTAT 0x4F18 /* GUESS */ -# define R300_RB3D_ZCACHE_CTLSTAT_01 0x1 -# define R300_RB3D_ZCACHE_CTLSTAT_03 0x3 +# define R300_RB3D_ZCACHE_UNKNOWN_01 0x1 +# define R300_RB3D_ZCACHE_UNKNOWN_03 0x3 /* gap */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 659ec3ff54..0864558e8d 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -344,10 +344,10 @@ GLboolean r300_run_vb_render(GLcontext *ctx, r300UpdateShaderStates(rmesa); reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(R300_RB3D_DSTCACHE_0A); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); - e32(R300_RB3D_ZCACHE_CTLSTAT_03); + e32(R300_RB3D_ZCACHE_UNKNOWN_03); r300EmitState(rmesa); @@ -360,10 +360,10 @@ GLboolean r300_run_vb_render(GLcontext *ctx, } reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(R300_RB3D_DSTCACHE_0A /*R300_RB3D_DSTCACHE_02*/); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A /*R300_RB3D_DSTCACHE_UNKNOWN_02*/); reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); - e32(R300_RB3D_ZCACHE_CTLSTAT_03 /*R300_RB3D_ZCACHE_CTLSTAT_01*/); + e32(R300_RB3D_ZCACHE_UNKNOWN_03 /*R300_RB3D_ZCACHE_UNKNOWN_01*/); #ifdef USER_BUFFERS r300UseArrays(ctx); diff --git a/src/mesa/drivers/dri/r300/radeon_mm.c b/src/mesa/drivers/dri/r300/radeon_mm.c index 065f288e4c..1502dac8fc 100644 --- a/src/mesa/drivers/dri/r300/radeon_mm.c +++ b/src/mesa/drivers/dri/r300/radeon_mm.c @@ -284,7 +284,7 @@ static void emit_lin_cp(r300ContextPtr rmesa, unsigned long dst, unsigned long s } reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(R300_RB3D_DSTCACHE_0A); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); reg_start(0x342c,0); e32(0x00000005); -- cgit v1.2.3 From 37e6f760fde506728077c6e8b48bb8c34d53dfd3 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 13 Mar 2007 17:43:21 +0100 Subject: enable ARB_vertex_buffer_object for more dri drivers ARB_vertex_buffer_object looks like a useful extension even for old chips. The drivers should not need any code to be able to use this extension since they just use mesa's vbo code anyway. Newly enabled for i810, mach64, mga, r128, radeon, savage, sis and unichrome. --- src/mesa/drivers/dri/i810/i810context.c | 2 ++ src/mesa/drivers/dri/mach64/mach64_context.c | 2 ++ src/mesa/drivers/dri/mga/mga_xmesa.c | 2 ++ src/mesa/drivers/dri/r128/r128_context.c | 2 ++ src/mesa/drivers/dri/radeon/radeon_context.c | 2 ++ src/mesa/drivers/dri/savage/savage_xmesa.c | 2 ++ src/mesa/drivers/dri/sis/sis_context.c | 2 ++ src/mesa/drivers/dri/unichrome/via_context.c | 2 ++ 8 files changed, 16 insertions(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i810/i810context.c b/src/mesa/drivers/dri/i810/i810context.c index db8f7a19a2..f0332d9ae8 100644 --- a/src/mesa/drivers/dri/i810/i810context.c +++ b/src/mesa/drivers/dri/i810/i810context.c @@ -65,6 +65,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object #include "extension_helper.h" #ifndef I810_DEBUG @@ -129,6 +130,7 @@ const struct dri_extension card_extensions[] = { "GL_ARB_texture_env_combine", NULL }, { "GL_ARB_texture_env_crossbar", NULL }, { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_edge_clamp", NULL }, { "GL_EXT_texture_env_combine", NULL }, diff --git a/src/mesa/drivers/dri/mach64/mach64_context.c b/src/mesa/drivers/dri/mach64/mach64_context.c index 5a6c301da2..ad661e198c 100644 --- a/src/mesa/drivers/dri/mach64/mach64_context.c +++ b/src/mesa/drivers/dri/mach64/mach64_context.c @@ -58,6 +58,7 @@ #include "vblank.h" #define need_GL_ARB_multisample +#define need_GL_ARB_vertex_buffer_object #include "extension_helper.h" #ifndef MACH64_DEBUG @@ -83,6 +84,7 @@ const struct dri_extension card_extensions[] = { { "GL_ARB_multisample", GL_ARB_multisample_functions }, { "GL_ARB_multitexture", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_texture_edge_clamp", NULL }, { "GL_MESA_ycbcr_texture", NULL }, { "GL_SGIS_generate_mipmap", NULL }, diff --git a/src/mesa/drivers/dri/mga/mga_xmesa.c b/src/mesa/drivers/dri/mga/mga_xmesa.c index 67a6f8bdf0..ca2c8faa18 100644 --- a/src/mesa/drivers/dri/mga/mga_xmesa.c +++ b/src/mesa/drivers/dri/mga/mga_xmesa.c @@ -72,6 +72,7 @@ #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object #define need_GL_ARB_vertex_program #define need_GL_EXT_fog_coord #define need_GL_EXT_multi_draw_arrays @@ -401,6 +402,7 @@ static const struct dri_extension card_extensions[] = { "GL_ARB_multisample", GL_ARB_multisample_functions }, { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, { "GL_ARB_texture_rectangle", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_blend_logic_op", NULL }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions }, diff --git a/src/mesa/drivers/dri/r128/r128_context.c b/src/mesa/drivers/dri/r128/r128_context.c index 89ddafa02a..95e54a6af5 100644 --- a/src/mesa/drivers/dri/r128/r128_context.c +++ b/src/mesa/drivers/dri/r128/r128_context.c @@ -68,6 +68,7 @@ int R128_DEBUG = 0; #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object #define need_GL_EXT_blend_minmax #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color @@ -80,6 +81,7 @@ const struct dri_extension card_extensions[] = { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, { "GL_ARB_texture_env_add", NULL }, { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_blend_subtract", GL_EXT_blend_minmax_functions }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, { "GL_EXT_texture_edge_clamp", NULL }, diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index 0d25951b64..d7c2d1407d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -64,6 +64,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object #define need_GL_EXT_blend_minmax #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color @@ -122,6 +123,7 @@ const struct dri_extension card_extensions[] = { "GL_ARB_texture_env_crossbar", NULL }, { "GL_ARB_texture_env_dot3", NULL }, { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_blend_logic_op", NULL }, { "GL_EXT_blend_subtract", GL_EXT_blend_minmax_functions }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, diff --git a/src/mesa/drivers/dri/savage/savage_xmesa.c b/src/mesa/drivers/dri/savage/savage_xmesa.c index f859217069..43422db9a8 100644 --- a/src/mesa/drivers/dri/savage/savage_xmesa.c +++ b/src/mesa/drivers/dri/savage/savage_xmesa.c @@ -61,6 +61,7 @@ #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object #define need_GL_EXT_secondary_color #include "extension_helper.h" @@ -135,6 +136,7 @@ static const struct dri_extension card_extensions[] = { "GL_ARB_multisample", GL_ARB_multisample_functions }, { "GL_ARB_multitexture", NULL }, { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_lod_bias", NULL }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, diff --git a/src/mesa/drivers/dri/sis/sis_context.c b/src/mesa/drivers/dri/sis/sis_context.c index 89b81da347..b21df0a61e 100644 --- a/src/mesa/drivers/dri/sis/sis_context.c +++ b/src/mesa/drivers/dri/sis/sis_context.c @@ -60,6 +60,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color #include "extension_helper.h" @@ -79,6 +80,7 @@ struct dri_extension card_extensions[] = { "GL_ARB_texture_border_clamp", NULL }, { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, /*{ "GL_EXT_fog_coord", GL_EXT_fog_coord_functions },*/ { "GL_EXT_texture_lod_bias", NULL }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, diff --git a/src/mesa/drivers/dri/unichrome/via_context.c b/src/mesa/drivers/dri/unichrome/via_context.c index bc5a414df6..89533421b4 100644 --- a/src/mesa/drivers/dri/unichrome/via_context.c +++ b/src/mesa/drivers/dri/unichrome/via_context.c @@ -64,6 +64,7 @@ #define need_GL_ARB_multisample #define need_GL_ARB_point_parameters +#define need_GL_ARB_vertex_buffer_object #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color #include "extension_helper.h" @@ -372,6 +373,7 @@ const struct dri_extension card_extensions[] = { "GL_ARB_texture_env_combine", NULL }, /* { "GL_ARB_texture_env_dot3", NULL }, */ { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, { "GL_EXT_stencil_wrap", NULL }, -- cgit v1.2.3 From a6cc9ab493a2efa9a0ea91cddba0e85c8c8c83f1 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 13 Mar 2007 19:04:28 +0100 Subject: sync up t_vp_build.c brw_vs_tnl.c a bit Bring over the optimizations for fog and normalized spot dir from t_vp_build.c to brw_vs_tnl.c. Likewise, port a fix for point size calc from brw_vs_tnl.c to t_vp_build.c (use ABS(eyez) instead of -eyez). Leave the now differing point size calcs alone though, not sure what's better (it's basically MOV, ABS, MUL, DP3 vs. ABS, MAD, MAD). --- src/mesa/drivers/dri/i965/brw_vs_tnl.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i965/brw_vs_tnl.c b/src/mesa/drivers/dri/i965/brw_vs_tnl.c index c05a9b5ea1..e22a26b291 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_tnl.c +++ b/src/mesa/drivers/dri/i965/brw_vs_tnl.c @@ -850,14 +850,13 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p, /* Calculate spot attenuation: */ if (!p->state->unit[i].light_spotcutoff_is_180) { - struct ureg spot_dir = register_param3(p, STATE_LIGHT, i, - STATE_SPOT_DIRECTION); + struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, + STATE_SPOT_DIR_NORMALIZED, i); struct ureg spot = get_temp(p); struct ureg slt = get_temp(p); - - emit_normalize_vec3( p, spot, spot_dir ); /* XXX: precompute! */ - emit_op2(p, OPCODE_DP3, spot, 0, ureg_negate(VPpli), spot); - emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir,W), spot); + + emit_op2(p, OPCODE_DP3, spot, 0, ureg_negate(VPpli), spot_dir_norm); + emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); emit_op2(p, OPCODE_MUL, att, 0, slt, spot); @@ -1166,7 +1165,8 @@ static void build_fog( struct tnl_program *p ) if (p->state->fog_option && p->state->tnl_do_vertex_fog) { - struct ureg params = register_param1(p, STATE_FOG_PARAMS); + struct ureg params = register_param2(p, STATE_INTERNAL, + STATE_FOG_PARAMS_OPTIMIZED); struct ureg tmp = get_temp(p); struct ureg id = get_identity_param(p); @@ -1174,8 +1174,7 @@ static void build_fog( struct tnl_program *p ) switch (p->state->fog_option) { case FOG_LINEAR: { - emit_op1(p, OPCODE_ABS, tmp, 0, input); - emit_op2(p, OPCODE_SUB, tmp, 0, swizzle1(params,Z), tmp); + emit_op3(p, OPCODE_MAD, tmp, 0, input, swizzle1(params,X), swizzle1(params,Y)); emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,W)); emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */ emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W)); @@ -1183,15 +1182,13 @@ static void build_fog( struct tnl_program *p ) } case FOG_EXP: emit_op1(p, OPCODE_ABS, tmp, 0, input); - emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,X)); - emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, - register_const1f(p, M_E), ureg_negate(tmp)); + emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,Z)); + emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp)); break; case FOG_EXP2: - emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,X)); + emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W)); emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); - emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, - register_const1f(p, M_E), ureg_negate(tmp)); + emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp)); break; } -- cgit v1.2.3 From 47e0b606a85059ff29fe311dc2f1bcafdefe4cdb Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 14 Mar 2007 12:42:30 -0600 Subject: move CLIENT_ID code in xmesa_delete_framebuffer(), see bug 7205 --- src/mesa/drivers/x11/xm_buffer.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/x11/xm_buffer.c b/src/mesa/drivers/x11/xm_buffer.c index 747971a6c3..73c46b1fe6 100644 --- a/src/mesa/drivers/x11/xm_buffer.c +++ b/src/mesa/drivers/x11/xm_buffer.c @@ -362,16 +362,13 @@ xmesa_delete_framebuffer(struct gl_framebuffer *fb) { XMesaBuffer b = XMESA_BUFFER(fb); -#ifdef XFree86Server - int client = 0; - if (b->frontxrb->drawable) - client = CLIENT_ID(b->frontxrb->drawable->id); -#endif - if (b->num_alloced > 0) { /* If no other buffer uses this X colormap then free the colors. */ if (!xmesa_find_buffer(b->display, b->cmap, b)) { #ifdef XFree86Server + int client = 0; + if (b->frontxrb->drawable) + client = CLIENT_ID(b->frontxrb->drawable->id); (void)FreeColors(b->cmap, client, b->num_alloced, b->alloced_colors, 0); #else -- cgit v1.2.3 From 3049946fa742b654afa9b24f8bc79f387f01aea9 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 14 Mar 2007 12:52:53 -0600 Subject: clear the b->frontxrb->drawable field in xmesa_free_buffer(), see bug 7205 --- src/mesa/drivers/x11/xm_api.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index 776928dec4..cbbbd56efd 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -483,6 +483,12 @@ xmesa_free_buffer(XMesaBuffer buffer) /* mark as delete pending */ fb->DeletePending = GL_TRUE; + + /* Since the X window for the XMesaBuffer is going away, we don't + * want to dereference this pointer in the future. + */ + b->frontxrb->drawable = 0; + /* Unreference. If count = zero we'll really delete the buffer */ _mesa_unreference_framebuffer(&fb); -- cgit v1.2.3 From 4a7fe4fcfa9069043db3ba8e88a6b941788f5f43 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 15 Mar 2007 10:27:47 +0000 Subject: Fix off by one error in immediate state packet size. --- src/mesa/drivers/dri/i915tex/i915_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i915tex/i915_state.c b/src/mesa/drivers/dri/i915tex/i915_state.c index 78ae4bdb5f..1fafadced0 100644 --- a/src/mesa/drivers/dri/i915tex/i915_state.c +++ b/src/mesa/drivers/dri/i915tex/i915_state.c @@ -859,7 +859,7 @@ i915_init_packets(struct i915_context *i915) i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(4) | - I1_LOAD_S(5) | I1_LOAD_S(6) | (4)); + I1_LOAD_S(5) | I1_LOAD_S(6) | (3)); i915->state.Ctx[I915_CTXREG_LIS2] = 0; i915->state.Ctx[I915_CTXREG_LIS4] = 0; i915->state.Ctx[I915_CTXREG_LIS5] = 0; -- cgit v1.2.3 From 4e4ab2a62bf33a582420cff85775a6580167b5a9 Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Thu, 15 Mar 2007 17:35:34 +0000 Subject: Committed Rune Petersen's fragment.position patch (Bug #10024) plus a few small corrections. --- src/mesa/drivers/dri/r300/r300_fragprog.c | 91 +++++++++++++++++++++++++++++ src/mesa/drivers/dri/r300/r300_state.c | 14 ++--- src/mesa/drivers/dri/r300/r300_state.h | 1 + src/mesa/drivers/dri/r300/r300_vertexprog.c | 67 ++------------------- 4 files changed, 103 insertions(+), 70 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index a1c634a54d..d3062a4145 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -49,6 +49,7 @@ #include "r300_context.h" #include "r300_fragprog.h" #include "r300_reg.h" +#include "r300_state.h" /* * Usefull macros and values @@ -1787,6 +1788,94 @@ static GLboolean parse_program(struct r300_fragment_program *rp) return GL_TRUE; } +static void insert_wpos(struct gl_program *prog) +{ + GLint tokens[6] = { STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0, 0 }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = prog->NumTemporaries; + /* should do something else if no temps left... */ + prog->NumTemporaries++; + + + fpi = malloc((prog->NumInstructions + 3) * sizeof(struct prog_instruction)); + /* all including END */ + memcpy(&fpi[3], prog->Instructions, prog->NumInstructions * sizeof(struct prog_instruction)); + + memset(fpi, 0, 3 * sizeof(struct prog_instruction)); + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(prog->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + free(prog->Instructions); + + prog->Instructions = fpi; + + prog->NumInstructions += i; + fpi = &prog->Instructions[prog->NumInstructions-1]; + + assert(fpi->Opcode == OPCODE_END); + + for(fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++){ + for(i=0; i<3; i++) + if( fpi->SrcReg[i].File == PROGRAM_INPUT && + fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS ){ + fpi->SrcReg[i].File = PROGRAM_TEMPORARY; + fpi->SrcReg[i].Index = tempregi; + } + } +} + /* - Init structures * - Determine what hwregs each input corresponds to */ @@ -1844,6 +1933,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) if (InputsRead & FRAG_BIT_WPOS) { cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(rp); + insert_wpos(&mp->Base); } InputsRead &= ~FRAG_BIT_WPOS; @@ -1956,6 +2046,7 @@ void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_pr rp->translated = GL_TRUE; if (0) dump_program(rp); + r300UpdateStateParameters(rp->ctx, _NEW_PROGRAM); } update_params(rp); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 895c2ff43c..fcb61a2831 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1066,8 +1066,8 @@ static void r300FetchStateParameter(GLcontext *ctx, const enum state_index state switch(state[1]) { case STATE_R300_WINDOW_DIMENSION: - value[0] = r300->radeon.dri.drawable->w; /* width */ - value[1] = r300->radeon.dri.drawable->h; /* height */ + value[0] = r300->radeon.dri.drawable->w*0.5f;/* width*0.5 */ + value[1] = r300->radeon.dri.drawable->h*0.5f;/* height*0.5 */ value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ value[3] = 1.0F; /* not used */ break; @@ -1081,20 +1081,20 @@ static void r300FetchStateParameter(GLcontext *ctx, const enum state_index state * Update R300's own internal state parameters. * For now just STATE_R300_WINDOW_DIMENSION */ -static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) +void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) { - struct r300_vertex_program_cont *vpc; + struct r300_fragment_program *fp; struct gl_program_parameter_list *paramList; GLuint i; if(!(new_state & (_NEW_BUFFERS|_NEW_PROGRAM))) return; - vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; - if (!vpc) + fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current; + if (!fp) return; - paramList = vpc->mesa_program.Base.Parameters; + paramList = fp->mesa_program.Base.Parameters; if (!paramList) return; diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index f6a50655d1..52e606f241 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -61,6 +61,7 @@ do { \ extern void r300ResetHwState(r300ContextPtr r300); +extern void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state); extern void r300InitState(r300ContextPtr r300); extern void r300InitStateFuncs(struct dd_function_table* functions); extern void r300UpdateViewportOffset( GLcontext *ctx ); diff --git a/src/mesa/drivers/dri/r300/r300_vertexprog.c b/src/mesa/drivers/dri/r300/r300_vertexprog.c index 2ff92e1328..68a11a42b3 100644 --- a/src/mesa/drivers/dri/r300/r300_vertexprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertexprog.c @@ -962,22 +962,19 @@ static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog, GLuint temp_index) { - - GLint tokens[6] = { STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0, 0 }; struct prog_instruction *vpi; struct prog_instruction *vpi_insert; - GLuint window_index; int i = 0; - vpi = malloc((prog->NumInstructions + 5) * sizeof(struct prog_instruction)); + vpi = malloc((prog->NumInstructions + 2) * sizeof(struct prog_instruction)); /* all but END */ memcpy(vpi, prog->Instructions, (prog->NumInstructions - 1) * sizeof(struct prog_instruction)); /* END */ - memcpy(&vpi[prog->NumInstructions + 4], &prog->Instructions[prog->NumInstructions - 1], + memcpy(&vpi[prog->NumInstructions + 1], &prog->Instructions[prog->NumInstructions - 1], sizeof(struct prog_instruction)); vpi_insert = &vpi[prog->NumInstructions - 1]; - memset(vpi_insert, 0, 5 * sizeof(struct prog_instruction)); + memset(vpi_insert, 0, 2 * sizeof(struct prog_instruction)); vpi_insert[i].Opcode = OPCODE_MOV; @@ -991,59 +988,7 @@ static void insert_wpos(struct r300_vertex_program *vp, vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); i++; - /* perspective divide */ - vpi_insert[i].Opcode = OPCODE_RCP; - - vpi_insert[i].DstReg.File = PROGRAM_TEMPORARY; - vpi_insert[i].DstReg.Index = temp_index; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_W; - vpi_insert[i].DstReg.CondMask = COND_TR; - - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); - i++; - - vpi_insert[i].Opcode = OPCODE_MUL; - - vpi_insert[i].DstReg.File = PROGRAM_TEMPORARY; - vpi_insert[i].DstReg.Index = temp_index; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZ; - vpi_insert[i].DstReg.CondMask = COND_TR; - - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - vpi_insert[i].SrcReg[1].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[1].Index = temp_index; - vpi_insert[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_ZERO); - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(prog->Parameters, tokens); - - vpi_insert[i].Opcode = OPCODE_MAD; - - vpi_insert[i].DstReg.File = PROGRAM_TEMPORARY; - vpi_insert[i].DstReg.Index = temp_index; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZ; - vpi_insert[i].DstReg.CondMask = COND_TR; - - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - vpi_insert[i].SrcReg[1].File = PROGRAM_STATE_VAR; - vpi_insert[i].SrcReg[1].Index = window_index; - vpi_insert[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_ZERO); - - vpi_insert[i].SrcReg[2].File = PROGRAM_STATE_VAR; - vpi_insert[i].SrcReg[2].Index = window_index; - vpi_insert[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - vpi_insert[i].Opcode = OPCODE_MUL; + vpi_insert[i].Opcode = OPCODE_MOV; vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0+vp->wpos_idx; @@ -1053,10 +998,6 @@ static void insert_wpos(struct r300_vertex_program *vp, vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; vpi_insert[i].SrcReg[0].Index = temp_index; vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); - - vpi_insert[i].SrcReg[1].File = PROGRAM_STATE_VAR; - vpi_insert[i].SrcReg[1].Index = window_index; - vpi_insert[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_ONE); i++; free(prog->Instructions); -- cgit v1.2.3 From 7f08dd3fc6bb23ddef5e7bea827282c8d7c82899 Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Thu, 15 Mar 2007 18:26:01 +0000 Subject: r300: Fixed "no previous prototype for 'r300RefillCurrentDmaRegion'" warning. --- src/mesa/drivers/dri/r300/r300_ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index f674fea7c9..11e2d42e49 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -389,7 +389,7 @@ void r300Flush(GLcontext * ctx) #ifdef USER_BUFFERS #include "radeon_mm.h" -void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) +static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) { struct r300_dma_buffer *dmabuf; size = MAX2(size, RADEON_BUFFER_SIZE*16); @@ -503,7 +503,7 @@ void r300AllocDmaRegion(r300ContextPtr rmesa, } #else -void r300RefillCurrentDmaRegion(r300ContextPtr rmesa) +static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa) { struct r300_dma_buffer *dmabuf; int fd = rmesa->radeon.dri.fd; -- cgit v1.2.3 From b3a9a90cdf7c30b81c749037784428274d997549 Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Thu, 15 Mar 2007 18:29:13 +0000 Subject: r300: Fixed a printf conversion warning. --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index e4511e0c21..a106bbc6ab 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -545,9 +545,9 @@ void r300InitCmdBuf(r300ContextPtr r300) size = 64*256; if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) { - fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%u\n", + fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n", sizeof(drm_r300_cmd_header_t)); - fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%u\n", + fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n", sizeof(drm_radeon_cmd_buffer_t)); fprintf(stderr, "Allocating %d bytes command buffer (max state is %d bytes)\n", -- cgit v1.2.3 From 51693b22cddae9ae73d2310faee295c77f30e8db Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Thu, 15 Mar 2007 18:44:29 +0000 Subject: r300: Fixed an unused variable warning and removed some cruft, too. --- src/mesa/drivers/dri/r300/r300_shader.c | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 26721e8dfd..41c07a3188 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -10,22 +10,8 @@ static void r300BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog) { - - r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct r300_vertex_program_cont *vp=(void *)prog; - - switch(target){ case GL_VERTEX_PROGRAM_ARB: - //rmesa->curr_vp = (struct gl_vertex_program *)vp; - //vp->ref_count++; -#if 0 - if((vp->ref_count % 1500) == 0) { - fprintf(stderr, "id %p, ref_count %d\n", vp, vp->ref_count); - _mesa_print_program(&vp->mesa_program.Base); - } -#endif - case GL_FRAGMENT_PROGRAM_ARB: break; default: @@ -59,18 +45,9 @@ r300NewProgram(GLcontext *ctx, GLenum target, GLuint id) return NULL; } - static void r300DeleteProgram(GLcontext *ctx, struct gl_program *prog) { -#if 0 - r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct r300_vertex_program *vp=(void *)prog; - - if(rmesa->curr_vp == vp) - rmesa->curr_vp = NULL; -#endif - _mesa_delete_program(ctx, prog); } @@ -98,9 +75,6 @@ r300ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog) static GLboolean r300IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog) { - //struct r300_vertex_program *vp=(void *)prog; - //r300ContextPtr rmesa = R300_CONTEXT(ctx); - return 1; } -- cgit v1.2.3 From 0d6d80ef3dbd39ed346c3189385242016f5aed74 Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Thu, 15 Mar 2007 19:09:10 +0000 Subject: r300: Updated R300 to use the new SWIZZLE macros. --- src/mesa/drivers/dri/r300/r300_fragprog.c | 6 +++--- src/mesa/drivers/dri/r300/r300_vertexprog.c | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index d3062a4145..82fb5b66ed 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -1815,7 +1815,7 @@ static void insert_wpos(struct gl_program *prog) fpi[i].SrcReg[0].File = PROGRAM_INPUT; fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; i++; fpi[i].Opcode = OPCODE_MUL; @@ -1827,11 +1827,11 @@ static void insert_wpos(struct gl_program *prog) fpi[i].SrcReg[0].File = PROGRAM_INPUT; fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; i++; /* viewport transformation */ diff --git a/src/mesa/drivers/dri/r300/r300_vertexprog.c b/src/mesa/drivers/dri/r300/r300_vertexprog.c index 68a11a42b3..9257ff44e3 100644 --- a/src/mesa/drivers/dri/r300/r300_vertexprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertexprog.c @@ -908,11 +908,11 @@ static void position_invariant(struct gl_program *prog) vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; vpi[i].SrcReg[1].File = PROGRAM_INPUT; vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; - vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW; #else if (i == 0) vpi[i].Opcode = OPCODE_MUL; @@ -932,7 +932,7 @@ static void position_invariant(struct gl_program *prog) vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; vpi[i].SrcReg[1].File = PROGRAM_INPUT; vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; @@ -941,7 +941,7 @@ static void position_invariant(struct gl_program *prog) if (i > 0) { vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY; vpi[i].SrcReg[2].Index = 0; - vpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW; } #endif } @@ -985,7 +985,7 @@ static void insert_wpos(struct r300_vertex_program *vp, vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; i++; vpi_insert[i].Opcode = OPCODE_MOV; @@ -997,7 +997,7 @@ static void insert_wpos(struct r300_vertex_program *vp, vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; i++; free(prog->Instructions); -- cgit v1.2.3 From 0e9ada1087a58af4c1375cc35b318aa0521d3a72 Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Thu, 15 Mar 2007 19:49:10 +0000 Subject: r300: Use _mesa_alloc_instructions/_mesa_init_instructions instead of malloc. Note that insert_wpos in r300_vertexprog.c is still a little flaky and could be improved. --- src/mesa/drivers/dri/r300/r300_fragprog.c | 10 ++++------ src/mesa/drivers/dri/r300/r300_vertexprog.c | 15 +++++++-------- 2 files changed, 11 insertions(+), 14 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 82fb5b66ed..e05abdb7c6 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -1798,12 +1798,8 @@ static void insert_wpos(struct gl_program *prog) /* should do something else if no temps left... */ prog->NumTemporaries++; - - fpi = malloc((prog->NumInstructions + 3) * sizeof(struct prog_instruction)); - /* all including END */ - memcpy(&fpi[3], prog->Instructions, prog->NumInstructions * sizeof(struct prog_instruction)); - - memset(fpi, 0, 3 * sizeof(struct prog_instruction)); + fpi = _mesa_alloc_instructions (prog->NumInstructions + 3); + _mesa_init_instructions (fpi, prog->NumInstructions + 3); /* perspective divide */ fpi[i].Opcode = OPCODE_RCP; @@ -1857,6 +1853,8 @@ static void insert_wpos(struct gl_program *prog) fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); i++; + _mesa_memcpy(&fpi[i], prog->Instructions, prog->NumInstructions * sizeof(struct prog_instruction)); + free(prog->Instructions); prog->Instructions = fpi; diff --git a/src/mesa/drivers/dri/r300/r300_vertexprog.c b/src/mesa/drivers/dri/r300/r300_vertexprog.c index 9257ff44e3..0c43270d75 100644 --- a/src/mesa/drivers/dri/r300/r300_vertexprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertexprog.c @@ -889,8 +889,8 @@ static void position_invariant(struct gl_program *prog) #endif paramList = prog->Parameters; - vpi = malloc((prog->NumInstructions + 4) * sizeof(struct prog_instruction)); - memset(vpi, 0, 4 * sizeof(struct prog_instruction)); + vpi = _mesa_alloc_instructions (prog->NumInstructions + 4); + _mesa_init_instructions (vpi, prog->NumInstructions + 4); for (i=0; i < 4; i++) { GLint idx; @@ -946,7 +946,7 @@ static void position_invariant(struct gl_program *prog) #endif } - memcpy(&vpi[i], prog->Instructions, prog->NumInstructions * sizeof(struct prog_instruction)); + _mesa_memcpy(&vpi[i], prog->Instructions, prog->NumInstructions * sizeof(struct prog_instruction)); free(prog->Instructions); @@ -966,15 +966,14 @@ static void insert_wpos(struct r300_vertex_program *vp, struct prog_instruction *vpi_insert; int i = 0; - vpi = malloc((prog->NumInstructions + 2) * sizeof(struct prog_instruction)); + vpi = _mesa_alloc_instructions (prog->NumInstructions + 2); + _mesa_init_instructions (vpi, prog->NumInstructions + 2); /* all but END */ - memcpy(vpi, prog->Instructions, (prog->NumInstructions - 1) * sizeof(struct prog_instruction)); + _mesa_memcpy(vpi, prog->Instructions, (prog->NumInstructions - 1) * sizeof(struct prog_instruction)); /* END */ - memcpy(&vpi[prog->NumInstructions + 1], &prog->Instructions[prog->NumInstructions - 1], + _mesa_memcpy(&vpi[prog->NumInstructions + 1], &prog->Instructions[prog->NumInstructions - 1], sizeof(struct prog_instruction)); - vpi_insert = &vpi[prog->NumInstructions - 1]; - memset(vpi_insert, 0, 2 * sizeof(struct prog_instruction)); vpi_insert[i].Opcode = OPCODE_MOV; -- cgit v1.2.3 From 1195caa2745de4d16ddbd46eeb4f527a78c5eb63 Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Thu, 15 Mar 2007 20:39:07 +0000 Subject: r300: Renamed r300_vertexprog.c to r300_vertprog.c --- src/mesa/drivers/dri/r300/Makefile | 2 +- src/mesa/drivers/dri/r300/r300_vertexprog.c | 1116 --------------------------- src/mesa/drivers/dri/r300/r300_vertprog.c | 1116 +++++++++++++++++++++++++++ 3 files changed, 1117 insertions(+), 1117 deletions(-) delete mode 100644 src/mesa/drivers/dri/r300/r300_vertexprog.c create mode 100644 src/mesa/drivers/dri/r300/r300_vertprog.c (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 2ee2328934..d4bf0ae892 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -39,7 +39,7 @@ DRIVER_SOURCES = \ r300_texmem.c \ r300_tex.c \ r300_texstate.c \ - r300_vertexprog.c \ + r300_vertprog.c \ r300_fragprog.c \ r300_shader.c \ r300_maos.c \ diff --git a/src/mesa/drivers/dri/r300/r300_vertexprog.c b/src/mesa/drivers/dri/r300/r300_vertexprog.c deleted file mode 100644 index 0c43270d75..0000000000 --- a/src/mesa/drivers/dri/r300/r300_vertexprog.c +++ /dev/null @@ -1,1116 +0,0 @@ -/************************************************************************** - -Copyright (C) 2005 Aapo Tahkola. - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -on the rights to use, copy, modify, merge, publish, distribute, sub -license, and/or sell copies of the Software, and to permit persons to whom -the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next -paragraph) shall be included in all copies or substantial portions of the -Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Aapo Tahkola - */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" -#include "program.h" -#include "nvvertexec.h" - -#include "r300_context.h" -#include "r300_program.h" -#include "program_instruction.h" - -#if SWIZZLE_X != VSF_IN_COMPONENT_X || \ - SWIZZLE_Y != VSF_IN_COMPONENT_Y || \ - SWIZZLE_Z != VSF_IN_COMPONENT_Z || \ - SWIZZLE_W != VSF_IN_COMPONENT_W || \ - SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \ - SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \ - WRITEMASK_X != VSF_FLAG_X || \ - WRITEMASK_Y != VSF_FLAG_Y || \ - WRITEMASK_Z != VSF_FLAG_Z || \ - WRITEMASK_W != VSF_FLAG_W -#error Cannot change these! -#endif - -#define SCALAR_FLAG (1<<31) -#define FLAG_MASK (1<<31) -#define OP_MASK (0xf) /* we are unlikely to have more than 15 */ -#define OPN(operator, ip) {#operator, OPCODE_##operator, ip} - -static struct{ - char *name; - int opcode; - unsigned long ip; /* number of input operands and flags */ -}op_names[]={ - OPN(ABS, 1), - OPN(ADD, 2), - OPN(ARL, 1|SCALAR_FLAG), - OPN(DP3, 2), - OPN(DP4, 2), - OPN(DPH, 2), - OPN(DST, 2), - OPN(EX2, 1|SCALAR_FLAG), - OPN(EXP, 1|SCALAR_FLAG), - OPN(FLR, 1), - OPN(FRC, 1), - OPN(LG2, 1|SCALAR_FLAG), - OPN(LIT, 1), - OPN(LOG, 1|SCALAR_FLAG), - OPN(MAD, 3), - OPN(MAX, 2), - OPN(MIN, 2), - OPN(MOV, 1), - OPN(MUL, 2), - OPN(POW, 2|SCALAR_FLAG), - OPN(RCP, 1|SCALAR_FLAG), - OPN(RSQ, 1|SCALAR_FLAG), - OPN(SGE, 2), - OPN(SLT, 2), - OPN(SUB, 2), - OPN(SWZ, 1), - OPN(XPD, 2), - OPN(RCC, 0), //extra - OPN(PRINT, 0), - OPN(END, 0), -}; -#undef OPN - -int r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program_cont *vp, float *dst) -{ - int pi; - struct gl_vertex_program *mesa_vp = &vp->mesa_program; - float *dst_o=dst; - struct gl_program_parameter_list *paramList; - - if (mesa_vp->IsNVProgram) { - _mesa_init_vp_per_primitive_registers(ctx); - - for (pi=0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) { - *dst++=ctx->VertexProgram.Parameters[pi][0]; - *dst++=ctx->VertexProgram.Parameters[pi][1]; - *dst++=ctx->VertexProgram.Parameters[pi][2]; - *dst++=ctx->VertexProgram.Parameters[pi][3]; - } - return dst - dst_o; - } - - assert(mesa_vp->Base.Parameters); - _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters); - - if(mesa_vp->Base.Parameters->NumParameters * 4 > VSF_MAX_FRAGMENT_LENGTH){ - fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); - exit(-1); - } - - paramList = mesa_vp->Base.Parameters; - for(pi=0; pi < paramList->NumParameters; pi++){ - switch(paramList->Parameters[pi].Type){ - - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); - case PROGRAM_CONSTANT: - *dst++=paramList->ParameterValues[pi][0]; - *dst++=paramList->ParameterValues[pi][1]; - *dst++=paramList->ParameterValues[pi][2]; - *dst++=paramList->ParameterValues[pi][3]; - break; - - default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); - } - - } - - return dst - dst_o; -} - -static unsigned long t_dst_mask(GLuint mask) -{ - /* WRITEMASK_* is equivalent to VSF_FLAG_* */ - return mask & VSF_FLAG_ALL; -} - -static unsigned long t_dst_class(enum register_file file) -{ - - switch(file){ - case PROGRAM_TEMPORARY: - return VSF_OUT_CLASS_TMP; - case PROGRAM_OUTPUT: - return VSF_OUT_CLASS_RESULT; - case PROGRAM_ADDRESS: - return VSF_OUT_CLASS_ADDR; - /* - case PROGRAM_INPUT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - exit(0); - } -} - -static unsigned long t_dst_index(struct r300_vertex_program *vp, struct prog_dst_register *dst) -{ - if(dst->File == PROGRAM_OUTPUT) - return vp->outputs[dst->Index]; - - return dst->Index; -} - -static unsigned long t_src_class(enum register_file file) -{ - - switch(file){ - case PROGRAM_TEMPORARY: - return VSF_IN_CLASS_TMP; - - case PROGRAM_INPUT: - return VSF_IN_CLASS_ATTR; - - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - return VSF_IN_CLASS_PARAM; - /* - case PROGRAM_OUTPUT: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - exit(0); - } -} - -static __inline unsigned long t_swizzle(GLubyte swizzle) -{ -/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ - return swizzle; -} - -#if 0 -static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) -{ - int i; - - if(vp == NULL){ - fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller); - return ; - } - - fprintf(stderr, "%s:<", caller); - for(i=0; i < VERT_ATTRIB_MAX; i++) - fprintf(stderr, "%d ", vp->inputs[i]); - fprintf(stderr, ">\n"); - -} -#endif - -static unsigned long t_src_index(struct r300_vertex_program *vp, struct prog_src_register *src) -{ - int i; - int max_reg=-1; - - if(src->File == PROGRAM_INPUT){ - if(vp->inputs[src->Index] != -1) - return vp->inputs[src->Index]; - - for(i=0; i < VERT_ATTRIB_MAX; i++) - if(vp->inputs[i] > max_reg) - max_reg=vp->inputs[i]; - - vp->inputs[src->Index]=max_reg+1; - - //vp_dump_inputs(vp, __FUNCTION__); - - return vp->inputs[src->Index]; - }else{ - if (src->Index < 0) { - fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n"); - return 0; - } - return src->Index; - } -} - -static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src) -{ - /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return MAKE_VSF_SOURCE(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 1)), - t_swizzle(GET_SWZ(src->Swizzle, 2)), - t_swizzle(GET_SWZ(src->Swizzle, 3)), - t_src_class(src->File), - src->NegateBase) | (src->RelAddr << 4); -} - -static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src) -{ - - return MAKE_VSF_SOURCE(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_src_class(src->File), - src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); -} - -static unsigned long t_opcode(enum prog_opcode opcode) -{ - - switch(opcode){ - case OPCODE_ARL: return R300_VPI_OUT_OP_ARL; - case OPCODE_DST: return R300_VPI_OUT_OP_DST; - case OPCODE_EX2: return R300_VPI_OUT_OP_EX2; - case OPCODE_EXP: return R300_VPI_OUT_OP_EXP; - case OPCODE_FRC: return R300_VPI_OUT_OP_FRC; - case OPCODE_LG2: return R300_VPI_OUT_OP_LG2; - case OPCODE_LOG: return R300_VPI_OUT_OP_LOG; - case OPCODE_MAX: return R300_VPI_OUT_OP_MAX; - case OPCODE_MIN: return R300_VPI_OUT_OP_MIN; - case OPCODE_MUL: return R300_VPI_OUT_OP_MUL; - case OPCODE_RCP: return R300_VPI_OUT_OP_RCP; - case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ; - case OPCODE_SGE: return R300_VPI_OUT_OP_SGE; - case OPCODE_SLT: return R300_VPI_OUT_OP_SLT; - case OPCODE_DP4: return R300_VPI_OUT_OP_DOT; - - default: - fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode); - } - exit(-1); - return 0; -} - -static unsigned long op_operands(enum prog_opcode opcode) -{ - int i; - - /* Can we trust mesas opcodes to be in order ? */ - for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++) - if(op_names[i].opcode == opcode) - return op_names[i].ip; - - fprintf(stderr, "op %d not found in op_names\n", opcode); - exit(-1); - return 0; -} - -static GLboolean valid_dst(struct r300_vertex_program *vp, struct prog_dst_register *dst) -{ - if(dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1){ - WARN_ONCE("Output %d not used by fragment program\n", dst->Index); - return GL_FALSE; - }else if(dst->File == PROGRAM_ADDRESS) { - assert(dst->Index == 0); - } - - return GL_TRUE; -} - -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ - t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ - (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ - t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \ - -#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) - -#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) - -#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) - -#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) - -#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) - -#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) - -/* DP4 version seems to trigger some hw peculiarity */ -//#define PREFER_DP4 - -#define FREE_TEMPS() \ - do { \ - if(u_temp_i < vp->num_temporaries) { \ - WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \ - vp->native = GL_FALSE; \ - } \ - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ - } while (0) - -static void r300_translate_vertex_shader(struct r300_vertex_program *vp, struct prog_instruction *vpi) -{ - int i, cur_reg=0; - VERTEX_SHADER_INSTRUCTION *o_inst; - unsigned long operands; - int are_srcs_scalar; - unsigned long hw_op; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? */ - int u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; - struct prog_src_register src[3]; - - vp->pos_end=0; /* Not supported yet */ - vp->program.length=0; - /*vp->num_temporaries=mesa_vp->Base.NumTemporaries;*/ - - for(i=0; i < VERT_ATTRIB_MAX; i++) - vp->inputs[i] = -1; - - for(i=0; i < VERT_RESULT_MAX; i++) - vp->outputs[i] = -1; - - assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)); - - /* Assign outputs */ - if(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) - vp->outputs[VERT_RESULT_HPOS] = cur_reg++; - - if(vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) - vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; - - if(vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) - vp->outputs[VERT_RESULT_COL0] = cur_reg++; - - if(vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) - vp->outputs[VERT_RESULT_COL1] = cur_reg++; - -#if 0 /* Not supported yet */ - if(vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) - vp->outputs[VERT_RESULT_BFC0] = cur_reg++; - - if(vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) - vp->outputs[VERT_RESULT_BFC1] = cur_reg++; - - if(vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) - vp->outputs[VERT_RESULT_FOGC] = cur_reg++; -#endif - - for(i=VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) - if(vp->key.OutputsWritten & (1 << i)) - vp->outputs[i] = cur_reg++; - - vp->translated = GL_TRUE; - vp->native = GL_TRUE; - - o_inst=vp->program.body.i; - for(; vpi->Opcode != OPCODE_END; vpi++, o_inst++){ - FREE_TEMPS(); - - if(!valid_dst(vp, &vpi->DstReg)) - { - /* redirect result to unused temp */ - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = u_temp_i; - } - - operands=op_operands(vpi->Opcode); - are_srcs_scalar=operands & SCALAR_FLAG; - operands &= OP_MASK; - - for(i=0; i < operands; i++) - src[i]=vpi->SrcReg[i]; - - if(operands == 3){ /* TODO: scalars */ - if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){ - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, - VSF_FLAG_ALL, VSF_OUT_CLASS_TMP); - - o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), - SWIZZLE_X, SWIZZLE_Y, - SWIZZLE_Z, SWIZZLE_W, - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4); - - o_inst->src2=ZERO_SRC_2; - o_inst->src3=ZERO_SRC_2; - o_inst++; - - src[2].File=PROGRAM_TEMPORARY; - src[2].Index=u_temp_i; - src[2].RelAddr=0; - u_temp_i--; - } - - } - - if(operands >= 2){ - if( CMP_SRCS(src[1], src[0]) ){ - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, - VSF_FLAG_ALL, VSF_OUT_CLASS_TMP); - - o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - SWIZZLE_X, SWIZZLE_Y, - SWIZZLE_Z, SWIZZLE_W, - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4); - - o_inst->src2=ZERO_SRC_0; - o_inst->src3=ZERO_SRC_0; - o_inst++; - - src[0].File=PROGRAM_TEMPORARY; - src[0].Index=u_temp_i; - src[0].RelAddr=0; - u_temp_i--; - } - } - - /* These ops need special handling. */ - switch(vpi->Opcode){ - case OPCODE_POW: - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - o_inst->src1=t_src_scalar(vp, &src[0]); - o_inst->src2=ZERO_SRC_0; - o_inst->src3=t_src_scalar(vp, &src[1]); - goto next; - - case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - case OPCODE_SWZ: -#if 1 - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=ZERO_SRC_0; - o_inst->src3=ZERO_SRC_0; -#else - hw_op=(src[0].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD; - - o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=ONE_SRC_0; - o_inst->src3=ZERO_SRC_0; -#endif - - goto next; - - case OPCODE_ADD: -#if 1 - hw_op=(src[0].File == PROGRAM_TEMPORARY && - src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD; - - o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - o_inst->src1=ONE_SRC_0; - o_inst->src2=t_src(vp, &src[0]); - o_inst->src3=t_src(vp, &src[1]); -#else - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=t_src(vp, &src[1]); - o_inst->src3=ZERO_SRC_1; - -#endif - goto next; - - case OPCODE_MAD: - hw_op=(src[0].File == PROGRAM_TEMPORARY && - src[1].File == PROGRAM_TEMPORARY && - src[2].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD; - - o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=t_src(vp, &src[1]); - o_inst->src3=t_src(vp, &src[2]); - goto next; - - case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */ - hw_op=(src[0].File == PROGRAM_TEMPORARY && - src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD; - - o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=t_src(vp, &src[1]); - - o_inst->src3=ZERO_SRC_1; - goto next; - - case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - - o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); - - o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[1].File), - src[1].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[1].RelAddr << 4); - - o_inst->src3=ZERO_SRC_1; - goto next; - - case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W -#if 1 - hw_op=(src[0].File == PROGRAM_TEMPORARY && - src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD; - - o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=ONE_SRC_0; - o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); -#else - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); - o_inst->src3=0; -#endif - goto next; - - case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); - o_inst->src3=0; - goto next; - - case OPCODE_FLR: - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP); - - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=ZERO_SRC_0; - o_inst->src3=ZERO_SRC_0; - o_inst++; - - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=MAKE_VSF_SOURCE(u_temp_i, - VSF_IN_COMPONENT_X, - VSF_IN_COMPONENT_Y, - VSF_IN_COMPONENT_Z, - VSF_IN_COMPONENT_W, - VSF_IN_CLASS_TMP, - /* Not 100% sure about this */ - (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); - - o_inst->src3=ZERO_SRC_0; - u_temp_i--; - goto next; - - case OPCODE_LG2:// LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - - o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); - o_inst->src2=ZERO_SRC_0; - o_inst->src3=ZERO_SRC_0; - goto next; - - case OPCODE_LIT://LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - /* NOTE: Users swizzling might not work. */ - o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); - o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); - o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); - goto next; - - case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - - o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - VSF_IN_COMPONENT_ONE, - t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); - o_inst->src2=t_src(vp, &src[1]); - o_inst->src3=ZERO_SRC_1; - goto next; - - case OPCODE_XPD: - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - NOTE: might need MAD_2 - */ - - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP); - - o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); - - o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w - t_src_class(src[1].File), - src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); - - o_inst->src3=ZERO_SRC_1; - o_inst++; - u_temp_i--; - - o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - - o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w - t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); - - o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); - - o_inst->src3=MAKE_VSF_SOURCE(u_temp_i+1, - VSF_IN_COMPONENT_X, - VSF_IN_COMPONENT_Y, - VSF_IN_COMPONENT_Z, - VSF_IN_COMPONENT_W, - VSF_IN_CLASS_TMP, - VSF_FLAG_NONE); - - goto next; - - case OPCODE_RCC: - fprintf(stderr, "Dont know how to handle op %d yet\n", vpi->Opcode); - exit(-1); - break; - case OPCODE_END: - break; - default: - break; - } - - o_inst->op=MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); - - if(are_srcs_scalar){ - switch(operands){ - case 1: - o_inst->src1=t_src_scalar(vp, &src[0]); - o_inst->src2=ZERO_SRC_0; - o_inst->src3=ZERO_SRC_0; - break; - - case 2: - o_inst->src1=t_src_scalar(vp, &src[0]); - o_inst->src2=t_src_scalar(vp, &src[1]); - o_inst->src3=ZERO_SRC_1; - break; - - case 3: - o_inst->src1=t_src_scalar(vp, &src[0]); - o_inst->src2=t_src_scalar(vp, &src[1]); - o_inst->src3=t_src_scalar(vp, &src[2]); - break; - - default: - fprintf(stderr, "scalars and op RCC not handled yet"); - exit(-1); - break; - } - }else{ - switch(operands){ - case 1: - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=ZERO_SRC_0; - o_inst->src3=ZERO_SRC_0; - break; - - case 2: - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=t_src(vp, &src[1]); - o_inst->src3=ZERO_SRC_1; - break; - - case 3: - o_inst->src1=t_src(vp, &src[0]); - o_inst->src2=t_src(vp, &src[1]); - o_inst->src3=t_src(vp, &src[2]); - break; - - default: - fprintf(stderr, "scalars and op RCC not handled yet"); - exit(-1); - break; - } - } - next: ; - } - - /* Will most likely segfault before we get here... fix later. */ - if(o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH/4) { - vp->program.length = 0; - vp->native = GL_FALSE; - return ; - } - vp->program.length=(o_inst - vp->program.body.i) * 4; -#if 0 - fprintf(stderr, "hw program:\n"); - for(i=0; i < vp->program.length; i++) - fprintf(stderr, "%08x\n", vp->program.body.d[i]); -#endif -} - -static void position_invariant(struct gl_program *prog) -{ - struct prog_instruction *vpi; - struct gl_program_parameter_list *paramList; - int i; - - GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX }; - -#ifdef PREFER_DP4 - tokens[5] = STATE_MATRIX; -#else - tokens[5] = STATE_MATRIX_TRANSPOSE; -#endif - paramList = prog->Parameters; - - vpi = _mesa_alloc_instructions (prog->NumInstructions + 4); - _mesa_init_instructions (vpi, prog->NumInstructions + 4); - - for (i=0; i < 4; i++) { - GLint idx; - tokens[3] = tokens[4] = i; - idx = _mesa_add_state_reference(paramList, tokens); -#ifdef PREFER_DP4 - vpi[i].Opcode = OPCODE_DP4; - vpi[i].StringPos = 0; - vpi[i].Data = 0; - - vpi[i].DstReg.File = PROGRAM_OUTPUT; - vpi[i].DstReg.Index = VERT_RESULT_HPOS; - vpi[i].DstReg.WriteMask = 1 << i; - vpi[i].DstReg.CondMask = COND_TR; - - vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; - vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - vpi[i].SrcReg[1].File = PROGRAM_INPUT; - vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; - vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW; -#else - if (i == 0) - vpi[i].Opcode = OPCODE_MUL; - else - vpi[i].Opcode = OPCODE_MAD; - - vpi[i].StringPos = 0; - vpi[i].Data = 0; - - if (i == 3) - vpi[i].DstReg.File = PROGRAM_OUTPUT; - else - vpi[i].DstReg.File = PROGRAM_TEMPORARY; - vpi[i].DstReg.Index = 0; - vpi[i].DstReg.WriteMask = 0xf; - vpi[i].DstReg.CondMask = COND_TR; - - vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; - vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - vpi[i].SrcReg[1].File = PROGRAM_INPUT; - vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; - vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i); - - if (i > 0) { - vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY; - vpi[i].SrcReg[2].Index = 0; - vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW; - } -#endif - } - - _mesa_memcpy(&vpi[i], prog->Instructions, prog->NumInstructions * sizeof(struct prog_instruction)); - - free(prog->Instructions); - - prog->Instructions = vpi; - - prog->NumInstructions += 4; - vpi = &prog->Instructions[prog->NumInstructions-1]; - - assert(vpi->Opcode == OPCODE_END); -} - -static void insert_wpos(struct r300_vertex_program *vp, - struct gl_program *prog, - GLuint temp_index) -{ - struct prog_instruction *vpi; - struct prog_instruction *vpi_insert; - int i = 0; - - vpi = _mesa_alloc_instructions (prog->NumInstructions + 2); - _mesa_init_instructions (vpi, prog->NumInstructions + 2); - /* all but END */ - _mesa_memcpy(vpi, prog->Instructions, (prog->NumInstructions - 1) * sizeof(struct prog_instruction)); - /* END */ - _mesa_memcpy(&vpi[prog->NumInstructions + 1], &prog->Instructions[prog->NumInstructions - 1], - sizeof(struct prog_instruction)); - vpi_insert = &vpi[prog->NumInstructions - 1]; - - vpi_insert[i].Opcode = OPCODE_MOV; - - vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; - vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; - vpi_insert[i].DstReg.CondMask = COND_TR; - - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - i++; - - vpi_insert[i].Opcode = OPCODE_MOV; - - vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; - vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0+vp->wpos_idx; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; - vpi_insert[i].DstReg.CondMask = COND_TR; - - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - i++; - - free(prog->Instructions); - - prog->Instructions = vpi; - - prog->NumInstructions += i; - vpi = &prog->Instructions[prog->NumInstructions-1]; - - assert(vpi->Opcode == OPCODE_END); -} - -static void pos_as_texcoord(struct r300_vertex_program *vp, - struct gl_program *prog) -{ - struct prog_instruction *vpi; - GLuint tempregi = prog->NumTemporaries; - /* should do something else if no temps left... */ - prog->NumTemporaries++; - - for(vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++){ - if( vpi->DstReg.File == PROGRAM_OUTPUT && - vpi->DstReg.Index == VERT_RESULT_HPOS ){ - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = tempregi; - } - } - insert_wpos(vp, prog, tempregi); -} - -static struct r300_vertex_program *build_program(struct r300_vertex_program_key *wanted_key, - struct gl_vertex_program *mesa_vp, - GLint wpos_idx) -{ - struct r300_vertex_program *vp; - - vp = _mesa_calloc(sizeof(*vp)); - _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - - vp->wpos_idx = wpos_idx; - - if(mesa_vp->IsPositionInvariant) { - position_invariant(&mesa_vp->Base); - } - - if(wpos_idx > -1) - pos_as_texcoord(vp, &mesa_vp->Base); - - assert(mesa_vp->Base.NumInstructions); - - vp->num_temporaries=mesa_vp->Base.NumTemporaries; - - r300_translate_vertex_shader(vp, mesa_vp->Base.Instructions); - - return vp; -} - -void r300_select_vertex_shader(r300ContextPtr r300) -{ - GLcontext *ctx = ctx = r300->radeon.glCtx; - GLuint InputsRead; - struct r300_vertex_program_key wanted_key = { 0 }; - GLint i; - struct r300_vertex_program_cont *vpc; - struct r300_vertex_program *vp; - GLint wpos_idx; - - vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - - wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS; - - wpos_idx = -1; - if (InputsRead & FRAG_BIT_WPOS){ - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (!(InputsRead & (FRAG_BIT_TEX0 << i))) - break; - - if(i == ctx->Const.MaxTextureUnits){ - fprintf(stderr, "\tno free texcoord found\n"); - exit(0); - } - - InputsRead |= (FRAG_BIT_TEX0 << i); - wpos_idx = i; - } - - if (InputsRead & FRAG_BIT_COL0) - wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0; - - if ((InputsRead & FRAG_BIT_COL1) /*|| - (InputsRead & FRAG_BIT_FOGC)*/) - wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1; - - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (InputsRead & (FRAG_BIT_TEX0 << i)) - wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); - - wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; - if(vpc->mesa_program.IsPositionInvariant) { - /* we wan't position don't we ? */ - wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); - } - - for (vp = vpc->progs; vp; vp = vp->next) - if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) { - r300->selected_vp = vp; - return ; - } - - //_mesa_print_program(&vpc->mesa_program.Base); - - vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx); - vp->next = vpc->progs; - vpc->progs = vp; - r300->selected_vp = vp; -} diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c new file mode 100644 index 0000000000..0c43270d75 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -0,0 +1,1116 @@ +/************************************************************************** + +Copyright (C) 2005 Aapo Tahkola. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Aapo Tahkola + */ +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "program.h" +#include "nvvertexec.h" + +#include "r300_context.h" +#include "r300_program.h" +#include "program_instruction.h" + +#if SWIZZLE_X != VSF_IN_COMPONENT_X || \ + SWIZZLE_Y != VSF_IN_COMPONENT_Y || \ + SWIZZLE_Z != VSF_IN_COMPONENT_Z || \ + SWIZZLE_W != VSF_IN_COMPONENT_W || \ + SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \ + SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \ + WRITEMASK_X != VSF_FLAG_X || \ + WRITEMASK_Y != VSF_FLAG_Y || \ + WRITEMASK_Z != VSF_FLAG_Z || \ + WRITEMASK_W != VSF_FLAG_W +#error Cannot change these! +#endif + +#define SCALAR_FLAG (1<<31) +#define FLAG_MASK (1<<31) +#define OP_MASK (0xf) /* we are unlikely to have more than 15 */ +#define OPN(operator, ip) {#operator, OPCODE_##operator, ip} + +static struct{ + char *name; + int opcode; + unsigned long ip; /* number of input operands and flags */ +}op_names[]={ + OPN(ABS, 1), + OPN(ADD, 2), + OPN(ARL, 1|SCALAR_FLAG), + OPN(DP3, 2), + OPN(DP4, 2), + OPN(DPH, 2), + OPN(DST, 2), + OPN(EX2, 1|SCALAR_FLAG), + OPN(EXP, 1|SCALAR_FLAG), + OPN(FLR, 1), + OPN(FRC, 1), + OPN(LG2, 1|SCALAR_FLAG), + OPN(LIT, 1), + OPN(LOG, 1|SCALAR_FLAG), + OPN(MAD, 3), + OPN(MAX, 2), + OPN(MIN, 2), + OPN(MOV, 1), + OPN(MUL, 2), + OPN(POW, 2|SCALAR_FLAG), + OPN(RCP, 1|SCALAR_FLAG), + OPN(RSQ, 1|SCALAR_FLAG), + OPN(SGE, 2), + OPN(SLT, 2), + OPN(SUB, 2), + OPN(SWZ, 1), + OPN(XPD, 2), + OPN(RCC, 0), //extra + OPN(PRINT, 0), + OPN(END, 0), +}; +#undef OPN + +int r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program_cont *vp, float *dst) +{ + int pi; + struct gl_vertex_program *mesa_vp = &vp->mesa_program; + float *dst_o=dst; + struct gl_program_parameter_list *paramList; + + if (mesa_vp->IsNVProgram) { + _mesa_init_vp_per_primitive_registers(ctx); + + for (pi=0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) { + *dst++=ctx->VertexProgram.Parameters[pi][0]; + *dst++=ctx->VertexProgram.Parameters[pi][1]; + *dst++=ctx->VertexProgram.Parameters[pi][2]; + *dst++=ctx->VertexProgram.Parameters[pi][3]; + } + return dst - dst_o; + } + + assert(mesa_vp->Base.Parameters); + _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters); + + if(mesa_vp->Base.Parameters->NumParameters * 4 > VSF_MAX_FRAGMENT_LENGTH){ + fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); + exit(-1); + } + + paramList = mesa_vp->Base.Parameters; + for(pi=0; pi < paramList->NumParameters; pi++){ + switch(paramList->Parameters[pi].Type){ + + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); + case PROGRAM_CONSTANT: + *dst++=paramList->ParameterValues[pi][0]; + *dst++=paramList->ParameterValues[pi][1]; + *dst++=paramList->ParameterValues[pi][2]; + *dst++=paramList->ParameterValues[pi][3]; + break; + + default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); + } + + } + + return dst - dst_o; +} + +static unsigned long t_dst_mask(GLuint mask) +{ + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & VSF_FLAG_ALL; +} + +static unsigned long t_dst_class(enum register_file file) +{ + + switch(file){ + case PROGRAM_TEMPORARY: + return VSF_OUT_CLASS_TMP; + case PROGRAM_OUTPUT: + return VSF_OUT_CLASS_RESULT; + case PROGRAM_ADDRESS: + return VSF_OUT_CLASS_ADDR; + /* + case PROGRAM_INPUT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + exit(0); + } +} + +static unsigned long t_dst_index(struct r300_vertex_program *vp, struct prog_dst_register *dst) +{ + if(dst->File == PROGRAM_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(enum register_file file) +{ + + switch(file){ + case PROGRAM_TEMPORARY: + return VSF_IN_CLASS_TMP; + + case PROGRAM_INPUT: + return VSF_IN_CLASS_ATTR; + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + return VSF_IN_CLASS_PARAM; + /* + case PROGRAM_OUTPUT: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + exit(0); + } +} + +static __inline unsigned long t_swizzle(GLubyte swizzle) +{ +/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +#if 0 +static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) +{ + int i; + + if(vp == NULL){ + fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller); + return ; + } + + fprintf(stderr, "%s:<", caller); + for(i=0; i < VERT_ATTRIB_MAX; i++) + fprintf(stderr, "%d ", vp->inputs[i]); + fprintf(stderr, ">\n"); + +} +#endif + +static unsigned long t_src_index(struct r300_vertex_program *vp, struct prog_src_register *src) +{ + int i; + int max_reg=-1; + + if(src->File == PROGRAM_INPUT){ + if(vp->inputs[src->Index] != -1) + return vp->inputs[src->Index]; + + for(i=0; i < VERT_ATTRIB_MAX; i++) + if(vp->inputs[i] > max_reg) + max_reg=vp->inputs[i]; + + vp->inputs[src->Index]=max_reg+1; + + //vp_dump_inputs(vp, __FUNCTION__); + + return vp->inputs[src->Index]; + }else{ + if (src->Index < 0) { + fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n"); + return 0; + } + return src->Index; + } +} + +static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src) +{ + /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return MAKE_VSF_SOURCE(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->NegateBase) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src) +{ + + return MAKE_VSF_SOURCE(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); +} + +static unsigned long t_opcode(enum prog_opcode opcode) +{ + + switch(opcode){ + case OPCODE_ARL: return R300_VPI_OUT_OP_ARL; + case OPCODE_DST: return R300_VPI_OUT_OP_DST; + case OPCODE_EX2: return R300_VPI_OUT_OP_EX2; + case OPCODE_EXP: return R300_VPI_OUT_OP_EXP; + case OPCODE_FRC: return R300_VPI_OUT_OP_FRC; + case OPCODE_LG2: return R300_VPI_OUT_OP_LG2; + case OPCODE_LOG: return R300_VPI_OUT_OP_LOG; + case OPCODE_MAX: return R300_VPI_OUT_OP_MAX; + case OPCODE_MIN: return R300_VPI_OUT_OP_MIN; + case OPCODE_MUL: return R300_VPI_OUT_OP_MUL; + case OPCODE_RCP: return R300_VPI_OUT_OP_RCP; + case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ; + case OPCODE_SGE: return R300_VPI_OUT_OP_SGE; + case OPCODE_SLT: return R300_VPI_OUT_OP_SLT; + case OPCODE_DP4: return R300_VPI_OUT_OP_DOT; + + default: + fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode); + } + exit(-1); + return 0; +} + +static unsigned long op_operands(enum prog_opcode opcode) +{ + int i; + + /* Can we trust mesas opcodes to be in order ? */ + for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++) + if(op_names[i].opcode == opcode) + return op_names[i].ip; + + fprintf(stderr, "op %d not found in op_names\n", opcode); + exit(-1); + return 0; +} + +static GLboolean valid_dst(struct r300_vertex_program *vp, struct prog_dst_register *dst) +{ + if(dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1){ + WARN_ONCE("Output %d not used by fragment program\n", dst->Index); + return GL_FALSE; + }else if(dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } + + return GL_TRUE; +} + +/* TODO: Get rid of t_src_class call */ +#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ + ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ + t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ + (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ + t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \ + +#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) + +#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + +#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + +#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) + +#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + +#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + +/* DP4 version seems to trigger some hw peculiarity */ +//#define PREFER_DP4 + +#define FREE_TEMPS() \ + do { \ + if(u_temp_i < vp->num_temporaries) { \ + WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \ + vp->native = GL_FALSE; \ + } \ + u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ + } while (0) + +static void r300_translate_vertex_shader(struct r300_vertex_program *vp, struct prog_instruction *vpi) +{ + int i, cur_reg=0; + VERTEX_SHADER_INSTRUCTION *o_inst; + unsigned long operands; + int are_srcs_scalar; + unsigned long hw_op; + /* Initial value should be last tmp reg that hw supports. + Strangely enough r300 doesnt mind even though these would be out of range. + Smart enough to realize that it doesnt need it? */ + int u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; + struct prog_src_register src[3]; + + vp->pos_end=0; /* Not supported yet */ + vp->program.length=0; + /*vp->num_temporaries=mesa_vp->Base.NumTemporaries;*/ + + for(i=0; i < VERT_ATTRIB_MAX; i++) + vp->inputs[i] = -1; + + for(i=0; i < VERT_RESULT_MAX; i++) + vp->outputs[i] = -1; + + assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)); + + /* Assign outputs */ + if(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) + vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + + if(vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + + if(vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) + vp->outputs[VERT_RESULT_COL0] = cur_reg++; + + if(vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) + vp->outputs[VERT_RESULT_COL1] = cur_reg++; + +#if 0 /* Not supported yet */ + if(vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) + vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + + if(vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) + vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + + if(vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) + vp->outputs[VERT_RESULT_FOGC] = cur_reg++; +#endif + + for(i=VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) + if(vp->key.OutputsWritten & (1 << i)) + vp->outputs[i] = cur_reg++; + + vp->translated = GL_TRUE; + vp->native = GL_TRUE; + + o_inst=vp->program.body.i; + for(; vpi->Opcode != OPCODE_END; vpi++, o_inst++){ + FREE_TEMPS(); + + if(!valid_dst(vp, &vpi->DstReg)) + { + /* redirect result to unused temp */ + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = u_temp_i; + } + + operands=op_operands(vpi->Opcode); + are_srcs_scalar=operands & SCALAR_FLAG; + operands &= OP_MASK; + + for(i=0; i < operands; i++) + src[i]=vpi->SrcReg[i]; + + if(operands == 3){ /* TODO: scalars */ + if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){ + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, + VSF_FLAG_ALL, VSF_OUT_CLASS_TMP); + + o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), + SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_Z, SWIZZLE_W, + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4); + + o_inst->src2=ZERO_SRC_2; + o_inst->src3=ZERO_SRC_2; + o_inst++; + + src[2].File=PROGRAM_TEMPORARY; + src[2].Index=u_temp_i; + src[2].RelAddr=0; + u_temp_i--; + } + + } + + if(operands >= 2){ + if( CMP_SRCS(src[1], src[0]) ){ + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, + VSF_FLAG_ALL, VSF_OUT_CLASS_TMP); + + o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_Z, SWIZZLE_W, + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4); + + o_inst->src2=ZERO_SRC_0; + o_inst->src3=ZERO_SRC_0; + o_inst++; + + src[0].File=PROGRAM_TEMPORARY; + src[0].Index=u_temp_i; + src[0].RelAddr=0; + u_temp_i--; + } + } + + /* These ops need special handling. */ + switch(vpi->Opcode){ + case OPCODE_POW: + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + o_inst->src1=t_src_scalar(vp, &src[0]); + o_inst->src2=ZERO_SRC_0; + o_inst->src3=t_src_scalar(vp, &src[1]); + goto next; + + case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + case OPCODE_SWZ: +#if 1 + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=ZERO_SRC_0; + o_inst->src3=ZERO_SRC_0; +#else + hw_op=(src[0].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD; + + o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=ONE_SRC_0; + o_inst->src3=ZERO_SRC_0; +#endif + + goto next; + + case OPCODE_ADD: +#if 1 + hw_op=(src[0].File == PROGRAM_TEMPORARY && + src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD; + + o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + o_inst->src1=ONE_SRC_0; + o_inst->src2=t_src(vp, &src[0]); + o_inst->src3=t_src(vp, &src[1]); +#else + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=t_src(vp, &src[1]); + o_inst->src3=ZERO_SRC_1; + +#endif + goto next; + + case OPCODE_MAD: + hw_op=(src[0].File == PROGRAM_TEMPORARY && + src[1].File == PROGRAM_TEMPORARY && + src[2].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD; + + o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=t_src(vp, &src[1]); + o_inst->src3=t_src(vp, &src[2]); + goto next; + + case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */ + hw_op=(src[0].File == PROGRAM_TEMPORARY && + src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD; + + o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=t_src(vp, &src[1]); + + o_inst->src3=ZERO_SRC_1; + goto next; + + case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + + o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + + o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[1].File), + src[1].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + + o_inst->src3=ZERO_SRC_1; + goto next; + + case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W +#if 1 + hw_op=(src[0].File == PROGRAM_TEMPORARY && + src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD; + + o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=ONE_SRC_0; + o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); +#else + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + o_inst->src3=0; +#endif + goto next; + + case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), + t_src_class(src[0].File), + (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + o_inst->src3=0; + goto next; + + case OPCODE_FLR: + /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} + ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ + + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP); + + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=ZERO_SRC_0; + o_inst->src3=ZERO_SRC_0; + o_inst++; + + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=MAKE_VSF_SOURCE(u_temp_i, + VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, + VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, + VSF_IN_CLASS_TMP, + /* Not 100% sure about this */ + (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); + + o_inst->src3=ZERO_SRC_0; + u_temp_i--; + goto next; + + case OPCODE_LG2:// LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + + o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_src_class(src[0].File), + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + o_inst->src2=ZERO_SRC_0; + o_inst->src3=ZERO_SRC_0; + goto next; + + case OPCODE_LIT://LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_src_class(src[0].File), + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_src_class(src[0].File), + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + goto next; + + case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + + o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + VSF_IN_COMPONENT_ONE, + t_src_class(src[0].File), + src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + o_inst->src2=t_src(vp, &src[1]); + o_inst->src3=ZERO_SRC_1; + goto next; + + case OPCODE_XPD: + /* mul r0, r1.yzxw, r2.zxyw + mad r0, -r2.yzxw, r1.zxyw, r0 + NOTE: might need MAD_2 + */ + + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP); + + o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + + o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1].File), + src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + + o_inst->src3=ZERO_SRC_1; + o_inst++; + u_temp_i--; + + o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + + o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1].File), + (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + + o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + + o_inst->src3=MAKE_VSF_SOURCE(u_temp_i+1, + VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, + VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, + VSF_IN_CLASS_TMP, + VSF_FLAG_NONE); + + goto next; + + case OPCODE_RCC: + fprintf(stderr, "Dont know how to handle op %d yet\n", vpi->Opcode); + exit(-1); + break; + case OPCODE_END: + break; + default: + break; + } + + o_inst->op=MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); + + if(are_srcs_scalar){ + switch(operands){ + case 1: + o_inst->src1=t_src_scalar(vp, &src[0]); + o_inst->src2=ZERO_SRC_0; + o_inst->src3=ZERO_SRC_0; + break; + + case 2: + o_inst->src1=t_src_scalar(vp, &src[0]); + o_inst->src2=t_src_scalar(vp, &src[1]); + o_inst->src3=ZERO_SRC_1; + break; + + case 3: + o_inst->src1=t_src_scalar(vp, &src[0]); + o_inst->src2=t_src_scalar(vp, &src[1]); + o_inst->src3=t_src_scalar(vp, &src[2]); + break; + + default: + fprintf(stderr, "scalars and op RCC not handled yet"); + exit(-1); + break; + } + }else{ + switch(operands){ + case 1: + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=ZERO_SRC_0; + o_inst->src3=ZERO_SRC_0; + break; + + case 2: + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=t_src(vp, &src[1]); + o_inst->src3=ZERO_SRC_1; + break; + + case 3: + o_inst->src1=t_src(vp, &src[0]); + o_inst->src2=t_src(vp, &src[1]); + o_inst->src3=t_src(vp, &src[2]); + break; + + default: + fprintf(stderr, "scalars and op RCC not handled yet"); + exit(-1); + break; + } + } + next: ; + } + + /* Will most likely segfault before we get here... fix later. */ + if(o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH/4) { + vp->program.length = 0; + vp->native = GL_FALSE; + return ; + } + vp->program.length=(o_inst - vp->program.body.i) * 4; +#if 0 + fprintf(stderr, "hw program:\n"); + for(i=0; i < vp->program.length; i++) + fprintf(stderr, "%08x\n", vp->program.body.d[i]); +#endif +} + +static void position_invariant(struct gl_program *prog) +{ + struct prog_instruction *vpi; + struct gl_program_parameter_list *paramList; + int i; + + GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX }; + +#ifdef PREFER_DP4 + tokens[5] = STATE_MATRIX; +#else + tokens[5] = STATE_MATRIX_TRANSPOSE; +#endif + paramList = prog->Parameters; + + vpi = _mesa_alloc_instructions (prog->NumInstructions + 4); + _mesa_init_instructions (vpi, prog->NumInstructions + 4); + + for (i=0; i < 4; i++) { + GLint idx; + tokens[3] = tokens[4] = i; + idx = _mesa_add_state_reference(paramList, tokens); +#ifdef PREFER_DP4 + vpi[i].Opcode = OPCODE_DP4; + vpi[i].StringPos = 0; + vpi[i].Data = 0; + + vpi[i].DstReg.File = PROGRAM_OUTPUT; + vpi[i].DstReg.Index = VERT_RESULT_HPOS; + vpi[i].DstReg.WriteMask = 1 << i; + vpi[i].DstReg.CondMask = COND_TR; + + vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; + vpi[i].SrcReg[0].Index = idx; + vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + vpi[i].SrcReg[1].File = PROGRAM_INPUT; + vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; + vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW; +#else + if (i == 0) + vpi[i].Opcode = OPCODE_MUL; + else + vpi[i].Opcode = OPCODE_MAD; + + vpi[i].StringPos = 0; + vpi[i].Data = 0; + + if (i == 3) + vpi[i].DstReg.File = PROGRAM_OUTPUT; + else + vpi[i].DstReg.File = PROGRAM_TEMPORARY; + vpi[i].DstReg.Index = 0; + vpi[i].DstReg.WriteMask = 0xf; + vpi[i].DstReg.CondMask = COND_TR; + + vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; + vpi[i].SrcReg[0].Index = idx; + vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + vpi[i].SrcReg[1].File = PROGRAM_INPUT; + vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; + vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i); + + if (i > 0) { + vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY; + vpi[i].SrcReg[2].Index = 0; + vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW; + } +#endif + } + + _mesa_memcpy(&vpi[i], prog->Instructions, prog->NumInstructions * sizeof(struct prog_instruction)); + + free(prog->Instructions); + + prog->Instructions = vpi; + + prog->NumInstructions += 4; + vpi = &prog->Instructions[prog->NumInstructions-1]; + + assert(vpi->Opcode == OPCODE_END); +} + +static void insert_wpos(struct r300_vertex_program *vp, + struct gl_program *prog, + GLuint temp_index) +{ + struct prog_instruction *vpi; + struct prog_instruction *vpi_insert; + int i = 0; + + vpi = _mesa_alloc_instructions (prog->NumInstructions + 2); + _mesa_init_instructions (vpi, prog->NumInstructions + 2); + /* all but END */ + _mesa_memcpy(vpi, prog->Instructions, (prog->NumInstructions - 1) * sizeof(struct prog_instruction)); + /* END */ + _mesa_memcpy(&vpi[prog->NumInstructions + 1], &prog->Instructions[prog->NumInstructions - 1], + sizeof(struct prog_instruction)); + vpi_insert = &vpi[prog->NumInstructions - 1]; + + vpi_insert[i].Opcode = OPCODE_MOV; + + vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; + vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS; + vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; + vpi_insert[i].DstReg.CondMask = COND_TR; + + vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; + vpi_insert[i].SrcReg[0].Index = temp_index; + vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + i++; + + vpi_insert[i].Opcode = OPCODE_MOV; + + vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; + vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0+vp->wpos_idx; + vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; + vpi_insert[i].DstReg.CondMask = COND_TR; + + vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; + vpi_insert[i].SrcReg[0].Index = temp_index; + vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + i++; + + free(prog->Instructions); + + prog->Instructions = vpi; + + prog->NumInstructions += i; + vpi = &prog->Instructions[prog->NumInstructions-1]; + + assert(vpi->Opcode == OPCODE_END); +} + +static void pos_as_texcoord(struct r300_vertex_program *vp, + struct gl_program *prog) +{ + struct prog_instruction *vpi; + GLuint tempregi = prog->NumTemporaries; + /* should do something else if no temps left... */ + prog->NumTemporaries++; + + for(vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++){ + if( vpi->DstReg.File == PROGRAM_OUTPUT && + vpi->DstReg.Index == VERT_RESULT_HPOS ){ + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = tempregi; + } + } + insert_wpos(vp, prog, tempregi); +} + +static struct r300_vertex_program *build_program(struct r300_vertex_program_key *wanted_key, + struct gl_vertex_program *mesa_vp, + GLint wpos_idx) +{ + struct r300_vertex_program *vp; + + vp = _mesa_calloc(sizeof(*vp)); + _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); + + vp->wpos_idx = wpos_idx; + + if(mesa_vp->IsPositionInvariant) { + position_invariant(&mesa_vp->Base); + } + + if(wpos_idx > -1) + pos_as_texcoord(vp, &mesa_vp->Base); + + assert(mesa_vp->Base.NumInstructions); + + vp->num_temporaries=mesa_vp->Base.NumTemporaries; + + r300_translate_vertex_shader(vp, mesa_vp->Base.Instructions); + + return vp; +} + +void r300_select_vertex_shader(r300ContextPtr r300) +{ + GLcontext *ctx = ctx = r300->radeon.glCtx; + GLuint InputsRead; + struct r300_vertex_program_key wanted_key = { 0 }; + GLint i; + struct r300_vertex_program_cont *vpc; + struct r300_vertex_program *vp; + GLint wpos_idx; + + vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + + wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS; + + wpos_idx = -1; + if (InputsRead & FRAG_BIT_WPOS){ + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (!(InputsRead & (FRAG_BIT_TEX0 << i))) + break; + + if(i == ctx->Const.MaxTextureUnits){ + fprintf(stderr, "\tno free texcoord found\n"); + exit(0); + } + + InputsRead |= (FRAG_BIT_TEX0 << i); + wpos_idx = i; + } + + if (InputsRead & FRAG_BIT_COL0) + wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0; + + if ((InputsRead & FRAG_BIT_COL1) /*|| + (InputsRead & FRAG_BIT_FOGC)*/) + wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (InputsRead & (FRAG_BIT_TEX0 << i)) + wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + + wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; + if(vpc->mesa_program.IsPositionInvariant) { + /* we wan't position don't we ? */ + wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); + } + + for (vp = vpc->progs; vp; vp = vp->next) + if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) { + r300->selected_vp = vp; + return ; + } + + //_mesa_print_program(&vpc->mesa_program.Base); + + vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx); + vp->next = vpc->progs; + vpc->progs = vp; + r300->selected_vp = vp; +} -- cgit v1.2.3 From 0c25d9ab198f79afee23ec1bf8ac61c4cd801d3a Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Thu, 15 Mar 2007 20:55:30 +0000 Subject: r300: Added _mesa_copy_instructions. --- src/mesa/drivers/dri/r300/r300_fragprog.c | 2 +- src/mesa/drivers/dri/r300/r300_vertprog.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index e05abdb7c6..251fd26082 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -1853,7 +1853,7 @@ static void insert_wpos(struct gl_program *prog) fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); i++; - _mesa_memcpy(&fpi[i], prog->Instructions, prog->NumInstructions * sizeof(struct prog_instruction)); + _mesa_copy_instructions (&fpi[i], prog->Instructions, prog->NumInstructions); free(prog->Instructions); diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 0c43270d75..092ebb1140 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -946,7 +946,7 @@ static void position_invariant(struct gl_program *prog) #endif } - _mesa_memcpy(&vpi[i], prog->Instructions, prog->NumInstructions * sizeof(struct prog_instruction)); + _mesa_copy_instructions (&vpi[i], prog->Instructions, prog->NumInstructions); free(prog->Instructions); @@ -969,10 +969,11 @@ static void insert_wpos(struct r300_vertex_program *vp, vpi = _mesa_alloc_instructions (prog->NumInstructions + 2); _mesa_init_instructions (vpi, prog->NumInstructions + 2); /* all but END */ - _mesa_memcpy(vpi, prog->Instructions, (prog->NumInstructions - 1) * sizeof(struct prog_instruction)); + _mesa_copy_instructions (vpi, prog->Instructions, prog->NumInstructions - 1); /* END */ - _mesa_memcpy(&vpi[prog->NumInstructions + 1], &prog->Instructions[prog->NumInstructions - 1], - sizeof(struct prog_instruction)); + _mesa_copy_instructions (&vpi[prog->NumInstructions + 1], + &prog->Instructions[prog->NumInstructions - 1], + 1); vpi_insert = &vpi[prog->NumInstructions - 1]; vpi_insert[i].Opcode = OPCODE_MOV; -- cgit v1.2.3 From f04979ae481acc9fdc423da06514c4d557edd7cd Mon Sep 17 00:00:00 2001 From: Mathias Hopf Date: Fri, 16 Mar 2007 08:28:34 -0600 Subject: added null xmctx check to XMesaResizeBuffers(), bug 7205 --- src/mesa/drivers/x11/xm_api.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index cbbbd56efd..ba020fc3d6 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -2499,6 +2499,8 @@ XMesaResizeBuffers( XMesaBuffer b ) { GET_CURRENT_CONTEXT(ctx); XMesaContext xmctx = XMESA_CONTEXT(ctx); + if (!xmctx) + return; xmesa_check_and_update_buffer_size(xmctx, b); } -- cgit v1.2.3 From e5070bc3ca75dee31034cc543f3d2ee04e5dc032 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 16 Mar 2007 11:00:07 -0600 Subject: Assorted fixes for dealing with zero-size frame/renderbuffers. In xmesa_check_and_update_buffer_size() handle xmctx==NULL correctly: still call _mesa_resize_framebufer(). If we don't we can wind up in a situation where the framebuffer size is non-zero but an attached renderbuffer size is still initialized to zero. This inconsistancy can later cause problems. Check for zero-size renderbuffers in update_color_draw_buffers() and update_color_read_buffer(). See bug 7205. --- src/mesa/drivers/x11/xm_api.c | 14 ++++++++------ src/mesa/drivers/x11/xm_buffer.c | 39 +++++++++++++++++++++++---------------- 2 files changed, 31 insertions(+), 22 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index ba020fc3d6..b513dc8d40 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -1842,16 +1842,18 @@ XMesaDestroyBuffer(XMesaBuffer b) * 1. the first time a buffer is bound to a context. * 2. from glViewport to poll for window size changes * 3. from the XMesaResizeBuffers() API function. + * Note: it's possible (and legal) for xmctx to be NULL. That can happen + * when resizing a buffer when no rendering context is bound. */ void xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer) { GLuint width, height; - xmesa_get_window_size(xmctx->display, drawBuffer, &width, &height); + xmesa_get_window_size(drawBuffer->display, drawBuffer, &width, &height); if (drawBuffer->mesa_buffer.Width != width || drawBuffer->mesa_buffer.Height != height) { - _mesa_resize_framebuffer(&(xmctx->mesa), - &(drawBuffer->mesa_buffer), width, height); + GLcontext *ctx = xmctx ? &xmctx->mesa : NULL; + _mesa_resize_framebuffer(ctx, &(drawBuffer->mesa_buffer), width, height); } drawBuffer->mesa_buffer.Initialized = GL_TRUE; /* XXX TEMPORARY? */ } @@ -2175,7 +2177,7 @@ void XMesaSwapBuffers( XMesaBuffer b ) } #endif if (b->backxrb->ximage) { - /* Copy Ximage from host's memory to server's window */ + /* Copy Ximage (back buf) from client memory to server window */ #if defined(USE_XSHM) && !defined(XFree86Server) if (b->shm) { /*_glthread_LOCK_MUTEX(_xmesa_lock);*/ @@ -2197,8 +2199,8 @@ void XMesaSwapBuffers( XMesaBuffer b ) /*_glthread_UNLOCK_MUTEX(_xmesa_lock);*/ } } - else { - /* Copy pixmap to window on server */ + else if (b->backxrb->pixmap) { + /* Copy pixmap (back buf) to window (front buf) on server */ /*_glthread_LOCK_MUTEX(_xmesa_lock);*/ XMesaCopyArea( b->xm_visual->display, b->backxrb->pixmap, /* source drawable */ diff --git a/src/mesa/drivers/x11/xm_buffer.c b/src/mesa/drivers/x11/xm_buffer.c index 73c46b1fe6..c1fa23328f 100644 --- a/src/mesa/drivers/x11/xm_buffer.c +++ b/src/mesa/drivers/x11/xm_buffer.c @@ -168,9 +168,6 @@ alloc_back_shm_ximage(XMesaBuffer b, GLuint width, GLuint height) static void alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height) { - if (width == 0 || height == 0) - return; - if (b->db_mode == BACK_XIMAGE) { /* Deallocate the old backxrb->ximage, if any */ if (b->backxrb->ximage) { @@ -186,6 +183,9 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height) b->backxrb->ximage = NULL; } + if (width == 0 || height == 0) + return; + /* Allocate new back buffer */ #ifdef XFree86Server /* Allocate a regular XImage for the back buffer. */ @@ -218,20 +218,20 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height) b->backxrb->pixmap = None; } else if (b->db_mode == BACK_PIXMAP) { - if (!width) - width = 1; - if (!height) - height = 1; - /* Free the old back pixmap */ if (b->backxrb->pixmap) { - XMesaFreePixmap(b->xm_visual->display, b->backxrb->pixmap); + XMesaFreePixmap(b->xm_visual->display, b->backxrb->pixmap); + b->backxrb->pixmap = 0; } - /* Allocate new back pixmap */ - b->backxrb->pixmap = XMesaCreatePixmap(b->xm_visual->display, - b->frontxrb->drawable, - width, height, - GET_VISUAL_DEPTH(b->xm_visual)); + + if (width > 0 && height > 0) { + /* Allocate new back pixmap */ + b->backxrb->pixmap = XMesaCreatePixmap(b->xm_visual->display, + b->frontxrb->drawable, + width, height, + GET_VISUAL_DEPTH(b->xm_visual)); + } + b->backxrb->ximage = NULL; } } @@ -250,6 +250,7 @@ xmesa_delete_renderbuffer(struct gl_renderbuffer *rb) /** * Reallocate renderbuffer storage for front color buffer. + * Called via gl_renderbuffer::AllocStorage() */ static GLboolean xmesa_alloc_front_storage(GLcontext *ctx, struct gl_renderbuffer *rb, @@ -260,6 +261,7 @@ xmesa_alloc_front_storage(GLcontext *ctx, struct gl_renderbuffer *rb, /* just clear these to be sure we don't accidentally use them */ xrb->origin1 = NULL; xrb->origin2 = NULL; + xrb->origin3 = NULL; xrb->origin4 = NULL; /* for the FLIP macro: */ @@ -275,6 +277,7 @@ xmesa_alloc_front_storage(GLcontext *ctx, struct gl_renderbuffer *rb, /** * Reallocate renderbuffer storage for back color buffer. + * Called via gl_renderbuffer::AllocStorage() */ static GLboolean xmesa_alloc_back_storage(GLcontext *ctx, struct gl_renderbuffer *rb, @@ -309,8 +312,12 @@ xmesa_alloc_back_storage(GLcontext *ctx, struct gl_renderbuffer *rb, xrb->origin4 = (GLuint *) xrb->ximage->data + xrb->width4 * (height - 1); } else { - /* this assertion will fail if we happend to run out of memory */ - /*assert(xrb->pixmap);*/ + /* out of memory or buffer size is 0 x 0 */ + xrb->width1 = xrb->width2 = xrb->width3 = xrb->width4 = 0; + xrb->origin1 = NULL; + xrb->origin2 = NULL; + xrb->origin3 = NULL; + xrb->origin4 = NULL; } return GL_TRUE; -- cgit v1.2.3 From 6a9b0cd0b43ba01b24871ec1fa155e192ddeaa56 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 17 Mar 2007 16:46:24 +1100 Subject: r300: change vendor string to DRI R300 Project --- src/mesa/drivers/dri/r300/radeon_context.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c index 66d1b153b3..22f943ebf3 100644 --- a/src/mesa/drivers/dri/r300/radeon_context.c +++ b/src/mesa/drivers/dri/r300/radeon_context.c @@ -70,7 +70,10 @@ static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name) switch (name) { case GL_VENDOR: - return (GLubyte *) "Tungsten Graphics, Inc."; + if (IS_R300_CLASS(radeon->radeonScreen)) + return (GLubyte *) "DRI R300 Project"; + else + return (GLubyte *) "Tungsten Graphics, Inc."; case GL_RENDERER: { -- cgit v1.2.3 From 84081774e62a8af18e6bf894ea69f63b97dcfe96 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Sun, 18 Mar 2007 18:26:16 +0800 Subject: i965: fix for FXT1 & S3TC texture format choose the right mesa texformat for FXT1 & S3TC --- src/mesa/drivers/dri/i965/brw_tex.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c index c3ffa9e657..467aec64a0 100644 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -148,18 +148,25 @@ brwChooseTextureFormat( GLcontext *ctx, GLint internalFormat, return &_mesa_texformat_ycbcr_rev; case GL_COMPRESSED_RGB_FXT1_3DFX: + return &_mesa_texformat_rgb_fxt1; case GL_COMPRESSED_RGBA_FXT1_3DFX: - return &_mesa_texformat_rgb_fxt1; + return &_mesa_texformat_rgba_fxt1; case GL_RGB_S3TC: case GL_RGB4_S3TC: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + return &_mesa_texformat_rgb_dxt1; + + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + return &_mesa_texformat_rgba_dxt1; + case GL_RGBA_S3TC: case GL_RGBA4_S3TC: case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + return &_mesa_texformat_rgba_dxt3; + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return &_mesa_texformat_rgb_dxt1; /* there is no rgba support? */ + return &_mesa_texformat_rgba_dxt5; case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT16: -- cgit v1.2.3 From bcf4f4de76197dbb88b884ce55b5e457ff2f79b0 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 19 Mar 2007 00:08:45 +1100 Subject: nouveau: avoid using uninitialised TexSrcUnit. --- src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index 73c1f7c2a5..211483dc02 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -263,6 +263,23 @@ pass0_make_mask(GLuint mesa_mask) return mask; } +static GLboolean +pass0_opcode_is_tex(enum prog_opcode op) +{ + switch (op) { + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXD: + case OPCODE_TXL: + case OPCODE_TXP: + return GL_TRUE; + default: + break; + } + + return GL_FALSE; +} + static nvsTexTarget pass0_make_tex_target(GLuint mesa) { @@ -721,7 +738,11 @@ pass0_translate_arith(nouveauShader *nvs, struct gl_program *prog, (inst->SaturateMode != SATURATE_OFF), src[0], src[1], src[2]); nvsinst->tex_unit = inst->TexSrcUnit; - nvsinst->tex_target = pass0_make_tex_target(inst->TexSrcTarget); + if (pass0_opcode_is_tex(inst->Opcode)) + nvsinst->tex_target = + pass0_make_tex_target(inst->TexSrcTarget); + else + nvsinst->tex_target = NVS_TEX_TARGET_UNKNOWN; ret = GL_TRUE; } else -- cgit v1.2.3 From 106210652133e48ef557147f288ef28896517e0c Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Sun, 18 Mar 2007 16:44:20 +0000 Subject: r300: Corrected the string for polygon mode; it wasn't updated from unkXXX. --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index a106bbc6ab..0fb2e5a2e0 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -338,7 +338,7 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.unk4260.cmd[0] = cmdpacket0(0x4260, 3); ALLOC_STATE( shade, always, 5, "shade", 0 ); r300->hw.shade.cmd[0] = cmdpacket0(R300_RE_SHADE, 4); - ALLOC_STATE( polygon_mode, always, 4, "unk4288", 0 ); + ALLOC_STATE( polygon_mode, always, 4, "polygon_mode", 0 ); r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_RE_POLYGON_MODE, 3); ALLOC_STATE( fogp, always, 3, "fogp", 0 ); r300->hw.fogp.cmd[0] = cmdpacket0(R300_RE_FOG_SCALE, 2); -- cgit v1.2.3 From dd868b4100f748e5187c58138d1b55fe8e8eddef Mon Sep 17 00:00:00 2001 From: Oliver McFadden Date: Sun, 18 Mar 2007 16:57:45 +0000 Subject: r300: Removed the deprecated $XFree86$ CVS keywords. --- src/mesa/drivers/dri/r300/r300_maos.c | 1 - src/mesa/drivers/dri/r300/r300_maos.h | 1 - src/mesa/drivers/dri/r300/r300_tex.c | 1 - src/mesa/drivers/dri/r300/r300_tex.h | 1 - src/mesa/drivers/dri/r300/r300_texmem.c | 1 - src/mesa/drivers/dri/r300/r300_texstate.c | 1 - src/mesa/drivers/dri/r300/radeon_span.c | 1 - 7 files changed, 7 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_maos.c b/src/mesa/drivers/dri/r300/r300_maos.c index fcb87cbbb5..b0d96f7601 100644 --- a/src/mesa/drivers/dri/r300/r300_maos.c +++ b/src/mesa/drivers/dri/r300/r300_maos.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_maos_arrays.c,v 1.3 2003/02/23 23:59:01 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/r300/r300_maos.h b/src/mesa/drivers/dri/r300/r300_maos.h index 679f1c2558..ab28317894 100644 --- a/src/mesa/drivers/dri/r300/r300_maos.h +++ b/src/mesa/drivers/dri/r300/r300_maos.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_maos.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 6348ba1982..eb72802f8b 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_tex.c,v 1.2 2002/11/05 17:46:08 tsi Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h index a18ff0e2ce..10aabc8b4b 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.h +++ b/src/mesa/drivers/dri/r300/r300_tex.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_tex.h,v 1.1 2002/10/30 12:51:53 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c index 96973c0098..f531b54d11 100644 --- a/src/mesa/drivers/dri/r300/r300_texmem.c +++ b/src/mesa/drivers/dri/r300/r300_texmem.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_texmem.c,v 1.5 2002/12/17 00:32:56 dawes Exp $ */ /************************************************************************** Copyright (C) Tungsten Graphics 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index cc356afb23..4bc0ea14f8 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_texstate.c,v 1.3 2003/02/15 22:18:47 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c index 1b74f6779b..cc779d684f 100644 --- a/src/mesa/drivers/dri/r300/radeon_span.c +++ b/src/mesa/drivers/dri/r300/radeon_span.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_span.c,v 1.6 2002/10/30 12:51:56 alanh Exp $ */ /************************************************************************** Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. -- cgit v1.2.3 From c41d6ab6f062ebce1076ca79f9ad0c7368a0e2d0 Mon Sep 17 00:00:00 2001 From: Miguel Marte Date: Sun, 18 Mar 2007 11:08:29 -0600 Subject: screen offset changes, bug 9965 --- src/mesa/drivers/dri/unichrome/via_context.c | 31 +++++++++++++++++----------- src/mesa/drivers/dri/unichrome/via_context.h | 5 ----- src/mesa/drivers/dri/unichrome/via_ioctl.c | 24 +++++++++------------ src/mesa/drivers/dri/unichrome/via_span.c | 4 ++-- src/mesa/drivers/dri/unichrome/via_state.c | 18 +++++----------- 5 files changed, 36 insertions(+), 46 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/unichrome/via_context.c b/src/mesa/drivers/dri/unichrome/via_context.c index 89533421b4..4d25d328e3 100644 --- a/src/mesa/drivers/dri/unichrome/via_context.c +++ b/src/mesa/drivers/dri/unichrome/via_context.c @@ -768,9 +768,7 @@ void viaXMesaWindowMoved(struct via_context *vmesa) drawable); } - draw_buffer->drawXoff = (GLuint)(((drawable->x * bytePerPixel) & 0x1f) / - bytePerPixel); - draw_buffer->drawX = drawable->x - draw_buffer->drawXoff; + draw_buffer->drawX = drawable->x; draw_buffer->drawY = drawable->y; draw_buffer->drawW = drawable->w; draw_buffer->drawH = drawable->h; @@ -782,9 +780,7 @@ void viaXMesaWindowMoved(struct via_context *vmesa) readable); } - read_buffer->drawXoff = (GLuint)(((readable->x * bytePerPixel) & 0x1f) / - bytePerPixel); - read_buffer->drawX = readable->x - read_buffer->drawXoff; + read_buffer->drawX = readable->x; read_buffer->drawY = readable->y; read_buffer->drawW = readable->w; read_buffer->drawH = readable->h; @@ -795,13 +791,24 @@ void viaXMesaWindowMoved(struct via_context *vmesa) draw_buffer->drawX * bytePerPixel); vmesa->front.origMap = (vmesa->front.map + - draw_buffer->drawY * vmesa->front.pitch + - draw_buffer->drawX * bytePerPixel); + draw_buffer->drawY * vmesa->front.pitch + + draw_buffer->drawX * bytePerPixel); + + vmesa->back.orig = (vmesa->back.offset + + draw_buffer->drawY * vmesa->back.pitch + + draw_buffer->drawX * bytePerPixel); - vmesa->back.orig = vmesa->back.offset; - vmesa->depth.orig = vmesa->depth.offset; - vmesa->back.origMap = vmesa->back.map; - vmesa->depth.origMap = vmesa->depth.map; + vmesa->back.origMap = (vmesa->back.map + + draw_buffer->drawY * vmesa->back.pitch + + draw_buffer->drawX * bytePerPixel); + + vmesa->depth.orig = (vmesa->depth.offset + + draw_buffer->drawY * vmesa->depth.pitch + + draw_buffer->drawX * bytePerPixel); + + vmesa->depth.origMap = (vmesa->depth.map + + draw_buffer->drawY * vmesa->depth.pitch + + draw_buffer->drawX * bytePerPixel); viaCalcViewport(vmesa->glCtx); } diff --git a/src/mesa/drivers/dri/unichrome/via_context.h b/src/mesa/drivers/dri/unichrome/via_context.h index 77161a8d5d..fecd2782fb 100644 --- a/src/mesa/drivers/dri/unichrome/via_context.h +++ b/src/mesa/drivers/dri/unichrome/via_context.h @@ -104,11 +104,6 @@ struct via_renderbuffer { int drawW; int drawH; - int drawXoff; /* drawX is 32byte aligned - this is - * the delta to the real origin, in - * pixel units. - */ - __DRIdrawablePrivate *dPriv; }; diff --git a/src/mesa/drivers/dri/unichrome/via_ioctl.c b/src/mesa/drivers/dri/unichrome/via_ioctl.c index 5d102de93e..4a733fb00c 100644 --- a/src/mesa/drivers/dri/unichrome/via_ioctl.c +++ b/src/mesa/drivers/dri/unichrome/via_ioctl.c @@ -187,7 +187,7 @@ static void viaFillBuffer(struct via_context *vmesa, int w = pbox[i].x2 - pbox[i].x1; int h = pbox[i].y2 - pbox[i].y1; - int offset = (buffer->orig + + int offset = (buffer->offset + y * buffer->pitch + x * bytePerPixel); @@ -276,7 +276,7 @@ static void viaClear(GLcontext *ctx, GLbitfield mask) /* flip top to bottom */ cy = dPriv->h - cy - ch; - cx += vrb->drawX + vrb->drawXoff; + cx += vrb->drawX; cy += vrb->drawY; if (!all) { @@ -359,8 +359,8 @@ static void viaDoSwapBuffers(struct via_context *vmesa, GLint w = b->x2 - b->x1; GLint h = b->y2 - b->y1; - GLuint src = back->orig + y * back->pitch + x * bytePerPixel; - GLuint dest = front->orig + y * front->pitch + x * bytePerPixel; + GLuint src = back->offset + y * back->pitch + x * bytePerPixel; + GLuint dest = front->offset + y * front->pitch + x * bytePerPixel; viaBlit(vmesa, bytePerPixel << 3, @@ -747,7 +747,7 @@ static void via_emit_cliprect(struct via_context *vmesa, : HC_HDBFM_RGB565); GLuint pitch = buffer->pitch; - GLuint offset = buffer->orig; + GLuint offset = buffer->offset; if (0) fprintf(stderr, "emit cliprect for box %d,%d %d,%d\n", @@ -768,7 +768,7 @@ static void via_emit_cliprect(struct via_context *vmesa, vb[4] = (HC_SubA_HDBBasL << 24) | (offset & 0xFFFFFF); vb[5] = (HC_SubA_HDBBasH << 24) | ((offset & 0xFF000000) >> 24); - vb[6] = (HC_SubA_HSPXYOS << 24) | ((31 - buffer->drawXoff) << HC_HSPXOS_SHIFT); + vb[6] = (HC_SubA_HSPXYOS << 24); vb[7] = (HC_SubA_HDBFM << 24) | HC_HDBLoc_Local | format | pitch; } @@ -887,22 +887,18 @@ void viaFlushDmaLocked(struct via_context *vmesa, GLuint flags) struct via_renderbuffer *const vrb = (struct via_renderbuffer *) dPriv->driverPrivate; - for (i = 0; i < vmesa->numClipRects; i++) { drm_clip_rect_t b; - b.x1 = pbox[i].x1 - (vrb->drawX + vrb->drawXoff); - b.x2 = pbox[i].x2 - (vrb->drawX + vrb->drawXoff); - b.y1 = pbox[i].y1 - vrb->drawY; - b.y2 = pbox[i].y2 - vrb->drawY; + b.x1 = pbox[i].x1; + b.x2 = pbox[i].x2; + b.y1 = pbox[i].y1; + b.y2 = pbox[i].y2; if (vmesa->scissor && !intersect_rect(&b, &b, &vmesa->scissorRect)) continue; - b.x1 += vrb->drawXoff; - b.x2 += vrb->drawXoff; - via_emit_cliprect(vmesa, &b); if (fire_buffer(vmesa) != 0) { diff --git a/src/mesa/drivers/dri/unichrome/via_span.c b/src/mesa/drivers/dri/unichrome/via_span.c index f1ed98036b..3a16dadd23 100644 --- a/src/mesa/drivers/dri/unichrome/via_span.c +++ b/src/mesa/drivers/dri/unichrome/via_span.c @@ -46,7 +46,7 @@ GLuint pitch = vrb->pitch; \ GLuint height = dPriv->h; \ GLint p = 0; \ - char *buf = (char *)(vrb->origMap + vrb->drawXoff * vrb->bpp); \ + char *buf = (char *)(vrb->origMap); \ (void) p; /* ================================================================ @@ -82,7 +82,7 @@ __DRIdrawablePrivate *dPriv = vrb->dPriv; \ GLuint depth_pitch = vrb->pitch; \ GLuint height = dPriv->h; \ - char *buf = (char *)(vrb->map + (vrb->drawXoff * vrb->bpp/8)) + char *buf = (char *)(vrb->map) #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS diff --git a/src/mesa/drivers/dri/unichrome/via_state.c b/src/mesa/drivers/dri/unichrome/via_state.c index 30b9dc289a..bccdbe9326 100644 --- a/src/mesa/drivers/dri/unichrome/via_state.c +++ b/src/mesa/drivers/dri/unichrome/via_state.c @@ -500,10 +500,8 @@ void viaEmitState(struct via_context *vmesa) OUT_RING( HC_HEADER2 ); OUT_RING( (HC_ParaType_NotTex << 16) ); - OUT_RING( (HC_SubA_HSPXYOS << 24) | - (((32- vrb->drawXoff) & 0x1f) << HC_HSPXOS_SHIFT)); - OUT_RING( (HC_SubA_HSPXYOS << 24) | - (((32 - vrb->drawXoff) & 0x1f) << HC_HSPXOS_SHIFT)); + OUT_RING( (HC_SubA_HSPXYOS << 24) ); + OUT_RING( (HC_SubA_HSPXYOS << 24) ); ADVANCE_RING(); } @@ -712,12 +710,8 @@ static void viaColorMask(GLcontext *ctx, } -/* ============================================================= - */ - -/* Using drawXoff like this is incorrect outside of locked regions. - * This hardware just isn't capable of private back buffers without +/* This hardware just isn't capable of private back buffers without * glitches and/or a hefty locking scheme. */ void viaCalcViewport(GLcontext *ctx) @@ -729,12 +723,10 @@ void viaCalcViewport(GLcontext *ctx) const GLfloat *v = ctx->Viewport._WindowMap.m; GLfloat *m = vmesa->ViewportMatrix.m; - /* See also via_translate_vertex. - */ m[MAT_SX] = v[MAT_SX]; - m[MAT_TX] = v[MAT_TX] + SUBPIXEL_X + vrb->drawXoff; + m[MAT_TX] = v[MAT_TX] + vrb->drawX + SUBPIXEL_X; m[MAT_SY] = - v[MAT_SY]; - m[MAT_TY] = - v[MAT_TY] + dPriv->h + SUBPIXEL_Y; + m[MAT_TY] = - v[MAT_TY] + vrb->drawY + SUBPIXEL_Y + vrb->drawH; m[MAT_SZ] = v[MAT_SZ] * (1.0 / vmesa->depth_max); m[MAT_TZ] = v[MAT_TZ] * (1.0 / vmesa->depth_max); } -- cgit v1.2.3 From 07265280a962253913e974829e6729c64ca18bc2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 18 Mar 2007 20:12:38 +0000 Subject: fix typo in subrect_disable packet --- src/mesa/drivers/dri/i915tex/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i915tex/i915_reg.h b/src/mesa/drivers/dri/i915tex/i915_reg.h index 04b199905c..34c6821405 100644 --- a/src/mesa/drivers/dri/i915tex/i915_reg.h +++ b/src/mesa/drivers/dri/i915tex/i915_reg.h @@ -138,7 +138,7 @@ /* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ -#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<19) | 0x2) +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2) /* p161 */ #define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) -- cgit v1.2.3 From c9e39aeaef0135d6d1aa92a3dc5b3c91fa17904d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 18 Mar 2007 20:12:53 +0000 Subject: fix typo in subrect_disable packet --- src/mesa/drivers/dri/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h index 3ba792a234..694cd4c8c3 100644 --- a/src/mesa/drivers/dri/i915/i915_reg.h +++ b/src/mesa/drivers/dri/i915/i915_reg.h @@ -138,7 +138,7 @@ /* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ -#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<19) | 0x2) +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2) /* p161 */ #define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) -- cgit v1.2.3 From 77544d7b7d7c6fd03c0df81dca07f1bb3a67c119 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 18 Mar 2007 20:13:06 +0000 Subject: fix off-by-one in load_state_immediate --- src/mesa/drivers/dri/i915/i915_state.c | 2 +- src/mesa/drivers/dri/i915/i915_vtbl.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index fd11e10652..5e00e6597b 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -813,7 +813,7 @@ static void i915_init_packets( i915ContextPtr i915 ) I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | - (4)); + (3)); i915->state.Ctx[I915_CTXREG_LIS2] = 0; i915->state.Ctx[I915_CTXREG_LIS4] = 0; i915->state.Ctx[I915_CTXREG_LIS5] = 0; diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 2936a0fb72..b533e153be 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -180,7 +180,7 @@ static void i915_emit_invarient_state( intelContextPtr intel ) */ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | - (1)); + (0)); OUT_BATCH(0); /* XXX: Use this */ -- cgit v1.2.3 From 9b42100c04f14b4f2c1e5fe9748bb0519ed6c516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 19 Mar 2007 17:23:44 +0100 Subject: i915tex: Fix triple buffering after recent Mesa core changes. Remove superfluous _mesa_resize_framebuffer call which is now harmful because it causes the third renderbuffer to have width/height 0, so Mesa refuses to render to it. In the long term, it would be nice to remove the hack in intel_alloc_window_storage in favour of a proper Mesa interface for flipping between more than two colour buffers. --- src/mesa/drivers/dri/i915tex/intel_buffers.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i915tex/intel_buffers.c b/src/mesa/drivers/dri/i915tex/intel_buffers.c index 62ff54b007..c0b4f438be 100644 --- a/src/mesa/drivers/dri/i915tex/intel_buffers.c +++ b/src/mesa/drivers/dri/i915tex/intel_buffers.c @@ -349,6 +349,28 @@ intelWindowMoved(struct intel_context *intel) /* Update Mesa's notion of window size */ driUpdateFramebufferSize(ctx, dPriv); + + /* Update size of third renderbuffer */ + if (intel_fb->pf_num_pages == 3) { + struct gl_renderbuffer *rb = &intel_fb->color_rb[(intel_fb->pf_current_page + + 2) % 3]->Base; + + /* only resize if size is changing */ + if (rb->Width != intel_fb->Base.Width || + rb->Height != intel_fb->Base.Height) { + /* could just as well pass rb->_ActualFormat here */ + if (rb->AllocStorage(ctx, rb, rb->InternalFormat, + intel_fb->Base.Width, intel_fb->Base.Height)) { + ASSERT(rb->Width == intel_fb->Base.Width); + ASSERT(rb->Height == intel_fb->Base.Height); + } + else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "Resizing framebuffer"); + /* no return */ + } + } + } + intel_fb->Base.Initialized = GL_TRUE; /* XXX remove someday */ /* Update hardware scissor */ -- cgit v1.2.3 From 07db8c9115c0b07d79be778976e25f8eb18d42a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 19 Mar 2007 18:34:27 +0100 Subject: i915tex: The intended triple buffering fix. Making modifications while the editor spawned by git-commit was suspended didn't have the intended effect. --- src/mesa/drivers/dri/i915tex/intel_buffers.c | 22 ---------------------- src/mesa/drivers/dri/i915tex/intel_context.c | 6 +----- 2 files changed, 1 insertion(+), 27 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i915tex/intel_buffers.c b/src/mesa/drivers/dri/i915tex/intel_buffers.c index c0b4f438be..62ff54b007 100644 --- a/src/mesa/drivers/dri/i915tex/intel_buffers.c +++ b/src/mesa/drivers/dri/i915tex/intel_buffers.c @@ -349,28 +349,6 @@ intelWindowMoved(struct intel_context *intel) /* Update Mesa's notion of window size */ driUpdateFramebufferSize(ctx, dPriv); - - /* Update size of third renderbuffer */ - if (intel_fb->pf_num_pages == 3) { - struct gl_renderbuffer *rb = &intel_fb->color_rb[(intel_fb->pf_current_page - + 2) % 3]->Base; - - /* only resize if size is changing */ - if (rb->Width != intel_fb->Base.Width || - rb->Height != intel_fb->Base.Height) { - /* could just as well pass rb->_ActualFormat here */ - if (rb->AllocStorage(ctx, rb, rb->InternalFormat, - intel_fb->Base.Width, intel_fb->Base.Height)) { - ASSERT(rb->Width == intel_fb->Base.Width); - ASSERT(rb->Height == intel_fb->Base.Height); - } - else { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "Resizing framebuffer"); - /* no return */ - } - } - } - intel_fb->Base.Initialized = GL_TRUE; /* XXX remove someday */ /* Update hardware scissor */ diff --git a/src/mesa/drivers/dri/i915tex/intel_context.c b/src/mesa/drivers/dri/i915tex/intel_context.c index 5c2cdf0c7d..acda7b1c16 100644 --- a/src/mesa/drivers/dri/i915tex/intel_context.c +++ b/src/mesa/drivers/dri/i915tex/intel_context.c @@ -581,11 +581,7 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, } /* set GLframebuffer size to match window, if needed */ - if (intel_fb->Base.Width != driDrawPriv->w) { - _mesa_resize_framebuffer(&intel->ctx, &intel_fb->Base, - driDrawPriv->w, driDrawPriv->h); - } - if (readFb->Width != driReadPriv->w) { + if (driReadPriv != driDrawPriv && readFb->Width != driReadPriv->w) { _mesa_resize_framebuffer(&intel->ctx, readFb, driReadPriv->w, driReadPriv->h); } -- cgit v1.2.3 From 7b430acd71f04dce3e21bdcfe70115a23d751f30 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sun, 18 Mar 2007 02:15:56 +0100 Subject: r300: Fix fragment program instruction pairing and register allocation There were a number of bugs related to the pairing of vector and scalar operations where swizzles ended up using the wrong source register, or an instruction was moved forward and ended up overwriting an aliased register. The new algorithm for register allocation is quite conservative and may run out of registers before necessary. On the plus side, It Just Works. Pairing is done whenever possible, and in more cases than before, so in practice this change should be a net win. --- src/mesa/drivers/dri/r300/r300_context.h | 94 +++- src/mesa/drivers/dri/r300/r300_fragprog.c | 774 ++++++++++++++++++++---------- src/mesa/drivers/dri/r300/r300_reg.h | 4 +- 3 files changed, 582 insertions(+), 290 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index bd9ed6f170..bc43953ff3 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -647,38 +647,84 @@ struct r300_vertex_program_cont { #define PFS_NUM_TEMP_REGS 32 #define PFS_NUM_CONST_REGS 16 -/* Tracking data for Mesa registers */ +/* Mapping Mesa registers to R300 temporaries */ struct reg_acc { int reg; /* Assigned hw temp */ unsigned int refcount; /* Number of uses by mesa program */ }; +/** + * Describe the current lifetime information for an R300 temporary + */ +struct reg_lifetime { + /* Index of the first slot where this register is free in the sense + that it can be used as a new destination register. + This is -1 if the register has been assigned to a Mesa register + and the last access to the register has not yet been emitted */ + int free; + + /* Index of the first slot where this register is currently reserved. + This is used to stop e.g. a scalar operation from being moved + before the allocation time of a register that was first allocated + for a vector operation. */ + int reserved; + + /* Index of the first slot in which the register can be used as a + source without losing the value that is written by the last + emitted instruction that writes to the register */ + int vector_valid; + int scalar_valid; +}; + + +/** + * Store usage information about an ALU instruction slot during the + * compilation of a fragment program. + */ +#define SLOT_SRC_VECTOR (1<<0) +#define SLOT_SRC_SCALAR (1<<3) +#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) +#define SLOT_OP_VECTOR (1<<16) +#define SLOT_OP_SCALAR (1<<17) +#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) + +struct r300_pfs_compile_slot { + /* Bitmask indicating which parts of the slot are used, using SLOT_ constants + defined above */ + unsigned int used; + + /* Selected sources */ + int vsrc[3]; + int ssrc[3]; +}; + +/** + * Store information during compilation of fragment programs. + */ struct r300_pfs_compile_state { - int v_pos, s_pos; /* highest ALU slots used */ - - /* Track some information gathered during opcode - * construction. - * - * NOTE: Data is only set by the code, and isn't used yet. - */ - struct { - int vsrc[3]; - int ssrc[3]; - int umask; - } slot[PFS_MAX_ALU_INST]; - - /* Used to map Mesa's inputs/temps onto hardware temps */ - int temp_in_use; - struct reg_acc temps[PFS_NUM_TEMP_REGS]; - struct reg_acc inputs[32]; /* don't actually need 32... */ - - /* Track usage of hardware temps, for register allocation, - * indirection detection, etc. */ - int hwreg_in_use; - GLuint used_in_node; - GLuint dest_in_node; + int nrslots; /* number of ALU slots used so far */ + + /* Track which (parts of) slots are already filled with instructions */ + struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST]; + + /* Track the validity of R300 temporaries */ + struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; + + /* Used to map Mesa's inputs/temps onto hardware temps */ + int temp_in_use; + struct reg_acc temps[PFS_NUM_TEMP_REGS]; + struct reg_acc inputs[32]; /* don't actually need 32... */ + + /* Track usage of hardware temps, for register allocation, + * indirection detection, etc. */ + GLuint used_in_node; + GLuint dest_in_node; }; +/** + * Store everything about a fragment program that is needed + * to render with that program. + */ struct r300_fragment_program { struct gl_fragment_program mesa_program; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 251fd26082..b2c89ccb36 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -94,8 +94,9 @@ #define REG_NEGV_SHIFT 18 #define REG_NEGS_SHIFT 19 #define REG_ABS_SHIFT 20 -#define REG_NO_USE_SHIFT 21 -#define REG_VALID_SHIFT 22 +#define REG_NO_USE_SHIFT 21 // Hack for refcounting +#define REG_VALID_SHIFT 22 // Does the register contain a defined value? +#define REG_BUILTIN_SHIFT 23 // Is it a builtin (like all zero/all one)? #define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT) #define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT) @@ -106,12 +107,14 @@ #define REG_ABS_MASK (0x01 << REG_ABS_SHIFT) #define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT) #define REG_VALID_MASK (0x01 << REG_VALID_SHIFT) +#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT) -#define REG(type, index, vswz, sswz, nouse, valid) \ +#define REG(type, index, vswz, sswz, nouse, valid, builtin) \ (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \ ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \ ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \ ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \ ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \ ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) #define REG_GET_TYPE(reg) \ @@ -126,6 +129,8 @@ ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT) #define REG_GET_VALID(reg) \ ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT) +#define REG_GET_BUILTIN(reg) \ + ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT) #define REG_SET_TYPE(reg, type) \ reg = ((reg & ~REG_TYPE_MASK) | \ ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK)) @@ -144,6 +149,9 @@ #define REG_SET_VALID(reg, valid) \ reg = ((reg & ~REG_VALID_MASK) | \ ((valid << REG_VALID_SHIFT) & REG_VALID_MASK)) +#define REG_SET_BUILTIN(reg, builtin) \ + reg = ((reg & ~REG_BUILTIN_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK)) #define REG_ABS(reg) \ reg = (reg | REG_ABS_MASK) #define REG_NEGV(reg) \ @@ -184,9 +192,6 @@ static const struct { * * REG_VSWZ/REG_SSWZ is an index into this table */ -#define SLOT_VECTOR (1<<0) -#define SLOT_SCALAR (1<<3) -#define SLOT_BOTH (SLOT_VECTOR | SLOT_SCALAR) /* mapping from SWIZZLE_* to r300 native values for scalar insns */ #define SWIZZLE_HALF 6 @@ -202,14 +207,14 @@ static const struct r300_pfs_swizzle { GLuint flags; } v_swiz[] = { /* native swizzles */ - { MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_VECTOR }, - { MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_VECTOR }, - { MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_VECTOR }, - { MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_VECTOR }, - { MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SCALAR }, - { MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_VECTOR }, - { MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_VECTOR }, - { MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_BOTH }, + { MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR }, + { MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR }, + { MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR }, + { MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR }, + { MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR }, + { MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR }, + { MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR }, + { MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH }, { MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0}, { MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0}, { MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0}, @@ -241,10 +246,10 @@ static const struct { int stride; /* difference between SRC0/1/2 */ GLuint flags; } s_swiz[] = { - { R300_FPI2_ARGA_SRC0C_X, 3, SLOT_VECTOR }, - { R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_VECTOR }, - { R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_VECTOR }, - { R300_FPI2_ARGA_SRC0A , 1, SLOT_SCALAR }, + { R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR }, + { R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR }, + { R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR }, + { R300_FPI2_ARGA_SRC0A , 1, SLOT_SRC_SCALAR }, { R300_FPI2_ARGA_ZERO , 0, 0 }, { R300_FPI2_ARGA_ONE , 0, 0 }, { R300_FPI2_ARGA_HALF , 0, 0 } @@ -256,6 +261,7 @@ static const GLuint undef = REG(REG_TYPE_TEMP, SWIZZLE_XYZ, SWIZZLE_W, GL_FALSE, + GL_FALSE, GL_FALSE); /* constant one source */ @@ -264,6 +270,7 @@ static const GLuint pfs_one = REG(REG_TYPE_CONST, SWIZZLE_111, SWIZZLE_ONE, GL_FALSE, + GL_TRUE, GL_TRUE); /* constant half source */ @@ -272,6 +279,7 @@ static const GLuint pfs_half = REG(REG_TYPE_CONST, SWIZZLE_HHH, SWIZZLE_HALF, GL_FALSE, + GL_TRUE, GL_TRUE); /* constant zero source */ @@ -280,6 +288,7 @@ static const GLuint pfs_zero = REG(REG_TYPE_CONST, SWIZZLE_000, SWIZZLE_ZERO, GL_FALSE, + GL_TRUE, GL_TRUE); /* @@ -291,47 +300,105 @@ static void emit_arith(struct r300_fragment_program *rp, int op, GLuint src0, GLuint src1, GLuint src2, int flags); -/* - * Helper functions prototypes +/** + * Get an R300 temporary that can be written to in the given slot. */ -static int get_hw_temp(struct r300_fragment_program *rp) +static int get_hw_temp(struct r300_fragment_program *rp, int slot) { COMPILE_STATE; - int r = ffs(~cs->hwreg_in_use); - if (!r) { + int r; + + for(r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) { ERROR("Out of hardware temps\n"); return 0; } - - cs->hwreg_in_use |= (1 << --r); + + // Reserved is used to avoid the following scenario: + // R300 temporary X is first assigned to Mesa temporary Y during vector ops + // R300 temporary X is then assigned to Mesa temporary Z for further vector ops + // Then scalar ops on Mesa temporary Z are emitted and move back in time + // to overwrite the value of temporary Y. + // End scenario. + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. + cs->hwtemps[r].vector_valid = 0; + cs->hwtemps[r].scalar_valid = 0; + if (r > rp->max_temp_idx) rp->max_temp_idx = r; - + return r; } +/** + * Get an R300 temporary that will act as a TEX destination register. + */ static int get_hw_temp_tex(struct r300_fragment_program *rp) { COMPILE_STATE; int r; - r = ffs(~(cs->hwreg_in_use | cs->used_in_node)); - if (!r) - return get_hw_temp(rp); /* Will cause an indirection */ + for(r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->used_in_node & (1 << r)) + continue; + + // Note: Be very careful here + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) + return get_hw_temp(rp, 0); /* Will cause an indirection */ - cs->hwreg_in_use |= (1 << --r); + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. + cs->hwtemps[r].vector_valid = cs->nrslots; + cs->hwtemps[r].scalar_valid = cs->nrslots; + if (r > rp->max_temp_idx) rp->max_temp_idx = r; return r; } +/** + * Mark the given hardware register as free. + */ static void free_hw_temp(struct r300_fragment_program *rp, int idx) { COMPILE_STATE; - cs->hwreg_in_use &= ~(1<hwtemps[idx].free = cs->nrslots+1; } + +/** + * Create a new Mesa temporary register. + */ static GLuint get_temp_reg(struct r300_fragment_program *rp) { COMPILE_STATE; @@ -354,6 +421,10 @@ static GLuint get_temp_reg(struct r300_fragment_program *rp) return r; } +/** + * Create a new Mesa temporary register that will act as the destination + * register for a texture read. + */ static GLuint get_temp_reg_tex(struct r300_fragment_program *rp) { COMPILE_STATE; @@ -376,6 +447,9 @@ static GLuint get_temp_reg_tex(struct r300_fragment_program *rp) return r; } +/** + * Free a Mesa temporary and the associated R300 temporary. + */ static void free_temp(struct r300_fragment_program *rp, GLuint r) { COMPILE_STATE; @@ -762,10 +836,10 @@ static int t_hw_src(struct r300_fragment_program *rp, switch(REG_GET_TYPE(src)) { case REG_TYPE_TEMP: /* NOTE: if reg==-1 here, a source is being read that - * hasn't been written to. Undefined results + * hasn't been written to. Undefined results. */ if (cs->temps[index].reg == -1) - cs->temps[index].reg = get_hw_temp(rp); + cs->temps[index].reg = get_hw_temp(rp, cs->nrslots); idx = cs->temps[index].reg; @@ -795,7 +869,8 @@ static int t_hw_src(struct r300_fragment_program *rp, static int t_hw_dst(struct r300_fragment_program *rp, GLuint dest, - GLboolean tex) + GLboolean tex, + int slot) { COMPILE_STATE; int idx; @@ -806,7 +881,7 @@ static int t_hw_dst(struct r300_fragment_program *rp, case REG_TYPE_TEMP: if (cs->temps[REG_GET_INDEX(dest)].reg == -1) { if (!tex) { - cs->temps[index].reg = get_hw_temp(rp); + cs->temps[index].reg = get_hw_temp(rp, slot); } else { cs->temps[index].reg = get_hw_temp_tex(rp); } @@ -839,26 +914,20 @@ static int t_hw_dst(struct r300_fragment_program *rp, return idx; } -static void emit_nop(struct r300_fragment_program *rp, - GLuint mask, - GLboolean sync) +static void emit_nop(struct r300_fragment_program *rp) { COMPILE_STATE; - if (sync) - cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); - - if (mask & WRITEMASK_XYZ) { - rp->alu.inst[cs->v_pos].inst0 = NOP_INST0; - rp->alu.inst[cs->v_pos].inst1 = NOP_INST1; - cs->v_pos++; - } - - if (mask & WRITEMASK_W) { - rp->alu.inst[cs->s_pos].inst2 = NOP_INST2; - rp->alu.inst[cs->s_pos].inst3 = NOP_INST3; - cs->s_pos++; + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return; } + + rp->alu.inst[cs->nrslots].inst0 = NOP_INST0; + rp->alu.inst[cs->nrslots].inst1 = NOP_INST1; + rp->alu.inst[cs->nrslots].inst2 = NOP_INST2; + rp->alu.inst[cs->nrslots].inst3 = NOP_INST3; + cs->nrslots++; } static void emit_tex(struct r300_fragment_program *rp, @@ -882,7 +951,7 @@ static void emit_tex(struct r300_fragment_program *rp, rdest = dest; dest = get_temp_reg_tex(rp); } - hwdest = t_hw_dst(rp, dest, GL_TRUE); + hwdest = t_hw_dst(rp, dest, GL_TRUE, rp->node[rp->cur_node].alu_offset); /* Use a temp that hasn't been used in this node, rather * than causing an indirection @@ -904,15 +973,11 @@ static void emit_tex(struct r300_fragment_program *rp, (din & (1<v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); - if (rp->node[rp->cur_node].alu_offset == cs->v_pos) { - /* No alu instructions in the node? Emit a NOP. */ - emit_nop(rp, WRITEMASK_XYZW, GL_TRUE); - cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); - } + if (rp->node[rp->cur_node].alu_offset == cs->nrslots) + emit_nop(rp); rp->node[rp->cur_node].alu_end = - cs->v_pos - rp->node[rp->cur_node].alu_offset - 1; + cs->nrslots - rp->node[rp->cur_node].alu_offset - 1; assert(rp->node[rp->cur_node].alu_end >= 0); if (++rp->cur_node >= PFS_MAX_TEX_INDIRECT) { @@ -922,7 +987,7 @@ static void emit_tex(struct r300_fragment_program *rp, /* Start new node */ rp->node[rp->cur_node].tex_offset = rp->tex.length; - rp->node[rp->cur_node].alu_offset = cs->v_pos; + rp->node[rp->cur_node].alu_offset = cs->nrslots; rp->node[rp->cur_node].tex_end = -1; rp->node[rp->cur_node].alu_end = -1; rp->node[rp->cur_node].flags = 0; @@ -954,84 +1019,243 @@ static void emit_tex(struct r300_fragment_program *rp, } } -/* Add sources to FPI1/FPI3 lists. If source is already on list, - * reuse the index instead of wasting a source. + +/** + * Returns the first slot where we could possibly allow writing to dest, + * according to register allocation. */ -static int add_src(struct r300_fragment_program *rp, - int reg, - int pos, - int srcmask) +static int get_earliest_allowed_write( + struct r300_fragment_program* rp, + GLuint dest) { COMPILE_STATE; - int csm, i; - - /* Look for matches */ - for (i=0,csm=srcmask; i<3; i++,csm=csm<<1) { - /* If sources have been allocated in this position(s)... */ - if ((cs->slot[pos].umask & csm) == csm) { - /* ... and the register number(s) match, re-use the - source */ - if (srcmask == SLOT_VECTOR && - cs->slot[pos].vsrc[i] == reg) - return i; - if (srcmask == SLOT_SCALAR && - cs->slot[pos].ssrc[i] == reg) - return i; - if (srcmask == SLOT_BOTH && - cs->slot[pos].vsrc[i] == reg && - cs->slot[pos].ssrc[i] == reg) - return i; - } - } + int idx; + GLuint index = REG_GET_INDEX(dest); + assert(REG_GET_VALID(dest)); - /* Look for free spaces */ - for (i=0,csm=srcmask; i<3; i++,csm=csm<<1) { - /* If the position(s) haven't been allocated */ - if ((cs->slot[pos].umask & csm) == 0) { - cs->slot[pos].umask |= csm; - - if (srcmask & SLOT_VECTOR) - cs->slot[pos].vsrc[i] = reg; - if (srcmask & SLOT_SCALAR) - cs->slot[pos].ssrc[i] = reg; - return i; - } + switch(REG_GET_TYPE(dest)) { + case REG_TYPE_TEMP: + if (cs->temps[index].reg == -1) + return 0; + + idx = cs->temps[index].reg; + break; + case REG_TYPE_OUTPUT: + return 0; + default: + ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); + return 0; } - //ERROR("Failed to allocate sources in FPI1/FPI3!\n"); - return 0; + return cs->hwtemps[idx].reserved; } -/* Determine whether or not to position opcode in the same ALU slot for both - * vector and scalar portions of an instruction. + +/** + * Allocates a slot for an ALU instruction that can consist of + * a vertex part or a scalar part or both. + * + * Sources from src (src[0] to src[argc-1]) are added to the slot in the + * appropriate position (vector and/or scalar), and their positions are + * recorded in the srcpos array. + * + * This function emits instruction code for the source fetch and the + * argument selection. It does not emit instruction code for the + * opcode or the destination selection. * - * It's not necessary to force the first case, but it makes disassembled - * shaders easier to read. + * @return the index of the slot */ -static GLboolean force_same_slot(int vop, - int sop, - GLboolean emit_vop, - GLboolean emit_sop, - int argc, - GLuint *src) +static int find_and_prepare_slot(struct r300_fragment_program* rp, + GLboolean emit_vop, + GLboolean emit_sop, + int argc, + GLuint* src, + GLuint dest) { - int i; - - if (emit_vop && emit_sop) - return GL_TRUE; + COMPILE_STATE; + int hwsrc[3]; + int srcpos[3]; + unsigned int used; + int tempused; + int tempvsrc[3]; + int tempssrc[3]; + int pos; + int regnr; + int i,j; + + // Determine instruction slots, whether sources are required on + // vector or scalar side, and the smallest slot number where + // all source registers are available + used = 0; + if (emit_vop) + used |= SLOT_OP_VECTOR; + if (emit_sop) + used |= SLOT_OP_SCALAR; + + pos = get_earliest_allowed_write(rp, dest); + + if (rp->node[rp->cur_node].alu_offset > pos) + pos = rp->node[rp->cur_node].alu_offset; + for(i = 0; i < argc; ++i) { + if (!REG_GET_BUILTIN(src[i])) { + if (emit_vop) + used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i; + if (emit_sop) + used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; + } + + hwsrc[i] = t_hw_src(rp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */ + regnr = hwsrc[i] & 31; + + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_valid > pos) + pos = cs->hwtemps[regnr].vector_valid; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_valid > pos) + pos = cs->hwtemps[regnr].scalar_valid; + } + } + } + + // Find a slot that fits + for(; ; ++pos) { + if (cs->slot[pos].used & used & SLOT_OP_BOTH) + continue; + + if (pos >= cs->nrslots) { + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return -1; + } - if (emit_vop && vop == R300_FPI0_OUTC_REPL_ALPHA) - return GL_TRUE; + rp->alu.inst[pos].inst0 = NOP_INST0; + rp->alu.inst[pos].inst2 = NOP_INST2; + cs->nrslots++; + } + + // Note: When we need both parts (vector and scalar) of a source, + // we always try to put them into the same position. This makes the + // code easier to read, and it is optimal (i.e. one doesn't gain + // anything by splitting the parts). + // It also avoids headaches with swizzles that access both parts (i.e WXY) + tempused = cs->slot[pos].used; + for(i = 0; i < 3; ++i) { + tempvsrc[i] = cs->slot[pos].vsrc[i]; + tempssrc[i] = cs->slot[pos].ssrc[i]; + } + + for(i = 0; i < argc; ++i) { + int flags = (used >> i) & SLOT_SRC_BOTH; + + if (!flags) { + srcpos[i] = 0; + continue; + } + + for(j = 0; j < 3; ++j) { + if ((tempused >> j) & flags & SLOT_SRC_VECTOR) { + if (tempvsrc[j] != hwsrc[i]) + continue; + } + + if ((tempused >> j) & flags & SLOT_SRC_SCALAR) { + if (tempssrc[j] != hwsrc[i]) + continue; + } + + break; + } + + if (j == 3) + break; + + srcpos[i] = j; + tempused |= flags << j; + if (flags & SLOT_SRC_VECTOR) + tempvsrc[j] = hwsrc[i]; + if (flags & SLOT_SRC_SCALAR) + tempssrc[j] = hwsrc[i]; + } + + if (i == argc) + break; + } + + // Found a slot, reserve it + cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH); + for(i = 0; i < 3; ++i) { + cs->slot[pos].vsrc[i] = tempvsrc[i]; + cs->slot[pos].ssrc[i] = tempssrc[i]; + } + + // Emit the source fetch code + rp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK; + rp->alu.inst[pos].inst1 |= + ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | + (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | + (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); + + rp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK; + rp->alu.inst[pos].inst3 |= + ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | + (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | + (cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); + + // Emit the argument selection code if (emit_vop) { - for (i=0;ialu.inst[pos].inst0 &= + ~(R300_FPI0_ARG0C_MASK|R300_FPI0_ARG1C_MASK|R300_FPI0_ARG2C_MASK); + rp->alu.inst[pos].inst0 |= + (swz[0] << R300_FPI0_ARG0C_SHIFT) | + (swz[1] << R300_FPI0_ARG1C_SHIFT) | + (swz[2] << R300_FPI0_ARG2C_SHIFT); + } + + if (emit_sop) { + int swz[3]; + + for(i = 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + + (srcpos[i] * s_swiz[REG_GET_SSWZ(src[i])].stride)) | + ((src[i] & REG_NEGV_MASK) ? ARG_NEG : 0) | + ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0); + } else { + swz[i] = R300_FPI2_ARGA_ZERO; + } + } + + rp->alu.inst[pos].inst2 &= + ~(R300_FPI2_ARG0A_MASK|R300_FPI2_ARG1A_MASK|R300_FPI2_ARG2A_MASK); + rp->alu.inst[pos].inst2 |= + (swz[0] << R300_FPI2_ARG0A_SHIFT) | + (swz[1] << R300_FPI2_ARG1A_SHIFT) | + (swz[2] << R300_FPI2_ARG2A_SHIFT); } - return GL_FALSE; + return pos; } + +/** + * Append an ALU instruction to the instruction list. + */ static void emit_arith(struct r300_fragment_program *rp, int op, GLuint dest, @@ -1043,87 +1267,31 @@ static void emit_arith(struct r300_fragment_program *rp, { COMPILE_STATE; GLuint src[3] = { src0, src1, src2 }; - int hwsrc[3], sswz[3], vswz[3]; int hwdest; - GLboolean emit_vop = GL_FALSE, emit_sop = GL_FALSE; + GLboolean emit_vop, emit_sop; int vop, sop, argc; - int vpos, spos; - int i; + int pos; vop = r300_fpop[op].v_op; sop = r300_fpop[op].s_op; argc = r300_fpop[op].argc; + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && + REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) + mask &= ~WRITEMASK_XYZ; + + emit_vop = GL_FALSE; + emit_sop = GL_FALSE; if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3) emit_vop = GL_TRUE; if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA) emit_sop = GL_TRUE; - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && - REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) - emit_vop = GL_FALSE; - - if (force_same_slot(vop, sop, emit_vop, emit_sop, argc, src)) { - vpos = spos = MAX2(cs->v_pos, cs->s_pos); - } else { - vpos = cs->v_pos; - spos = cs->s_pos; - /* Here is where we'd decide on where a safe place is to - * combine this instruction with a previous one. - * - * This is extremely simple for now.. if a source depends - * on the opposite stream, force the same instruction. - */ - for (i=0;i<3;i++) { - if (emit_vop && - (v_swiz[REG_GET_VSWZ(src[i])].flags & SLOT_SCALAR)) { - vpos = spos = MAX2(vpos, spos); - break; - } - if (emit_sop && - (s_swiz[REG_GET_SSWZ(src[i])].flags & SLOT_VECTOR)) { - vpos = spos = MAX2(vpos, spos); - break; - } - } - } + pos = find_and_prepare_slot(rp, emit_vop, emit_sop, argc, src, dest); + if (pos < 0) + return; - /* - Convert src->hwsrc, record for FPI1/FPI3 - * - Determine ARG parts of FPI0/FPI2, unused args are filled - * with ARG_ZERO. - */ - for (i=0;i<3;i++) { - int srcpos; - - if (i >= argc) { - vswz[i] = R300_FPI0_ARGC_ZERO; - sswz[i] = R300_FPI2_ARGA_ZERO; - continue; - } - - hwsrc[i] = t_hw_src(rp, src[i], GL_FALSE); - - if (emit_vop && vop != R300_FPI0_OUTC_REPL_ALPHA) { - srcpos = add_src(rp, hwsrc[i], vpos, - v_swiz[REG_GET_VSWZ(src[i])].flags); - vswz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + - (srcpos * - v_swiz[REG_GET_VSWZ(src[i])].stride)) | - ((src[i] & REG_NEGV_MASK) ? ARG_NEG : 0) | - ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0); - } else vswz[i] = R300_FPI0_ARGC_ZERO; - - if (emit_sop) { - srcpos = add_src(rp, hwsrc[i], spos, - s_swiz[REG_GET_SSWZ(src[i])].flags); - sswz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + - (srcpos * - s_swiz[REG_GET_SSWZ(src[i])].stride)) | - ((src[i] & REG_NEGS_MASK) ? ARG_NEG : 0) | - ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0); - } else sswz[i] = R300_FPI2_ARGA_ZERO; - } - hwdest = t_hw_dst(rp, dest, GL_FALSE); + hwdest = t_hw_dst(rp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ if (flags & PFS_FLAG_SAT) { vop |= R300_FPI0_OUTC_SAT; @@ -1131,58 +1299,45 @@ static void emit_arith(struct r300_fragment_program *rp, } /* Throw the pieces together and get FPI0/1 */ - rp->alu.inst[vpos].inst1 = - ((cs->slot[vpos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | - (cs->slot[vpos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | - (cs->slot[vpos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); if (emit_vop) { - rp->alu.inst[vpos].inst0 = vop | - (vswz[0] << R300_FPI0_ARG0C_SHIFT) | - (vswz[1] << R300_FPI0_ARG1C_SHIFT) | - (vswz[2] << R300_FPI0_ARG2C_SHIFT); + rp->alu.inst[pos].inst0 |= vop; - rp->alu.inst[vpos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; + rp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - rp->alu.inst[vpos].inst1 |= + rp->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_OUTPUT_MASK_SHIFT; } else assert(0); } else { - rp->alu.inst[vpos].inst1 |= + rp->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_REG_MASK_SHIFT; + + cs->hwtemps[hwdest].vector_valid = pos+1; } - cs->v_pos = vpos+1; - } else if (spos >= vpos) - rp->alu.inst[spos].inst0 = NOP_INST0; + } /* And now FPI2/3 */ - rp->alu.inst[spos].inst3 = - ((cs->slot[spos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | - (cs->slot[spos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | - (cs->slot[spos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); if (emit_sop) { - rp->alu.inst[spos].inst2 = sop | - sswz[0] << R300_FPI2_ARG0A_SHIFT | - sswz[1] << R300_FPI2_ARG1A_SHIFT | - sswz[2] << R300_FPI2_ARG2A_SHIFT; + rp->alu.inst[pos].inst2 |= sop; if (mask & WRITEMASK_W) { if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - rp->alu.inst[spos].inst3 |= + rp->alu.inst[pos].inst3 |= (hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_OUTPUT; } else if (REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { - rp->alu.inst[spos].inst3 |= R300_FPI3_DSTA_DEPTH; + rp->alu.inst[pos].inst3 |= R300_FPI3_DSTA_DEPTH; } else assert(0); } else { - rp->alu.inst[spos].inst3 |= + rp->alu.inst[pos].inst3 |= (hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_REG; + + cs->hwtemps[hwdest].scalar_valid = pos+1; } } - cs->s_pos = spos+1; - } else if (vpos >= spos) - rp->alu.inst[vpos].inst2 = NOP_INST2; - + } + return; } @@ -1922,7 +2077,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) for (i=0;ictx->Const.MaxTextureUnits;i++) { if (InputsRead & (FRAG_BIT_TEX0 << i)) { cs->inputs[FRAG_ATTRIB_TEX0+i].refcount = 0; - cs->inputs[FRAG_ATTRIB_TEX0+i].reg = get_hw_temp(rp); + cs->inputs[FRAG_ATTRIB_TEX0+i].reg = get_hw_temp(rp, 0); } } InputsRead &= ~FRAG_BITS_TEX_ANY; @@ -1930,7 +2085,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) /* fragment position treated as a texcoord */ if (InputsRead & FRAG_BIT_WPOS) { cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; - cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(rp); + cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(rp, 0); insert_wpos(&mp->Base); } InputsRead &= ~FRAG_BIT_WPOS; @@ -1938,14 +2093,14 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) /* Then primary colour */ if (InputsRead & FRAG_BIT_COL0) { cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(rp); + cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(rp, 0); } InputsRead &= ~FRAG_BIT_COL0; /* Secondary color */ if (InputsRead & FRAG_BIT_COL1) { cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(rp); + cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(rp, 0); } InputsRead &= ~FRAG_BIT_COL1; @@ -2030,13 +2185,12 @@ void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_pr } /* Finish off */ - cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); rp->node[rp->cur_node].alu_end = - cs->v_pos - rp->node[rp->cur_node].alu_offset - 1; + cs->nrslots - rp->node[rp->cur_node].alu_offset - 1; if (rp->node[rp->cur_node].tex_end < 0) rp->node[rp->cur_node].tex_end = 0; rp->alu_offset = 0; - rp->alu_end = cs->v_pos - 1; + rp->alu_end = cs->nrslots - 1; rp->tex_offset = 0; rp->tex_end = rp->tex.length ? rp->tex.length - 1 : 0; assert(rp->node[rp->cur_node].alu_end >= 0); @@ -2053,7 +2207,7 @@ void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_pr /* just some random things... */ static void dump_program(struct r300_fragment_program *rp) { - int i; + int n, i, j; static int pc = 0; fprintf(stderr, "pc=%d*************************************\n", pc++); @@ -2066,46 +2220,136 @@ static void dump_program(struct r300_fragment_program *rp) fprintf(stderr, "Hardware program\n"); fprintf(stderr, "----------------\n"); - fprintf(stderr, "tex:\n"); - - for(i=0;itex.length;i++) { - fprintf(stderr, "%08x\n", rp->tex.inst[i]); - } - - for (i=0;i<(rp->cur_node+1);i++) { + for (n = 0; n < (rp->cur_node+1); n++) { fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "\ - "alu_end: %d, tex_end: %d\n", i, - rp->node[i].alu_offset, - rp->node[i].tex_offset, - rp->node[i].alu_end, - rp->node[i].tex_end); + "alu_end: %d, tex_end: %d\n", n, + rp->node[n].alu_offset, + rp->node[n].tex_offset, + rp->node[n].alu_end, + rp->node[n].tex_end); + + if (rp->tex.length) { + fprintf(stderr, " TEX:\n"); + for(i = rp->node[n].tex_offset; i <= rp->node[n].tex_end; ++i) + fprintf(stderr, " %08x\n", rp->tex.inst[i]); + } + + for(i = rp->node[n].alu_offset; i <= rp->node[n].alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + + for(j = 0; j < 3; ++j) { + int regc = rp->alu.inst[i].inst1 >> (j*6); + int rega = rp->alu.inst[i].inst3 >> (j*6); + + sprintf(srcc[j], "%c%i", (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", (rega & 32) ? 'c' : 't', rega & 31); + } + + sprintf(dstc, "t%i.%c%c%c o%i.%c%c%c", + (rp->alu.inst[i].inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_X) ? 'x' : ' ', + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_Y) ? 'y' : ' ', + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_Z) ? 'z' : ' ', + (rp->alu.inst[i].inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_X) ? 'x' : ' ', + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? 'y' : ' ', + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? 'z' : ' '); + + sprintf(dsta, "t%i.%c o%i.%c %c", + (rp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31, + (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) ? 'w' : ' ', + (rp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31, + (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) ? 'w' : ' ', + (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) ? 'Z' : ' '); + + fprintf(stderr, "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", + i, + srcc[0], srcc[1], srcc[2], dstc, rp->alu.inst[i].inst1, + srca[0], srca[1], srca[2], dsta, rp->alu.inst[i].inst3); + + for(j = 0; j < 3; ++j) { + int regc = rp->alu.inst[i].inst0 >> (j*7); + int rega = rp->alu.inst[i].inst2 >> (j*7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch(d % 4) { + case R300_FPI0_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d-12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch(d/3) { + case 0: + sprintf(buf, "%s.yzx", srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, + (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], 'x' + (char)(d%3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d-9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, + (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], rp->alu.inst[i].inst0, + arga[0], arga[1], arga[2], rp->alu.inst[i].inst2); + } } - - fprintf(stderr, "%08x\n", - ((rp->tex_end << 16) | (R300_PFS_TEXI_0 >> 2))); - for (i=0;i<=rp->tex_end;i++) - fprintf(stderr, "%08x\n", rp->tex.inst[i]); - - /* dump program in pretty_print_command_stream.tcl-readable format */ - fprintf(stderr, "%08x\n", - ((rp->alu_end << 16) | (R300_PFS_INSTR0_0 >> 2))); - for (i=0;i<=rp->alu_end;i++) - fprintf(stderr, "%08x\n", rp->alu.inst[i].inst0); - - fprintf(stderr, "%08x\n", - ((rp->alu_end << 16) | (R300_PFS_INSTR1_0 >> 2))); - for (i=0;i<=rp->alu_end;i++) - fprintf(stderr, "%08x\n", rp->alu.inst[i].inst1); - - fprintf(stderr, "%08x\n", - ((rp->alu_end << 16) | (R300_PFS_INSTR2_0 >> 2))); - for (i=0;i<=rp->alu_end;i++) - fprintf(stderr, "%08x\n", rp->alu.inst[i].inst2); - - fprintf(stderr, "%08x\n", - ((rp->alu_end << 16) | (R300_PFS_INSTR3_0 >> 2))); - for (i=0;i<=rp->alu_end;i++) - fprintf(stderr, "%08x\n", rp->alu.inst[i].inst3); - - fprintf(stderr, "00000000\n"); } diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 3de15752b1..1f4a2d2e64 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1047,7 +1047,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * WRT swizzling. If, for example, you want to load an R component into an * Alpha operand, this R component is taken from a *color* source, not from * an alpha source. The corresponding register doesn't even have to appear in - * the alpha sources list. (I hope this alll makes sense to you) + * the alpha sources list. (I hope this all makes sense to you) * * Destination selection * The destination register index is in FPI1 (color) and FPI3 (alpha) @@ -1074,6 +1074,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_FPI1_SRC2C_SHIFT 12 # define R300_FPI1_SRC2C_MASK (31 << 12) # define R300_FPI1_SRC2C_CONST (1 << 17) +# define R300_FPI1_SRC_MASK 0x0003ffff # define R300_FPI1_DSTC_SHIFT 18 # define R300_FPI1_DSTC_MASK (31 << 18) # define R300_FPI1_DSTC_REG_MASK_SHIFT 23 @@ -1095,6 +1096,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_FPI3_SRC2A_SHIFT 12 # define R300_FPI3_SRC2A_MASK (31 << 12) # define R300_FPI3_SRC2A_CONST (1 << 17) +# define R300_FPI3_SRC_MASK 0x0003ffff # define R300_FPI3_DSTA_SHIFT 18 # define R300_FPI3_DSTA_MASK (31 << 18) # define R300_FPI3_DSTA_REG (1 << 23) -- cgit v1.2.3 From a8e65a010c17444c63859c17786ecb4010bd49c1 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sun, 18 Mar 2007 12:46:53 +0100 Subject: r300: Fix hw fragment program dump Dumps of fragment programs were incorrect when the program consisted of multiple nodes. Also, improved the formatting a bit. --- src/mesa/drivers/dri/r300/r300_fragprog.c | 51 ++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 17 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index b2c89ccb36..c3d902a4aa 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -2230,15 +2230,16 @@ static void dump_program(struct r300_fragment_program *rp) if (rp->tex.length) { fprintf(stderr, " TEX:\n"); - for(i = rp->node[n].tex_offset; i <= rp->node[n].tex_end; ++i) + for(i = rp->node[n].tex_offset; i <= rp->node[n].tex_offset+rp->node[n].tex_end; ++i) fprintf(stderr, " %08x\n", rp->tex.inst[i]); } - for(i = rp->node[n].alu_offset; i <= rp->node[n].alu_end; ++i) { + for(i = rp->node[n].alu_offset; i <= rp->node[n].alu_offset+rp->node[n].alu_end; ++i) { char srcc[3][10], dstc[20]; char srca[3][10], dsta[20]; char argc[3][20]; char arga[3][20]; + char flags[5], tmp[10]; for(j = 0; j < 3; ++j) { int regc = rp->alu.inst[i].inst1 >> (j*6); @@ -2248,22 +2249,38 @@ static void dump_program(struct r300_fragment_program *rp) sprintf(srca[j], "%c%i", (rega & 32) ? 'c' : 't', rega & 31); } - sprintf(dstc, "t%i.%c%c%c o%i.%c%c%c", - (rp->alu.inst[i].inst1 >> R300_FPI1_DSTC_SHIFT) & 31, - (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_X) ? 'x' : ' ', - (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_Y) ? 'y' : ' ', - (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_Z) ? 'z' : ' ', - (rp->alu.inst[i].inst1 >> R300_FPI1_DSTC_SHIFT) & 31, - (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_X) ? 'x' : ' ', - (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? 'y' : ' ', - (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? 'z' : ' '); + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "", + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "", + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (rp->alu.inst[i].inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "", + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "", + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (rp->alu.inst[i].inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } - sprintf(dsta, "t%i.%c o%i.%c %c", - (rp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31, - (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) ? 'w' : ' ', - (rp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31, - (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) ? 'w' : ' ', - (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) ? 'Z' : ' '); + dsta[0] = 0; + if (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) { + sprintf(dsta, "t%i.w ", (rp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + } + if (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", (rp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) { + strcat(dsta, "Z"); + } fprintf(stderr, "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" " w: %3s %3s %3s -> %-20s (%08x)\n", -- cgit v1.2.3 From ec1a77c86481d7f77542fbecda0e81b74732c90f Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sun, 18 Mar 2007 13:09:21 +0100 Subject: r300: Fragment program dumps format tex instructions --- src/mesa/drivers/dri/r300/r300_fragprog.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index c3d902a4aa..3c54830312 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -2230,8 +2230,34 @@ static void dump_program(struct r300_fragment_program *rp) if (rp->tex.length) { fprintf(stderr, " TEX:\n"); - for(i = rp->node[n].tex_offset; i <= rp->node[n].tex_offset+rp->node[n].tex_end; ++i) - fprintf(stderr, " %08x\n", rp->tex.inst[i]); + for(i = rp->node[n].tex_offset; i <= rp->node[n].tex_offset+rp->node[n].tex_end; ++i) { + const char* instr; + + switch((rp->tex.inst[i] >> R300_FPITX_OPCODE_SHIFT) & 15) { + case R300_FPITX_OP_TEX: + instr = "TEX"; + break; + case R300_FPITX_OP_KIL: + instr = "KIL"; + break; + case R300_FPITX_OP_TXP: + instr = "TXP"; + break; + case R300_FPITX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (rp->tex.inst[i] >> R300_FPITX_DST_SHIFT) & 31, + (rp->tex.inst[i] & R300_FPITX_SRC_CONST) ? 'c': 't', + (rp->tex.inst[i] >> R300_FPITX_SRC_SHIFT) & 31, + (rp->tex.inst[i] & R300_FPITX_IMAGE_MASK) >> R300_FPITX_IMAGE_SHIFT, + rp->tex.inst[i]); + } } for(i = rp->node[n].alu_offset; i <= rp->node[n].alu_offset+rp->node[n].alu_end; ++i) { -- cgit v1.2.3 From ff6ab9b45b180ab9bf261afa50888e6e740d7924 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sun, 18 Mar 2007 13:29:18 +0100 Subject: r300: Fix fragment program reordering Do not move an instruction that writes to a temp forward past an instruction that reads the same temporary. --- src/mesa/drivers/dri/r300/r300_context.h | 5 +++++ src/mesa/drivers/dri/r300/r300_fragprog.c | 37 ++++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index bc43953ff3..29436ab9e0 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -674,6 +674,11 @@ struct reg_lifetime { emitted instruction that writes to the register */ int vector_valid; int scalar_valid; + + /* Index to the slot where the register was last read. + This is also the first slot in which the register may be written again */ + int vector_lastread; + int scalar_lastread; }; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 3c54830312..89e9f6531a 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -1026,10 +1026,11 @@ static void emit_tex(struct r300_fragment_program *rp, */ static int get_earliest_allowed_write( struct r300_fragment_program* rp, - GLuint dest) + GLuint dest, int mask) { COMPILE_STATE; int idx; + int pos; GLuint index = REG_GET_INDEX(dest); assert(REG_GET_VALID(dest)); @@ -1047,7 +1048,17 @@ static int get_earliest_allowed_write( return 0; } - return cs->hwtemps[idx].reserved; + pos = cs->hwtemps[idx].reserved; + if (mask & WRITEMASK_XYZ) { + if (pos < cs->hwtemps[idx].vector_lastread) + pos = cs->hwtemps[idx].vector_lastread; + } + if (mask & WRITEMASK_W) { + if (pos < cs->hwtemps[idx].scalar_lastread) + pos = cs->hwtemps[idx].scalar_lastread; + } + + return pos; } @@ -1070,7 +1081,8 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, GLboolean emit_sop, int argc, GLuint* src, - GLuint dest) + GLuint dest, + int mask) { COMPILE_STATE; int hwsrc[3]; @@ -1092,7 +1104,7 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, if (emit_sop) used |= SLOT_OP_SCALAR; - pos = get_earliest_allowed_write(rp, dest); + pos = get_earliest_allowed_write(rp, dest, mask); if (rp->node[rp->cur_node].alu_offset > pos) pos = rp->node[rp->cur_node].alu_offset; @@ -1191,6 +1203,21 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, cs->slot[pos].ssrc[i] = tempssrc[i]; } + for(i = 0; i < argc; ++i) { + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + int regnr = hwsrc[i] & 31; + + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_lastread < pos) + cs->hwtemps[regnr].vector_lastread = pos; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_lastread < pos) + cs->hwtemps[regnr].scalar_lastread = pos; + } + } + } + // Emit the source fetch code rp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK; rp->alu.inst[pos].inst1 |= @@ -1287,7 +1314,7 @@ static void emit_arith(struct r300_fragment_program *rp, if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA) emit_sop = GL_TRUE; - pos = find_and_prepare_slot(rp, emit_vop, emit_sop, argc, src, dest); + pos = find_and_prepare_slot(rp, emit_vop, emit_sop, argc, src, dest, mask); if (pos < 0) return; -- cgit v1.2.3 From b645e8c96dc1e3b153cf882c8931f10e0c006f04 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sun, 18 Mar 2007 18:32:32 +0100 Subject: r300: Streamlined fragment program LIT implementation Fix a bug in the LIT implementation (clamp exponent to 128, not 0.5) and change the implementation around. In theory, the new implementation needs as little as 5 instruction slots. Unfortunately, the dependency analysis in find_and_replace_slot is not strong enough to look at individual components of a register yet. --- src/mesa/drivers/dri/r300/r300_fragprog.c | 163 ++++++++++++++++++------------ 1 file changed, 101 insertions(+), 62 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 89e9f6531a..b0681e2808 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -492,7 +492,7 @@ static GLuint emit_param4fv(struct r300_fragment_program *rp, return r; } -static GLuint emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp) +static GLuint emit_const4fv(struct r300_fragment_program *rp, const GLfloat* cp) { GLuint r = undef; GLuint index; @@ -1405,15 +1405,112 @@ static void make_sin_const(struct r300_fragment_program *rp) } } +/** + * Emit a LIT instruction. + * \p flags may be PFS_FLAG_SAT + * + * Definition of LIT (from ARB_fragment_program): + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots. So unless there's some special undocumented opcode, + * this implementation is potentially optimal. Unfortunately, + * emit_arith is a bit too conservative because it doesn't understand + * partial writes to the vector component. + */ +static void emit_lit(struct r300_fragment_program *rp, + GLuint dest, + int mask, + GLuint src, + int flags) +{ + COMPILE_STATE; + static const GLfloat cnstv[4] = { 127.999999, 127.999999, 127.999999, -127.999999 }; + GLuint cnst; + int needTemporary; + GLuint temp; + + cnst = emit_const4fv(rp, cnstv); + + needTemporary = 0; + if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { + needTemporary = 1; + } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + // LIT is typically followed by DP3/DP4, so there's no point + // in creating special code for this case + needTemporary = 1; + } + + if (needTemporary) { + temp = keep(get_temp_reg(rp)); + } else { + temp = keep(dest); + } + + // Npte: The order of emit_arith inside the slots is relevant, + // because emit_arith only looks at scalar vs. vector when resolving + // dependencies, and it does not consider individual vector components, + // so swizzling between the two parts can create fake dependencies. + + // First slot + emit_arith(rp, PFS_OP_MAX, temp, WRITEMASK_XY, + keep(src), pfs_zero, undef, 0); + emit_arith(rp, PFS_OP_MAX, temp, WRITEMASK_W, + src, cnst, undef, 0); + + // Second slot + emit_arith(rp, PFS_OP_MIN, temp, WRITEMASK_Z, + swizzle(temp, W, W, W, W), cnst, undef, 0); + emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W, + swizzle(temp, Y, Y, Y, Y), undef, undef, 0); + + // Third slot + // If desired, we saturate the y result here. + // This does not affect the use as a condition variable in the CMP later + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, + temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y, + swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); + + // Fourth slot + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, + pfs_one, pfs_one, pfs_zero, 0); + emit_arith(rp, PFS_OP_EX2, temp, WRITEMASK_W, + temp, undef, undef, 0); + + // Fifth slot + emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z, + swizzle(temp, W, W, W, W), pfs_zero, swizzle(temp, Y, Y, Y, Y), flags); + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, + pfs_one, pfs_one, pfs_zero, 0); + + if (needTemporary) { + emit_arith(rp, PFS_OP_MAD, dest, mask, + temp, pfs_one, pfs_zero, flags); + free_temp(rp, temp); + } else { + // Decrease refcount of the destination + t_hw_dst(rp, dest, GL_FALSE, cs->nrslots); + } +} + + static GLboolean parse_program(struct r300_fragment_program *rp) { struct gl_fragment_program *mp = &rp->mesa_program; const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; GLuint src[3], dest, temp[2]; - GLuint cnst; int flags, mask = 0; - GLfloat cnstv[4] = {0.0, 0.0, 0.0, 0.0}; if (!inst || inst[0].Opcode == OPCODE_END) { ERROR("empty program?\n"); @@ -1612,66 +1709,8 @@ static GLboolean parse_program(struct r300_fragment_program *rp) flags); break; case OPCODE_LIT: - /* LIT - * if (s.x < 0) t.x = 0; else t.x = s.x; - * if (s.y < 0) t.y = 0; else t.y = s.y; - * if (s.w > 128.0) t.w = 128.0; else t.w = s.w; - * if (s.w < -128.0) t.w = -128.0; else t.w = s.w; - * r.x = 1.0 - * if (t.x > 0) r.y = pow(t.y, t.w); else r.y = 0; - * Also r.y = 0 if t.y < 0 - * For the t.x > 0 FGLRX use the CMPH opcode which - * change the compare to (t.x + 0.5) > 0.5 we may - * save one instruction by doing CMP -t.x - */ - cnstv[0] = cnstv[1] = cnstv[2] = cnstv[3] = 0.50001; src[0] = t_src(rp, fpi->SrcReg[0]); - temp[0] = get_temp_reg(rp); - cnst = emit_const4fv(rp, cnstv); - emit_arith(rp, PFS_OP_CMP, temp[0], - WRITEMASK_X | WRITEMASK_Y, - src[0], pfs_zero, src[0], flags); - emit_arith(rp, PFS_OP_MIN, temp[0], WRITEMASK_Z, - swizzle(keep(src[0]), W, W, W, W), - cnst, undef, flags); - emit_arith(rp, PFS_OP_LG2, temp[0], WRITEMASK_W, - swizzle(temp[0], Y, Y, Y, Y), - undef, undef, flags); - emit_arith(rp, PFS_OP_MAX, temp[0], WRITEMASK_Z, - temp[0], negate(cnst), undef, flags); - emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_W, - temp[0], swizzle(temp[0], Z, Z, Z, Z), - pfs_zero, flags); - emit_arith(rp, PFS_OP_EX2, temp[0], WRITEMASK_W, - temp[0], undef, undef, flags); - emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y, - swizzle(keep(temp[0]), X, X, X, X), - pfs_one, pfs_zero, flags); -#if 0 - emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X, - temp[0], pfs_one, pfs_half, flags); - emit_arith(rp, PFS_OP_CMPH, temp[0], WRITEMASK_Z, - swizzle(keep(temp[0]), W, W, W, W), - pfs_zero, swizzle(keep(temp[0]), X, X, X, X), - flags); -#else - emit_arith(rp, PFS_OP_CMP, temp[0], WRITEMASK_Z, - pfs_zero, - swizzle(keep(temp[0]), W, W, W, W), - negate(swizzle(keep(temp[0]), X, X, X, X)), - flags); -#endif - emit_arith(rp, PFS_OP_CMP, dest, WRITEMASK_Z, - pfs_zero, temp[0], - negate(swizzle(keep(temp[0]), Y, Y, Y, Y)), - flags); - emit_arith(rp, PFS_OP_MAD, dest, - WRITEMASK_X | WRITEMASK_W, - pfs_one, - pfs_one, - pfs_zero, - flags); - free_temp(rp, temp[0]); + emit_lit(rp, dest, mask, src[0], flags); break; case OPCODE_LRP: src[0] = t_src(rp, fpi->SrcReg[0]); -- cgit v1.2.3 From c4bf863f4cb48c2de284933bb1fc725b540ee810 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Mon, 19 Mar 2007 19:45:45 +0100 Subject: r300: Fix WRITEMASK handling when writing to result.depth This is a necessary change to emit the right instructions when writing to result.depth. However, even with this test, Z-write doesn't work properly, and I don't fully understand why. In addition to this, we'll at least have to disable early-Z, but even that doesn't seem to be enough. --- src/mesa/drivers/dri/r300/r300_fragprog.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index b0681e2808..fb559e880a 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -1304,9 +1304,14 @@ static void emit_arith(struct r300_fragment_program *rp, argc = r300_fpop[op].argc; if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && - REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) - mask &= ~WRITEMASK_XYZ; - + REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { + if (mask & WRITEMASK_Z) { + mask = WRITEMASK_W; + } else { + return; + } + } + emit_vop = GL_FALSE; emit_sop = GL_FALSE; if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3) -- cgit v1.2.3 From 7b992d024b20df111db007286e5a54afcb531fb1 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Mon, 19 Mar 2007 19:46:25 +0100 Subject: r300: Whitespace cleanup (remove trailing spaces) --- src/mesa/drivers/dri/r300/r300_fragprog.c | 218 +++++++++++++++--------------- 1 file changed, 109 insertions(+), 109 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index fb559e880a..93b9c39635 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -307,17 +307,17 @@ static int get_hw_temp(struct r300_fragment_program *rp, int slot) { COMPILE_STATE; int r; - + for(r = 0; r < PFS_NUM_TEMP_REGS; ++r) { if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot) break; } - + if (r >= PFS_NUM_TEMP_REGS) { ERROR("Out of hardware temps\n"); return 0; } - + // Reserved is used to avoid the following scenario: // R300 temporary X is first assigned to Mesa temporary Y during vector ops // R300 temporary X is then assigned to Mesa temporary Z for further vector ops @@ -326,17 +326,17 @@ static int get_hw_temp(struct r300_fragment_program *rp, int slot) // End scenario. cs->hwtemps[r].reserved = cs->hwtemps[r].free; cs->hwtemps[r].free = -1; - + // Reset to some value that won't mess things up when the user // tries to read from a temporary that hasn't been assigned a value yet. // In the normal case, vector_valid and scalar_valid should be set to // a sane value by the first emit that writes to this temporary. cs->hwtemps[r].vector_valid = 0; cs->hwtemps[r].scalar_valid = 0; - + if (r > rp->max_temp_idx) rp->max_temp_idx = r; - + return r; } @@ -351,25 +351,25 @@ static int get_hw_temp_tex(struct r300_fragment_program *rp) for(r = 0; r < PFS_NUM_TEMP_REGS; ++r) { if (cs->used_in_node & (1 << r)) continue; - + // Note: Be very careful here if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0) break; } - + if (r >= PFS_NUM_TEMP_REGS) return get_hw_temp(rp, 0); /* Will cause an indirection */ cs->hwtemps[r].reserved = cs->hwtemps[r].free; cs->hwtemps[r].free = -1; - + // Reset to some value that won't mess things up when the user // tries to read from a temporary that hasn't been assigned a value yet. // In the normal case, vector_valid and scalar_valid should be set to // a sane value by the first emit that writes to this temporary. cs->hwtemps[r].vector_valid = cs->nrslots; cs->hwtemps[r].scalar_valid = cs->nrslots; - + if (r > rp->max_temp_idx) rp->max_temp_idx = r; @@ -382,7 +382,7 @@ static int get_hw_temp_tex(struct r300_fragment_program *rp) static void free_hw_temp(struct r300_fragment_program *rp, int idx) { COMPILE_STATE; - + // Be very careful here. Consider sequences like // MAD r0, r1,r2,r3 // TEX r4, ... @@ -457,7 +457,7 @@ static void free_temp(struct r300_fragment_program *rp, GLuint r) if (!(cs->temp_in_use & (1 << index))) return; - + if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { free_hw_temp(rp, cs->temps[index].reg); cs->temps[index].reg = -1; @@ -493,7 +493,7 @@ static GLuint emit_param4fv(struct r300_fragment_program *rp, } static GLuint emit_const4fv(struct r300_fragment_program *rp, const GLfloat* cp) -{ +{ GLuint r = undef; GLuint index; @@ -691,7 +691,7 @@ static GLuint do_swizzle(struct r300_fragment_program *rp, GLuint offset; for(i=0; i < 4; ++i){ offset = GET_SWZ(arbswz, i); - + newswz |= (offset <= 3)?GET_SWZ(vsrcswz, offset) << i*3:offset << i*3; } @@ -800,7 +800,7 @@ static GLuint t_dst(struct r300_fragment_program *rp, struct prog_dst_register dest) { GLuint r = undef; - + switch (dest.File) { case PROGRAM_TEMPORARY: REG_SET_INDEX(r, dest.Index); @@ -910,19 +910,19 @@ static int t_hw_dst(struct r300_fragment_program *rp, ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); return 0; } - + return idx; } static void emit_nop(struct r300_fragment_program *rp) { COMPILE_STATE; - + if (cs->nrslots >= PFS_MAX_ALU_INST) { ERROR("Out of ALU instruction slots\n"); return; } - + rp->alu.inst[cs->nrslots].inst0 = NOP_INST0; rp->alu.inst[cs->nrslots].inst1 = NOP_INST1; rp->alu.inst[cs->nrslots].inst2 = NOP_INST2; @@ -940,7 +940,7 @@ static void emit_tex(struct r300_fragment_program *rp, GLuint din = cs->dest_in_node, uin = cs->used_in_node; int unit = fpi->TexSrcUnit; int hwsrc, hwdest; - + /* Resolve source/dest to hardware registers */ hwsrc = t_hw_src(rp, coord, GL_TRUE); if (opcode != R300_FPITX_OP_KIL) { @@ -952,7 +952,7 @@ static void emit_tex(struct r300_fragment_program *rp, dest = get_temp_reg_tex(rp); } hwdest = t_hw_dst(rp, dest, GL_TRUE, rp->node[rp->cur_node].alu_offset); - + /* Use a temp that hasn't been used in this node, rather * than causing an indirection */ @@ -965,17 +965,17 @@ static void emit_tex(struct r300_fragment_program *rp, hwdest = 0; unit = 0; } - + /* Indirection if source has been written in this node, or if the * dest has been read/written in this node */ if ((REG_GET_TYPE(coord) != REG_TYPE_CONST && (din & (1<node[rp->cur_node].alu_offset == cs->nrslots) emit_nop(rp); - + rp->node[rp->cur_node].alu_end = cs->nrslots - rp->node[rp->cur_node].alu_offset - 1; assert(rp->node[rp->cur_node].alu_end >= 0); @@ -989,12 +989,12 @@ static void emit_tex(struct r300_fragment_program *rp, rp->node[rp->cur_node].tex_offset = rp->tex.length; rp->node[rp->cur_node].alu_offset = cs->nrslots; rp->node[rp->cur_node].tex_end = -1; - rp->node[rp->cur_node].alu_end = -1; + rp->node[rp->cur_node].alu_end = -1; rp->node[rp->cur_node].flags = 0; cs->used_in_node = 0; cs->dest_in_node = 0; } - + if (rp->cur_node == 0) rp->first_node_has_tex = 1; @@ -1005,7 +1005,7 @@ static void emit_tex(struct r300_fragment_program *rp, /* not entirely sure about this */ | (opcode << R300_FPITX_OPCODE_SHIFT); - cs->dest_in_node |= (1 << hwdest); + cs->dest_in_node |= (1 << hwdest); if (REG_GET_TYPE(coord) != REG_TYPE_CONST) cs->used_in_node |= (1 << hwsrc); @@ -1038,7 +1038,7 @@ static int get_earliest_allowed_write( case REG_TYPE_TEMP: if (cs->temps[index].reg == -1) return 0; - + idx = cs->temps[index].reg; break; case REG_TYPE_OUTPUT: @@ -1047,7 +1047,7 @@ static int get_earliest_allowed_write( ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); return 0; } - + pos = cs->hwtemps[idx].reserved; if (mask & WRITEMASK_XYZ) { if (pos < cs->hwtemps[idx].vector_lastread) @@ -1057,7 +1057,7 @@ static int get_earliest_allowed_write( if (pos < cs->hwtemps[idx].scalar_lastread) pos = cs->hwtemps[idx].scalar_lastread; } - + return pos; } @@ -1094,7 +1094,7 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, int pos; int regnr; int i,j; - + // Determine instruction slots, whether sources are required on // vector or scalar side, and the smallest slot number where // all source registers are available @@ -1103,9 +1103,9 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, used |= SLOT_OP_VECTOR; if (emit_sop) used |= SLOT_OP_SCALAR; - + pos = get_earliest_allowed_write(rp, dest, mask); - + if (rp->node[rp->cur_node].alu_offset > pos) pos = rp->node[rp->cur_node].alu_offset; for(i = 0; i < argc; ++i) { @@ -1115,10 +1115,10 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, if (emit_sop) used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; } - + hwsrc[i] = t_hw_src(rp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */ regnr = hwsrc[i] & 31; - + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { if (used & (SLOT_SRC_VECTOR << i)) { if (cs->hwtemps[regnr].vector_valid > pos) @@ -1130,12 +1130,12 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, } } } - + // Find a slot that fits for(; ; ++pos) { if (cs->slot[pos].used & used & SLOT_OP_BOTH) continue; - + if (pos >= cs->nrslots) { if (cs->nrslots >= PFS_MAX_ALU_INST) { ERROR("Out of ALU instruction slots\n"); @@ -1147,7 +1147,7 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, cs->nrslots++; } - + // Note: When we need both parts (vector and scalar) of a source, // we always try to put them into the same position. This makes the // code easier to read, and it is optimal (i.e. one doesn't gain @@ -1158,32 +1158,32 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, tempvsrc[i] = cs->slot[pos].vsrc[i]; tempssrc[i] = cs->slot[pos].ssrc[i]; } - + for(i = 0; i < argc; ++i) { int flags = (used >> i) & SLOT_SRC_BOTH; - + if (!flags) { srcpos[i] = 0; continue; } - + for(j = 0; j < 3; ++j) { if ((tempused >> j) & flags & SLOT_SRC_VECTOR) { if (tempvsrc[j] != hwsrc[i]) continue; } - + if ((tempused >> j) & flags & SLOT_SRC_SCALAR) { if (tempssrc[j] != hwsrc[i]) continue; } - + break; } - + if (j == 3) break; - + srcpos[i] = j; tempused |= flags << j; if (flags & SLOT_SRC_VECTOR) @@ -1191,22 +1191,22 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, if (flags & SLOT_SRC_SCALAR) tempssrc[j] = hwsrc[i]; } - + if (i == argc) break; } - + // Found a slot, reserve it cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH); for(i = 0; i < 3; ++i) { cs->slot[pos].vsrc[i] = tempvsrc[i]; cs->slot[pos].ssrc[i] = tempssrc[i]; } - + for(i = 0; i < argc; ++i) { if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { int regnr = hwsrc[i] & 31; - + if (used & (SLOT_SRC_VECTOR << i)) { if (cs->hwtemps[regnr].vector_lastread < pos) cs->hwtemps[regnr].vector_lastread = pos; @@ -1217,24 +1217,24 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, } } } - + // Emit the source fetch code rp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK; rp->alu.inst[pos].inst1 |= ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); - + rp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK; rp->alu.inst[pos].inst3 |= ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | (cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); - + // Emit the argument selection code if (emit_vop) { int swz[3]; - + for(i = 0; i < 3; ++i) { if (i < argc) { swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + @@ -1245,7 +1245,7 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, swz[i] = R300_FPI0_ARGC_ZERO; } } - + rp->alu.inst[pos].inst0 &= ~(R300_FPI0_ARG0C_MASK|R300_FPI0_ARG1C_MASK|R300_FPI0_ARG2C_MASK); rp->alu.inst[pos].inst0 |= @@ -1253,10 +1253,10 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, (swz[1] << R300_FPI0_ARG1C_SHIFT) | (swz[2] << R300_FPI0_ARG2C_SHIFT); } - + if (emit_sop) { int swz[3]; - + for(i = 0; i < 3; ++i) { if (i < argc) { swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + @@ -1267,7 +1267,7 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, swz[i] = R300_FPI2_ARGA_ZERO; } } - + rp->alu.inst[pos].inst2 &= ~(R300_FPI2_ARG0A_MASK|R300_FPI2_ARG1A_MASK|R300_FPI2_ARG2A_MASK); rp->alu.inst[pos].inst2 |= @@ -1322,9 +1322,9 @@ static void emit_arith(struct r300_fragment_program *rp, pos = find_and_prepare_slot(rp, emit_vop, emit_sop, argc, src, dest, mask); if (pos < 0) return; - + hwdest = t_hw_dst(rp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ - + if (flags & PFS_FLAG_SAT) { vop |= R300_FPI0_OUTC_SAT; sop |= R300_FPI2_OUTA_SAT; @@ -1335,7 +1335,7 @@ static void emit_arith(struct r300_fragment_program *rp, rp->alu.inst[pos].inst0 |= vop; rp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; - + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { rp->alu.inst[pos].inst1 |= @@ -1344,7 +1344,7 @@ static void emit_arith(struct r300_fragment_program *rp, } else { rp->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_REG_MASK_SHIFT; - + cs->hwtemps[hwdest].vector_valid = pos+1; } } @@ -1356,7 +1356,7 @@ static void emit_arith(struct r300_fragment_program *rp, if (mask & WRITEMASK_W) { if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - rp->alu.inst[pos].inst3 |= + rp->alu.inst[pos].inst3 |= (hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_OUTPUT; } else if (REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { rp->alu.inst[pos].inst3 |= R300_FPI3_DSTA_DEPTH; @@ -1364,12 +1364,12 @@ static void emit_arith(struct r300_fragment_program *rp, } else { rp->alu.inst[pos].inst3 |= (hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_REG; - + cs->hwtemps[hwdest].scalar_valid = pos+1; } } } - + return; } @@ -1443,9 +1443,9 @@ static void emit_lit(struct r300_fragment_program *rp, GLuint cnst; int needTemporary; GLuint temp; - + cnst = emit_const4fv(rp, cnstv); - + needTemporary = 0; if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { needTemporary = 1; @@ -1454,30 +1454,30 @@ static void emit_lit(struct r300_fragment_program *rp, // in creating special code for this case needTemporary = 1; } - + if (needTemporary) { temp = keep(get_temp_reg(rp)); } else { temp = keep(dest); } - + // Npte: The order of emit_arith inside the slots is relevant, // because emit_arith only looks at scalar vs. vector when resolving // dependencies, and it does not consider individual vector components, // so swizzling between the two parts can create fake dependencies. - + // First slot emit_arith(rp, PFS_OP_MAX, temp, WRITEMASK_XY, keep(src), pfs_zero, undef, 0); emit_arith(rp, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); - + // Second slot emit_arith(rp, PFS_OP_MIN, temp, WRITEMASK_Z, swizzle(temp, W, W, W, W), cnst, undef, 0); emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W, swizzle(temp, Y, Y, Y, Y), undef, undef, 0); - + // Third slot // If desired, we saturate the y result here. // This does not affect the use as a condition variable in the CMP later @@ -1485,19 +1485,19 @@ static void emit_lit(struct r300_fragment_program *rp, temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y, swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); - + // Fourth slot emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, pfs_one, pfs_one, pfs_zero, 0); emit_arith(rp, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); - + // Fifth slot emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z, swizzle(temp, W, W, W, W), pfs_zero, swizzle(temp, Y, Y, Y, Y), flags); emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, pfs_zero, 0); - + if (needTemporary) { emit_arith(rp, PFS_OP_MAD, dest, mask, temp, pfs_one, pfs_zero, flags); @@ -1510,7 +1510,7 @@ static void emit_lit(struct r300_fragment_program *rp, static GLboolean parse_program(struct r300_fragment_program *rp) -{ +{ struct gl_fragment_program *mp = &rp->mesa_program; const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; @@ -1604,7 +1604,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) absolute(swizzle(temp[0], Z, Z, Z, Z)), swizzle(temp[0], X, X, X, X), 0); - + emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Y, swizzle(temp[0], X, X, X, X), absolute(swizzle(temp[0], X, X, X, X)), @@ -1648,12 +1648,12 @@ static GLboolean parse_program(struct r300_fragment_program *rp) 0); emit_arith(rp, PFS_OP_DP4, dest, mask, temp[0], src[1], undef, - flags); + flags); free_temp(rp, temp[0]); #else emit_arith(rp, PFS_OP_DP4, dest, mask, swizzle(src[0], X, Y, Z, ONE), src[1], - undef, flags); + undef, flags); #endif break; case OPCODE_DST: @@ -1684,7 +1684,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) src[0], undef, undef, flags); break; - case OPCODE_FLR: + case OPCODE_FLR: src[0] = t_src(rp, fpi->SrcReg[0]); temp[0] = get_temp_reg(rp); /* FRC temp, src0 @@ -1734,7 +1734,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) src[0], src[1], temp[0], flags); free_temp(rp, temp[0]); - break; + break; case OPCODE_MAD: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); @@ -1761,7 +1761,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) case OPCODE_SWZ: src[0] = t_src(rp, fpi->SrcReg[0]); emit_arith(rp, PFS_OP_MAD, dest, mask, - src[0], pfs_one, pfs_zero, + src[0], pfs_one, pfs_zero, flags); break; case OPCODE_MUL: @@ -1774,7 +1774,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) case OPCODE_POW: src[0] = t_scalar_src(rp, fpi->SrcReg[0]); src[1] = t_scalar_src(rp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(rp); + temp[0] = get_temp_reg(rp); emit_arith(rp, PFS_OP_LG2, temp[0], WRITEMASK_W, src[0], undef, undef, 0); @@ -1932,7 +1932,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) absolute(swizzle(temp[0], Z, Z, Z, Z)), swizzle(temp[0], X, X, X, X), 0); - + emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Y, swizzle(temp[0], X, X, X, X), absolute(swizzle(temp[0], X, X, X, X)), @@ -1989,7 +1989,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) swizzle(keep(src[1]), Y, Z, X, W), pfs_zero, 0); - /* dest.xyz = src0.yzx * src1.zxy - temp + /* dest.xyz = src0.yzx * src1.zxy - temp * dest.w = undefined * */ emit_arith(rp, PFS_OP_MAD, dest, mask & WRITEMASK_XYZ, @@ -2089,7 +2089,7 @@ static void insert_wpos(struct gl_program *prog) fpi = &prog->Instructions[prog->NumInstructions-1]; assert(fpi->Opcode == OPCODE_END); - + for(fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++){ for(i=0; i<3; i++) if( fpi->SrcReg[i].File == PROGRAM_INPUT && @@ -2106,7 +2106,7 @@ static void insert_wpos(struct gl_program *prog) static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) { struct r300_pfs_compile_state *cs = NULL; - struct gl_fragment_program *mp = &rp->mesa_program; + struct gl_fragment_program *mp = &rp->mesa_program; struct prog_instruction *fpi; GLuint InputsRead = mp->Base.InputsRead; GLuint temps_used = 0; /* for rp->temps[] */ @@ -2127,7 +2127,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) rp->node[0].alu_end = -1; rp->node[0].tex_end = -1; rp->const_sin[0] = -1; - + _mesa_memset(cs, 0, sizeof(*rp->cs)); for (i=0;islot[i].ssrc[j] = SRC_CONST; } } - + /* Work out what temps the Mesa inputs correspond to, this must match * what setup_rs_unit does, which shouldn't be a problem as rs_unit * configures itself based on the fragprog's InputsRead @@ -2167,7 +2167,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(rp, 0); } InputsRead &= ~FRAG_BIT_COL0; - + /* Secondary color */ if (InputsRead & FRAG_BIT_COL1) { cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; @@ -2194,7 +2194,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) for (fpi=mp->Base.Instructions;fpi->Opcode != OPCODE_END; fpi++) { int idx; - + for (i=0;i<3;i++) { idx = fpi->SrcReg[i].Index; switch (fpi->SrcReg[i].File) { @@ -2246,7 +2246,7 @@ void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_pr struct r300_pfs_compile_state *cs = NULL; if (!rp->translated) { - + init_program(r300, rp); cs = rp->cs; @@ -2254,7 +2254,7 @@ void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_pr dump_program(rp); return; } - + /* Finish off */ rp->node[rp->cur_node].alu_end = cs->nrslots - rp->node[rp->cur_node].alu_offset - 1; @@ -2266,9 +2266,9 @@ void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_pr rp->tex_end = rp->tex.length ? rp->tex.length - 1 : 0; assert(rp->node[rp->cur_node].alu_end >= 0); assert(rp->alu_end >= 0); - + rp->translated = GL_TRUE; - if (0) dump_program(rp); + if (1) dump_program(rp); r300UpdateStateParameters(rp->ctx, _NEW_PROGRAM); } @@ -2282,7 +2282,7 @@ static void dump_program(struct r300_fragment_program *rp) static int pc = 0; fprintf(stderr, "pc=%d*************************************\n", pc++); - + fprintf(stderr, "Mesa program:\n"); fprintf(stderr, "-------------\n"); _mesa_print_program(&rp->mesa_program.Base); @@ -2290,7 +2290,7 @@ static void dump_program(struct r300_fragment_program *rp) fprintf(stderr, "Hardware program\n"); fprintf(stderr, "----------------\n"); - + for (n = 0; n < (rp->cur_node+1); n++) { fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "\ "alu_end: %d, tex_end: %d\n", n, @@ -2298,12 +2298,12 @@ static void dump_program(struct r300_fragment_program *rp) rp->node[n].tex_offset, rp->node[n].alu_end, rp->node[n].tex_end); - + if (rp->tex.length) { fprintf(stderr, " TEX:\n"); for(i = rp->node[n].tex_offset; i <= rp->node[n].tex_offset+rp->node[n].tex_end; ++i) { const char* instr; - + switch((rp->tex.inst[i] >> R300_FPITX_OPCODE_SHIFT) & 15) { case R300_FPITX_OP_TEX: instr = "TEX"; @@ -2320,7 +2320,7 @@ static void dump_program(struct r300_fragment_program *rp) default: instr = "UNKNOWN"; } - + fprintf(stderr, " %s t%i, %c%i, texture[%i] (%08x)\n", instr, (rp->tex.inst[i] >> R300_FPITX_DST_SHIFT) & 31, @@ -2330,22 +2330,22 @@ static void dump_program(struct r300_fragment_program *rp) rp->tex.inst[i]); } } - + for(i = rp->node[n].alu_offset; i <= rp->node[n].alu_offset+rp->node[n].alu_end; ++i) { char srcc[3][10], dstc[20]; char srca[3][10], dsta[20]; char argc[3][20]; char arga[3][20]; char flags[5], tmp[10]; - + for(j = 0; j < 3; ++j) { int regc = rp->alu.inst[i].inst1 >> (j*6); int rega = rp->alu.inst[i].inst3 >> (j*6); - + sprintf(srcc[j], "%c%i", (regc & 32) ? 'c' : 't', regc & 31); sprintf(srca[j], "%c%i", (rega & 32) ? 'c' : 't', rega & 31); } - + dstc[0] = 0; sprintf(flags, "%s%s%s", (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "", @@ -2366,7 +2366,7 @@ static void dump_program(struct r300_fragment_program *rp) flags); strcat(dstc, tmp); } - + dsta[0] = 0; if (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) { sprintf(dsta, "t%i.w ", (rp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31); @@ -2378,13 +2378,13 @@ static void dump_program(struct r300_fragment_program *rp) if (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) { strcat(dsta, "Z"); } - + fprintf(stderr, "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" " w: %3s %3s %3s -> %-20s (%08x)\n", i, srcc[0], srcc[1], srcc[2], dstc, rp->alu.inst[i].inst1, srca[0], srca[1], srca[2], dsta, rp->alu.inst[i].inst3); - + for(j = 0; j < 3; ++j) { int regc = rp->alu.inst[i].inst0 >> (j*7); int rega = rp->alu.inst[i].inst2 >> (j*7); @@ -2431,13 +2431,13 @@ static void dump_program(struct r300_fragment_program *rp) } else { sprintf(buf, "%i", d); } - + sprintf(argc[j], "%s%s%s%s", (regc & 32) ? "-" : "", (regc & 64) ? "|" : "", buf, (regc & 64) ? "|" : ""); - + d = rega & 31; if (d < 9) { sprintf(buf, "%s.%c", srcc[d / 3], 'x' + (char)(d%3)); @@ -2452,14 +2452,14 @@ static void dump_program(struct r300_fragment_program *rp) } else { sprintf(buf, "%i", d); } - + sprintf(arga[j], "%s%s%s%s", (rega & 32) ? "-" : "", (rega & 64) ? "|" : "", buf, (rega & 64) ? "|" : ""); } - + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" " w: %8s %8s %8s op: %08x\n", argc[0], argc[1], argc[2], rp->alu.inst[i].inst0, -- cgit v1.2.3 From 826815a5d27d6e79e9d0e0b0fc63bb3fd092d40d Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Mon, 19 Mar 2007 20:01:20 +0100 Subject: r300: Dump fragment program after translation if RADEON_DEBUG=pixel is set --- src/mesa/drivers/dri/r300/r300_fragprog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 93b9c39635..6262dc7a44 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -2268,7 +2268,7 @@ void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_pr assert(rp->alu_end >= 0); rp->translated = GL_TRUE; - if (1) dump_program(rp); + if (RADEON_DEBUG & DEBUG_PIXEL) dump_program(rp); r300UpdateStateParameters(rp->ctx, _NEW_PROGRAM); } -- cgit v1.2.3 From b3acba87d7f5ede486cba11db036cf36dff6c29e Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Mon, 19 Mar 2007 22:17:16 +0100 Subject: r300: Clear fragment program instruction slots on first use Make sure that instruction slots are fully initialized with NOPs during find_and_prepare_slot(). This fixes a bug when a fragment program was translated more than once (e.g. due to a second call to glProgramStringARB). This partially fixes glean/fragProg1. --- src/mesa/drivers/dri/r300/r300_fragprog.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 6262dc7a44..3f9d83f109 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -1143,7 +1143,9 @@ static int find_and_prepare_slot(struct r300_fragment_program* rp, } rp->alu.inst[pos].inst0 = NOP_INST0; + rp->alu.inst[pos].inst1 = NOP_INST1; rp->alu.inst[pos].inst2 = NOP_INST2; + rp->alu.inst[pos].inst3 = NOP_INST3; cs->nrslots++; } -- cgit v1.2.3 From 5a6547878373798113f8b55b912abc5bfb93add5 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Mon, 19 Mar 2007 22:26:08 +0100 Subject: r300: Fix special case (tmp.x <= 0) in fragment program LIT instruction Also, fix a typo in a related comment. --- src/mesa/drivers/dri/r300/r300_fragprog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 3f9d83f109..1d462ebec8 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -1463,7 +1463,7 @@ static void emit_lit(struct r300_fragment_program *rp, temp = keep(dest); } - // Npte: The order of emit_arith inside the slots is relevant, + // Note: The order of emit_arith inside the slots is relevant, // because emit_arith only looks at scalar vs. vector when resolving // dependencies, and it does not consider individual vector components, // so swizzling between the two parts can create fake dependencies. @@ -1496,7 +1496,7 @@ static void emit_lit(struct r300_fragment_program *rp, // Fifth slot emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z, - swizzle(temp, W, W, W, W), pfs_zero, swizzle(temp, Y, Y, Y, Y), flags); + pfs_zero, swizzle(temp, W, W, W, W), negate(swizzle(temp, Y, Y, Y, Y)), flags); emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, pfs_zero, 0); -- cgit v1.2.3 From 61821a41c07b6b383a275acf31ade56af2ecfb3c Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Mon, 19 Mar 2007 23:32:36 +0100 Subject: r300: Cleanup fragment program constant allocation, share constants The constant/parameter allocation was significantly simplified, removing one unnecessary copy operation of parameters. The dirty state tracking is unchanged and far from optimal, since all state is always re-fetched. Constants and parameters are now emitted only once, which significantly reduces the resource pressure on larger programs. --- src/mesa/drivers/dri/r300/r300_context.h | 20 +++--- src/mesa/drivers/dri/r300/r300_fragprog.c | 114 ++++++++++++++---------------- 2 files changed, 61 insertions(+), 73 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 29436ab9e0..bbe44f5e7f 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -767,23 +767,21 @@ struct r300_fragment_program { int tex_offset; int tex_end; - /* Hardware constants */ - GLfloat constant[PFS_NUM_CONST_REGS][4]; + /* Hardware constants. + * Contains a pointer to the value. The destination of the pointer + * is supposed to be updated when GL state changes. + * Typically, this is either a pointer into + * gl_program_parameter_list::ParameterValues, or a pointer to a + * global constant (e.g. for sin/cos-approximation) + */ + const GLfloat* constant[PFS_NUM_CONST_REGS]; int const_nr; - /* Tracked parameters */ - struct { - int idx; /* hardware index */ - GLfloat *values; /* pointer to values */ - } param[PFS_NUM_CONST_REGS]; - int param_nr; - GLboolean params_uptodate; - int max_temp_idx; /* the index of the sin constant is stored here */ GLint const_sin[2]; - + GLuint optimization; }; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 1d462ebec8..2145c48b80 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -468,47 +468,39 @@ static void free_temp(struct r300_fragment_program *rp, GLuint r) } } -static GLuint emit_param4fv(struct r300_fragment_program *rp, - GLfloat *values) +/** + * Emit a hardware constant/parameter. + * + * \p cp Stable pointer to an array of 4 floats. + * The pointer must be stable in the sense that it remains to be valid + * and hold the contents of the constant/parameter throughout the lifetime + * of the fragment program (actually, up until the next time the fragment + * program is translated). + */ +static GLuint emit_const4fv(struct r300_fragment_program *rp, const GLfloat* cp) { - GLuint r = undef; - GLuint index; - int pidx; + GLuint reg = undef; + int index; - pidx = rp->param_nr++; - index = rp->const_nr++; - if (pidx >= PFS_NUM_CONST_REGS || index >= PFS_NUM_CONST_REGS) { - ERROR("Out of const/param slots!\n"); - return r; + for(index = 0; index < rp->const_nr; ++index) { + if (rp->constant[index] == cp) + break; } - rp->param[pidx].idx = index; - rp->param[pidx].values = values; - rp->params_uptodate = GL_FALSE; - - REG_SET_TYPE(r, REG_TYPE_CONST); - REG_SET_INDEX(r, index); - REG_SET_VALID(r, GL_TRUE); - return r; -} - -static GLuint emit_const4fv(struct r300_fragment_program *rp, const GLfloat* cp) -{ - GLuint r = undef; - GLuint index; + if (index >= rp->const_nr) { + if (index >= PFS_NUM_CONST_REGS) { + ERROR("Out of hw constants!\n"); + return reg; + } - index = rp->const_nr++; - if (index >= PFS_NUM_CONST_REGS) { - ERROR("Out of hw constants!\n"); - return r; + rp->const_nr++; + rp->constant[index] = cp; } - COPY_4V(rp->constant[index], cp); - - REG_SET_TYPE(r, REG_TYPE_CONST); - REG_SET_INDEX(r, index); - REG_SET_VALID(r, GL_TRUE); - return r; + REG_SET_TYPE(reg, REG_TYPE_CONST); + REG_SET_INDEX(reg, index); + REG_SET_VALID(reg, GL_TRUE); + return reg; } static inline GLuint negate(GLuint r) @@ -762,16 +754,16 @@ static GLuint t_src(struct r300_fragment_program *rp, REG_SET_TYPE(r, REG_TYPE_INPUT); break; case PROGRAM_LOCAL_PARAM: - r = emit_param4fv(rp, + r = emit_const4fv(rp, rp->mesa_program.Base.LocalParams[fpsrc.Index]); break; case PROGRAM_ENV_PARAM: - r = emit_param4fv(rp, + r = emit_const4fv(rp, rp->ctx->FragmentProgram.Parameters[fpsrc.Index]); break; case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: - r = emit_param4fv(rp, + r = emit_const4fv(rp, rp->mesa_program.Base.Parameters->ParameterValues[fpsrc.Index]); break; default: @@ -1393,22 +1385,27 @@ static GLuint get_attrib(struct r300_fragment_program *rp, GLuint attr) } #endif +static GLfloat SinCosConsts[2][4] = { + { + 1.273239545, // 4/PI + -0.405284735, // -4/(PI*PI) + 3.141592654, // PI + 0.2225 // weight + }, + { + 0.75, + 0.0, + 0.159154943, // 1/(2*PI) + 6.283185307 // 2*PI + } +}; + + static void make_sin_const(struct r300_fragment_program *rp) { - if(rp->const_sin[0] == -1){ - GLfloat cnstv[4]; - - cnstv[0] = 1.273239545; // 4/PI - cnstv[1] =-0.405284735; // -4/(PI*PI) - cnstv[2] = 3.141592654; // PI - cnstv[3] = 0.2225; // weight - rp->const_sin[0] = emit_const4fv(rp, cnstv); - - cnstv[0] = 0.75; - cnstv[1] = 0.0; - cnstv[2] = 0.159154943; // 1/(2*PI) - cnstv[3] = 6.283185307; // 2*PI - rp->const_sin[1] = emit_const4fv(rp, cnstv); + if(rp->const_sin[0] == -1) { + rp->const_sin[0] = emit_const4fv(rp, SinCosConsts[0]); + rp->const_sin[1] = emit_const4fv(rp, SinCosConsts[1]); } } @@ -1434,6 +1431,8 @@ static void make_sin_const(struct r300_fragment_program *rp) * emit_arith is a bit too conservative because it doesn't understand * partial writes to the vector component. */ +static const GLfloat LitConst[4] = { 127.999999, 127.999999, 127.999999, -127.999999 }; + static void emit_lit(struct r300_fragment_program *rp, GLuint dest, int mask, @@ -1441,12 +1440,11 @@ static void emit_lit(struct r300_fragment_program *rp, int flags) { COMPILE_STATE; - static const GLfloat cnstv[4] = { 127.999999, 127.999999, 127.999999, -127.999999 }; GLuint cnst; int needTemporary; GLuint temp; - cnst = emit_const4fv(rp, cnstv); + cnst = emit_const4fv(rp, LitConst); needTemporary = 0; if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { @@ -2123,8 +2121,6 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) rp->cur_node = 0; rp->first_node_has_tex = 0; rp->const_nr = 0; - rp->param_nr = 0; - rp->params_uptodate = GL_FALSE; rp->max_temp_idx = 0; rp->node[0].alu_end = -1; rp->node[0].tex_end = -1; @@ -2231,16 +2227,10 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) static void update_params(struct r300_fragment_program *rp) { struct gl_fragment_program *mp = &rp->mesa_program; - int i; /* Ask Mesa nicely to fill in ParameterValues for us */ - if (rp->param_nr) + if (mp->Base.Parameters) _mesa_load_state_parameters(rp->ctx, mp->Base.Parameters); - - for (i=0;iparam_nr;i++) - COPY_4V(rp->constant[rp->param[i].idx], rp->param[i].values); - - rp->params_uptodate = GL_TRUE; } void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_program *rp) -- cgit v1.2.3 From 9622a634f61f02ed1a23087762a2ec8a305ae77e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 20 Mar 2007 15:05:35 +1100 Subject: nouveau: NVSDBG macro --- src/mesa/drivers/dri/nouveau/nouveau_shader.c | 12 ++++++++++++ src/mesa/drivers/dri/nouveau/nouveau_shader.h | 6 ++++++ src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 2 ++ src/mesa/drivers/dri/nouveau/nouveau_shader_1.c | 2 ++ src/mesa/drivers/dri/nouveau/nouveau_shader_2.c | 2 ++ 5 files changed, 24 insertions(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.c b/src/mesa/drivers/dri/nouveau/nouveau_shader.c index ba471325aa..9cb837ff3b 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.c @@ -48,6 +48,7 @@ static void nouveauBindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog) { + NVSDBG("target=%s, prog=%p\n", _mesa_lookup_enum_by_nr(target), prog); } static struct gl_program * @@ -55,7 +56,10 @@ nouveauNewProgram(GLcontext *ctx, GLenum target, GLuint id) { nouveauShader *nvs; + NVSDBG("target=%s, id=%d\n", _mesa_lookup_enum_by_nr(target), id); + nvs = CALLOC_STRUCT(_nouveauShader); + NVSDBG("prog=%p\n", nvs); switch (target) { case GL_VERTEX_PROGRAM_ARB: return _mesa_init_vertex_program(ctx, &nvs->mesa.vp, target, id); @@ -75,6 +79,8 @@ nouveauDeleteProgram(GLcontext *ctx, struct gl_program *prog) { nouveauShader *nvs = (nouveauShader *)prog; + NVSDBG("prog=%p\n", prog); + if (nvs->translated) FREE(nvs->program); _mesa_delete_program(ctx, prog); @@ -86,6 +92,8 @@ nouveauProgramStringNotify(GLcontext *ctx, GLenum target, { nouveauShader *nvs = (nouveauShader *)prog; + NVSDBG("target=%s, prog=%p\n", _mesa_lookup_enum_by_nr(target), prog); + if (nvs->translated) FREE(nvs->program); nvs->translated = 0; @@ -98,6 +106,8 @@ nouveauIsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { nouveauShader *nvs = (nouveauShader *)prog; + NVSDBG("target=%s, prog=%p\n", _mesa_lookup_enum_by_nr(target), prog); + return nvs->translated; } @@ -108,6 +118,8 @@ nvsUpdateShader(GLcontext *ctx, nouveauShader *nvs) struct gl_program_parameter_list *plist; int i; + NVSDBG("prog=%p\n", nvs); + /* Translate to HW format now if necessary */ if (!nvs->translated) { /* Mesa ASM shader -> nouveauShader */ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h index b2df3546f6..56ae270764 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -4,6 +4,12 @@ #include "mtypes.h" #include "bufferobj.h" +#define NVSDBG(fmt, args...) do { \ + if (NOUVEAU_DEBUG & DEBUG_SHADERS) { \ + fprintf(stderr, "%s: "fmt, __func__, ##args); \ + } \ +} while(0) + typedef struct _nvsFunc nvsFunc; #define NVS_MAX_TEMPS 32 diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index 211483dc02..0308a6c397 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -974,6 +974,8 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) struct pass0_rec *rec; int ret = GL_FALSE; + NVSDBG("start: nvs=%p\n", nvs); + rec = CALLOC_STRUCT(pass0_rec); if (!rec) return GL_FALSE; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c index 90c57d3807..78c1401f7d 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c @@ -2,11 +2,13 @@ #include "macros.h" #include "enums.h" +#include "nouveau_context.h" #include "nouveau_shader.h" GLboolean nouveau_shader_pass1(nvsPtr nvs) { + NVSDBG("start: nvs=%p\n", nvs); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c index b043f877e4..130ef35e57 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c @@ -209,6 +209,8 @@ nouveau_shader_pass2(nvsPtr nvs) struct pass2_rec *rec; int i; + NVSDBG("start: nvs=%p\n", nvs); + rec = calloc(1, sizeof(struct pass2_rec)); for (i=0; itemps[i] = -1; -- cgit v1.2.3 From 4185037af2d89c5b245646f5e4a7c6dc946cae43 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 20 Mar 2007 15:52:57 +1100 Subject: nouveau: fail translate if we use too many params somehow --- src/mesa/drivers/dri/nouveau/nouveau_shader.c | 4 +++- src/mesa/drivers/dri/nouveau/nouveau_shader.h | 7 +++++++ src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 25 +++++++++++++++++++++---- 3 files changed, 31 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.c b/src/mesa/drivers/dri/nouveau/nouveau_shader.c index 9cb837ff3b..3d5b6843a1 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.c @@ -96,7 +96,9 @@ nouveauProgramStringNotify(GLcontext *ctx, GLenum target, if (nvs->translated) FREE(nvs->program); - nvs->translated = 0; + + nvs->error = GL_FALSE; + nvs->translated = GL_FALSE; _tnl_program_string(ctx, target, prog); } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h index 56ae270764..7125a2ae82 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -51,6 +51,7 @@ typedef struct _nouveauShader { nvsFunc *func; /* State of the final program */ + GLboolean error; GLboolean translated; GLboolean on_hardware; unsigned int *program; @@ -424,6 +425,12 @@ nvsSwizzle(nvsRegister reg, nvsSwzComp x, nvsSwzComp y, return reg; } +#define nvsProgramError(nvs,fmt,args...) do { \ + fprintf(stderr, "nvsProgramError (%s): "fmt, __func__, ##args); \ + (nvs)->error = GL_TRUE; \ + (nvs)->translated = GL_FALSE; \ +} while(0) + extern GLboolean nvsUpdateShader(GLcontext *ctx, nouveauShader *nvs); extern void nvsDisasmHWShader(nvsPtr); extern void nvsDumpFragmentList(nvsFragmentHeader *f, int lvl); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index 0308a6c397..7c2e2b9443 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -924,7 +924,7 @@ pass0_rebase_mesa_consts(nouveauShader *nvs) } } -static void +static GLboolean pass0_resolve_mesa_consts(nouveauShader *nvs) { struct pass0_rec *rec = nvs->pass_rec; @@ -945,6 +945,11 @@ pass0_resolve_mesa_consts(nouveauShader *nvs) for (i=0; iNumParameters; i++) { int hw = rec->mesa_const_base + i; + if (hw > NVS_MAX_CONSTS) { + nvsProgramError(nvs, "hw = %d > NVS_MAX_CONSTS!\n", hw); + return GL_FALSE; + } + switch (plist->Parameters[i].Type) { case PROGRAM_NAMED_PARAM: case PROGRAM_STATE_VAR: @@ -958,10 +963,13 @@ pass0_resolve_mesa_consts(nouveauShader *nvs) COPY_4V(nvs->params[hw].val, plist->ParameterValues[i]); break; default: - assert(0); - break; + nvsProgramError(nvs, "hit bad type=%d on param %d\n", + plist->Parameters[i].Type, i); + return GL_FALSE; } } + + return GL_TRUE; } GLboolean @@ -976,6 +984,14 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) NVSDBG("start: nvs=%p\n", nvs); + /* Previously detected an error, and haven't recieved new program + * string, so fail immediately. + */ + if (nvs->error) { + NVSDBG("failed previous compile attempt, not retrying\n"); + return GL_FALSE; + } + rec = CALLOC_STRUCT(pass0_rec); if (!rec) return GL_FALSE; @@ -1020,7 +1036,8 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) ret = pass0_translate_instructions(nvs, 0, 0, nvs->program_tree); if (ret) - pass0_resolve_mesa_consts(nvs); + ret = pass0_resolve_mesa_consts(nvs); + /*XXX: if (!ret) DESTROY TREE!!! */ FREE(rec); -- cgit v1.2.3 From ecb1a1c82f48dd78203230f6ea3dee49d7ade17d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 20 Mar 2007 15:59:55 +1100 Subject: nouveau: fix typo --- src/mesa/drivers/dri/nouveau/nouveau_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_sync.c b/src/mesa/drivers/dri/nouveau/nouveau_sync.c index 428b19b46e..30e6696269 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_sync.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_sync.c @@ -124,7 +124,7 @@ nouveau_notifier_wait_status(nouveau_notifier *notifier, GLuint id, while (time <= timeout) { if (n[NV_NOTIFY_STATE/4] & NV_NOTIFY_STATE_ERROR_CODE_MASK) { MESSAGE("Notifier returned error: 0x%04x\n", - n[NV_NOTIFY_STATE] & + n[NV_NOTIFY_STATE/4] & NV_NOTIFY_STATE_ERROR_CODE_MASK); return GL_FALSE; } -- cgit v1.2.3 From bec665d5b5b35c0c9f3263ece9793bcea87073ab Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Tue, 20 Mar 2007 13:15:58 +0000 Subject: use passed target parameter --- src/mesa/drivers/dri/common/dri_bufmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c index 370b56c3a3..65d6545965 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr.c @@ -431,7 +431,7 @@ void driBOCreateList(int target, drmBOList * list) { _glthread_LOCK_MUTEX(bmMutex); - BM_CKFATAL(drmBOCreateList(20, list)); + BM_CKFATAL(drmBOCreateList(target, list)); _glthread_UNLOCK_MUTEX(bmMutex); } -- cgit v1.2.3 From cf4272d2569940fa4ac2ffaf3afbdc2aa3e5cc06 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 20 Mar 2007 22:12:03 +0800 Subject: fix for bug#10347 not sure which brw surface for DXT3 & DXT5, so restore the previous choice.(changed in commit 84081774e62a8af18e6bf894ea69f63b97dcfe96) --- src/mesa/drivers/dri/i965/brw_tex.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c index 467aec64a0..9d4b9867d2 100644 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -154,19 +154,13 @@ brwChooseTextureFormat( GLcontext *ctx, GLint internalFormat, case GL_RGB_S3TC: case GL_RGB4_S3TC: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return &_mesa_texformat_rgb_dxt1; - - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - return &_mesa_texformat_rgba_dxt1; - case GL_RGBA_S3TC: case GL_RGBA4_S3TC: case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - return &_mesa_texformat_rgba_dxt3; - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return &_mesa_texformat_rgba_dxt5; + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + return &_mesa_texformat_rgb_dxt1; /* there is no rgba support? */ case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT16: -- cgit v1.2.3 From cbe38dc0ceade8543d8a65f6f547d2890bb177f0 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Mon, 19 Mar 2007 23:30:26 +0100 Subject: r300: Whitespace cleanup (remove trailing spaces) --- src/mesa/drivers/dri/r300/r300_context.h | 52 ++++++++++++++++---------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index bbe44f5e7f..68151d865e 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -71,7 +71,7 @@ typedef struct r300_context *r300ContextPtr; /* Checkpoint.. for convenience */ #define CPT { fprintf(stderr, "%s:%s line %d\n", __FILE__, __FUNCTION__, __LINE__); } /* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . - I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble + I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble with other compilers ... GLUE! */ #if 1 @@ -548,7 +548,7 @@ struct r300_stencilbuffer_state { /* Perhaps more if we store programs in vmem? */ /* drm_r300_cmd_header_t->vpu->count is unsigned char */ #define VSF_MAX_FRAGMENT_LENGTH (255*4) - + /* Can be tested with colormat currently. */ #define VSF_MAX_FRAGMENT_TEMPS (14) @@ -593,7 +593,7 @@ struct r300_vertex_shader_state { int unknown_ptr2; /* pointer within program space */ int unknown_ptr3; /* pointer within program space */ }; - + extern int hw_tcl_on; //#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current) @@ -602,16 +602,16 @@ extern int hw_tcl_on; /* Should but doesnt work */ //#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp) -//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && (OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit)))) : +//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && (OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit)))) : // (r300->state.render_inputs & (_TNL_BIT_TEX0<<(unit)))) -//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit))) : +//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit))) : // ctx->Texture.Unit[unit]._ReallyEnabled && r300->state.render_inputs & (_TNL_BIT_TEX0<<(unit))) #define TMU_ENABLED(ctx, unit) (ctx->Texture.Unit[unit]._ReallyEnabled) /* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday. * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly. - */ + */ struct r300_vertex_program_key { GLuint InputsRead; @@ -622,9 +622,9 @@ struct r300_vertex_program { struct r300_vertex_program *next; struct r300_vertex_program_key key; int translated; - + struct r300_vertex_shader_fragment program; - + int pos_end; int num_temporaries; /* Number of temp vars used by program */ int wpos_idx; @@ -662,19 +662,19 @@ struct reg_lifetime { This is -1 if the register has been assigned to a Mesa register and the last access to the register has not yet been emitted */ int free; - + /* Index of the first slot where this register is currently reserved. This is used to stop e.g. a scalar operation from being moved before the allocation time of a register that was first allocated for a vector operation. */ int reserved; - + /* Index of the first slot in which the register can be used as a source without losing the value that is written by the last emitted instruction that writes to the register */ int vector_valid; int scalar_valid; - + /* Index to the slot where the register was last read. This is also the first slot in which the register may be written again */ int vector_lastread; @@ -694,7 +694,7 @@ struct reg_lifetime { #define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) struct r300_pfs_compile_slot { - /* Bitmask indicating which parts of the slot are used, using SLOT_ constants + /* Bitmask indicating which parts of the slot are used, using SLOT_ constants defined above */ unsigned int used; @@ -708,18 +708,18 @@ struct r300_pfs_compile_slot { */ struct r300_pfs_compile_state { int nrslots; /* number of ALU slots used so far */ - + /* Track which (parts of) slots are already filled with instructions */ struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST]; - + /* Track the validity of R300 temporaries */ struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; - + /* Used to map Mesa's inputs/temps onto hardware temps */ int temp_in_use; struct reg_acc temps[PFS_NUM_TEMP_REGS]; struct reg_acc inputs[32]; /* don't actually need 32... */ - + /* Track usage of hardware temps, for register allocation, * indirection detection, etc. */ GLuint used_in_node; @@ -808,10 +808,10 @@ struct radeon_vertex_buffer { void *Elts; int elt_size; int elt_min, elt_max; /* debug */ - + struct dt AttribPtr[VERT_ATTRIB_MAX]; - - const struct _mesa_prim *Primitive; + + const struct _mesa_prim *Primitive; GLuint PrimitiveCount; GLint LockFirst; GLsizei LockCount; @@ -843,16 +843,16 @@ struct r300_state { GLuint *Elts; struct r300_dma_region elt_dma; - - DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. - They are the same as tnl->render_inputs for fixed pipeline */ - + + DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. + They are the same as tnl->render_inputs for fixed pipeline */ + struct { int transform_offset; /* Transform matrix offset, -1 if none */ } vap_param; /* vertex processor parameter allocation - tells where to write parameters */ - + struct r300_stencilbuffer_state stencil; - + }; #define R300_FALLBACK_NONE 0 @@ -904,7 +904,7 @@ struct r300_buffer_object { struct gl_buffer_object mesa_obj; int id; }; - + #define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx)) static __inline GLuint r300PackColor( GLuint cpp, -- cgit v1.2.3 From 4bafc547df4af0b560dcc6b72c0a6c37d7754abb Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Wed, 21 Mar 2007 00:56:38 +0100 Subject: r300: Remove the program-global const_sin index The index is no longer necessary to share constants between multiple SIN/COS/SCS instructions inside a single fragment program, and storing a tiny implementation detail like this in the fragment_program structure itself was just nasty. --- src/mesa/drivers/dri/r300/r300_context.h | 3 -- src/mesa/drivers/dri/r300/r300_fragprog.c | 51 ++++++++++++++----------------- 2 files changed, 23 insertions(+), 31 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 68151d865e..fe261dbbc6 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -779,9 +779,6 @@ struct r300_fragment_program { int max_temp_idx; - /* the index of the sin constant is stored here */ - GLint const_sin[2]; - GLuint optimization; }; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 2145c48b80..0d7d1f1af2 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -1401,14 +1401,6 @@ static GLfloat SinCosConsts[2][4] = { }; -static void make_sin_const(struct r300_fragment_program *rp) -{ - if(rp->const_sin[0] == -1) { - rp->const_sin[0] = emit_const4fv(rp, SinCosConsts[0]); - rp->const_sin[1] = emit_const4fv(rp, SinCosConsts[1]); - } -} - /** * Emit a LIT instruction. * \p flags may be PFS_FLAG_SAT @@ -1516,6 +1508,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) struct prog_instruction *fpi; GLuint src[3], dest, temp[2]; int flags, mask = 0; + int const_sin[2]; if (!inst || inst[0].Opcode == OPCODE_END) { ERROR("empty program?\n"); @@ -1568,15 +1561,16 @@ static GLboolean parse_program(struct r300_fragment_program *rp) * result = sin(x) */ temp[0] = get_temp_reg(rp); - make_sin_const(rp); + const_sin[0] = emit_const4fv(rp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(rp, SinCosConsts[1]); src[0] = t_scalar_src(rp, fpi->SrcReg[0]); /* add 0.5*PI and do range reduction */ emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X, swizzle(src[0], X, X, X, X), - swizzle(rp->const_sin[1], Z, Z, Z, Z), - swizzle(rp->const_sin[1], X, X, X, X), + swizzle(const_sin[1], Z, Z, Z, Z), + swizzle(const_sin[1], X, X, X, X), 0); emit_arith(rp, PFS_OP_FRC, temp[0], WRITEMASK_X, @@ -1587,15 +1581,15 @@ static GLboolean parse_program(struct r300_fragment_program *rp) emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), - swizzle(rp->const_sin[1], W, W, W, W), //2*PI - negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //-PI + swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI 0); /* SIN */ emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], Z, Z, Z, Z), - rp->const_sin[0], + const_sin[0], pfs_zero, 0); @@ -1614,7 +1608,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) emit_arith(rp, PFS_OP_MAD, dest, mask, swizzle(temp[0], Y, Y, Y, Y), - swizzle(rp->const_sin[0], W, W, W, W), + swizzle(const_sin[0], W, W, W, W), swizzle(temp[0], X, X, X, X), flags); @@ -1808,19 +1802,20 @@ static GLboolean parse_program(struct r300_fragment_program *rp) */ temp[0] = get_temp_reg(rp); temp[1] = get_temp_reg(rp); - make_sin_const(rp); + const_sin[0] = emit_const4fv(rp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(rp, SinCosConsts[1]); src[0] = t_scalar_src(rp, fpi->SrcReg[0]); /* x = -abs(x)+0.5*PI */ emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Z, - swizzle(rp->const_sin[0], Z, Z, Z, Z), //PI + swizzle(const_sin[0], Z, Z, Z, Z), //PI pfs_half, negate(abs(swizzle(keep(src[0]), X, X, X, X))), 0); /* C*x (sin) */ emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_W, - swizzle(rp->const_sin[0], Y, Y, Y, Y), + swizzle(const_sin[0], Y, Y, Y, Y), swizzle(keep(src[0]), X, X, X, X), pfs_zero, 0); @@ -1828,13 +1823,13 @@ static GLboolean parse_program(struct r300_fragment_program *rp) /* B*x, C*x (cos) */ emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], Z, Z, Z, Z), - rp->const_sin[0], + const_sin[0], pfs_zero, 0); /* B*x (sin) */ emit_arith(rp, PFS_OP_MAD, temp[1], WRITEMASK_W, - swizzle(rp->const_sin[0], X, X, X, X), + swizzle(const_sin[0], X, X, X, X), keep(src[0]), pfs_zero, 0); @@ -1864,7 +1859,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) /* dest.xy = mad(temp.xy, P, temp2.wz) */ emit_arith(rp, PFS_OP_MAD, dest, mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], - swizzle(rp->const_sin[0], W, W, W, W), + swizzle(const_sin[0], W, W, W, W), swizzle(temp[1], W, Z, Y, X), flags); @@ -1895,7 +1890,8 @@ static GLboolean parse_program(struct r300_fragment_program *rp) */ temp[0] = get_temp_reg(rp); - make_sin_const(rp); + const_sin[0] = emit_const4fv(rp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(rp, SinCosConsts[1]); src[0] = t_scalar_src(rp, fpi->SrcReg[0]); @@ -1903,7 +1899,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X, swizzle(keep(src[0]), X, X, X, X), - swizzle(rp->const_sin[1], Z, Z, Z, Z), + swizzle(const_sin[1], Z, Z, Z, Z), pfs_half, 0); @@ -1915,15 +1911,15 @@ static GLboolean parse_program(struct r300_fragment_program *rp) emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), - swizzle(rp->const_sin[1], W, W, W, W), //2*PI - negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //PI + swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI 0); /* SIN */ emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], Z, Z, Z, Z), - rp->const_sin[0], + const_sin[0], pfs_zero, 0); @@ -1942,7 +1938,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) emit_arith(rp, PFS_OP_MAD, dest, mask, swizzle(temp[0], Y, Y, Y, Y), - swizzle(rp->const_sin[0], W, W, W, W), + swizzle(const_sin[0], W, W, W, W), swizzle(temp[0], X, X, X, X), flags); @@ -2124,7 +2120,6 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) rp->max_temp_idx = 0; rp->node[0].alu_end = -1; rp->node[0].tex_end = -1; - rp->const_sin[0] = -1; _mesa_memset(cs, 0, sizeof(*rp->cs)); for (i=0;i Date: Wed, 21 Mar 2007 15:05:07 +1100 Subject: nouveau: SwapBuffers() needs to perform a glFlush() --- src/mesa/drivers/dri/nouveau/nouveau_context.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index aec7b19771..f217c4404e 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -352,6 +352,7 @@ static void nouveauDoSwapBuffers(nouveauContextPtr nmesa, OUT_RING (((box->y2 - box->y1) << 16) | (box->x2 - box->x1)); } + FIRE_RING(); UNLOCK_HARDWARE(nmesa); #endif -- cgit v1.2.3 From 4b5d6c0435acd84c13e0db3785758fed0bc48fe1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 21 Mar 2007 17:54:57 +1100 Subject: nouveau: update for drm interface changes (0.0.5) --- src/mesa/drivers/dri/nouveau/nouveau_context.h | 1 + src/mesa/drivers/dri/nouveau/nouveau_fifo.c | 1 + src/mesa/drivers/dri/nouveau/nouveau_object.c | 18 ++++++++++-------- src/mesa/drivers/dri/nouveau/nouveau_screen.c | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.h b/src/mesa/drivers/dri/nouveau/nouveau_context.h index f79a8675f4..87e4479da3 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.h @@ -45,6 +45,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "xmlconfig.h" typedef struct nouveau_fifo_t{ + int channel; u_int32_t* buffer; u_int32_t* mmio; u_int32_t put_base; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fifo.c b/src/mesa/drivers/dri/nouveau/nouveau_fifo.c index 67b5aa4f8a..bd2b2eddd0 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fifo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fifo.c @@ -124,6 +124,7 @@ GLboolean nouveauFifoInit(nouveauContextPtr nmesa) } /* Setup our initial FIFO tracking params */ + nmesa->fifo.channel = fifo_init.channel; nmesa->fifo.put_base = fifo_init.put_base; nmesa->fifo.current = 0; nmesa->fifo.put = 0; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c index 468b18e6d9..b71acff430 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -10,8 +10,9 @@ GLboolean nouveauCreateContextObject(nouveauContextPtr nmesa, drm_nouveau_object_init_t cto; int ret; - cto.handle = handle; - cto.class = class; + cto.channel = nmesa->fifo.channel; + cto.handle = handle; + cto.class = class; ret = drmCommandWrite(nmesa->driFd, DRM_NOUVEAU_OBJECT_INIT, &cto, sizeof(cto)); return ret == 0; @@ -28,12 +29,13 @@ GLboolean nouveauCreateDmaObject(nouveauContextPtr nmesa, drm_nouveau_dma_object_init_t dma; int ret; - dma.class = class; - dma.handle = handle; - dma.target = target; - dma.access = access; - dma.offset = offset; - dma.size = size; + dma.channel = nmesa->fifo.channel; + dma.class = class; + dma.handle = handle; + dma.target = target; + dma.access = access; + dma.offset = offset; + dma.size = size; ret = drmCommandWriteRead(nmesa->driFd, DRM_NOUVEAU_DMA_OBJECT_INIT, &dma, sizeof(dma)); return ret == 0; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_screen.c b/src/mesa/drivers/dri/nouveau/nouveau_screen.c index e00080fce1..65bde99671 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_screen.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_screen.c @@ -328,7 +328,7 @@ void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIsc static const __DRIversion ddx_expected = { 1, 2, 0 }; static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; -#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 4 +#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 5 #error nouveau_drm.h version doesn't match expected version #endif dri_interface = interface; -- cgit v1.2.3 From c5fe807e426988fc0ef931b417b5b3f621250595 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Wed, 21 Mar 2007 13:19:02 +0100 Subject: fix copy and paste bug from last commit in fog generation code for GL_LINEAR fog --- src/mesa/drivers/dri/i965/brw_vs_tnl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i965/brw_vs_tnl.c b/src/mesa/drivers/dri/i965/brw_vs_tnl.c index e22a26b291..dd1664bf33 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_tnl.c +++ b/src/mesa/drivers/dri/i965/brw_vs_tnl.c @@ -1174,8 +1174,8 @@ static void build_fog( struct tnl_program *p ) switch (p->state->fog_option) { case FOG_LINEAR: { - emit_op3(p, OPCODE_MAD, tmp, 0, input, swizzle1(params,X), swizzle1(params,Y)); - emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,W)); + emit_op1(p, OPCODE_ABS, tmp, 0, input); + emit_op3(p, OPCODE_MAD, tmp, 0, tmp, swizzle1(params,X), swizzle1(params,Y)); emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */ emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W)); break; -- cgit v1.2.3