diff options
Diffstat (limited to 'src/mesa/drivers')
258 files changed, 18310 insertions, 9726 deletions
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 3b397fef7d..f09106b77c 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -45,6 +45,9 @@ #include "main/fbobject.h" #include "main/texrender.h" #endif +#if FEATURE_ARB_sync +#include "main/syncobj.h" +#endif #include "shader/program.h" #include "shader/prog_execute.h" @@ -53,6 +56,7 @@ #include "swrast/swrast.h" #include "driverfuncs.h" +#include "meta.h" @@ -97,11 +101,11 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->TexSubImage2D = _mesa_store_texsubimage2d; driver->TexSubImage3D = _mesa_store_texsubimage3d; driver->GetTexImage = _mesa_get_teximage; - driver->CopyTexImage1D = _swrast_copy_teximage1d; - driver->CopyTexImage2D = _swrast_copy_teximage2d; - driver->CopyTexSubImage1D = _swrast_copy_texsubimage1d; - driver->CopyTexSubImage2D = _swrast_copy_texsubimage2d; - driver->CopyTexSubImage3D = _swrast_copy_texsubimage3d; + driver->CopyTexImage1D = _mesa_meta_CopyTexImage1D; + driver->CopyTexImage2D = _mesa_meta_CopyTexImage2D; + driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D; + driver->CopyTexSubImage2D = _mesa_meta_CopyTexSubImage2D; + driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D; driver->GenerateMipmap = _mesa_generate_mipmap; driver->TestProxyTexImage = _mesa_test_proxy_teximage; driver->CompressedTexImage1D = _mesa_store_compressed_teximage1d; @@ -126,10 +130,10 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->UpdateTexturePalette = NULL; /* imaging */ - driver->CopyColorTable = _swrast_CopyColorTable; - driver->CopyColorSubTable = _swrast_CopyColorSubTable; - driver->CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D; - driver->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D; + driver->CopyColorTable = _mesa_meta_CopyColorTable; + driver->CopyColorSubTable = _mesa_meta_CopyColorSubTable; + driver->CopyConvolutionFilter1D = 
_mesa_meta_CopyConvolutionFilter1D; + driver->CopyConvolutionFilter2D = _mesa_meta_CopyConvolutionFilter2D; /* Vertex/fragment programs */ driver->BindProgram = NULL; @@ -200,6 +204,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->GetDoublev = NULL; driver->GetFloatv = NULL; driver->GetIntegerv = NULL; + driver->GetInteger64v = NULL; driver->GetPointerv = NULL; /* buffer objects */ @@ -208,6 +213,10 @@ _mesa_init_driver_functions(struct dd_function_table *driver) /* query objects */ _mesa_init_query_object_functions(driver); +#if FEATURE_ARB_sync + _mesa_init_sync_object_functions(driver); +#endif + #if FEATURE_EXT_framebuffer_object driver->NewFramebuffer = _mesa_new_framebuffer; driver->NewRenderbuffer = _mesa_new_soft_renderbuffer; diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c new file mode 100644 index 0000000000..21756786c5 --- /dev/null +++ b/src/mesa/drivers/common/meta.c @@ -0,0 +1,2471 @@ +/* + * Mesa 3-D graphics library + * Version: 7.6 + * + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Meta operations. Some GL operations can be expressed in terms of + * other GL operations. For example, glBlitFramebuffer() can be done + * with texture mapping and glClear() can be done with polygon rendering. + * + * \author Brian Paul + */ + + +#include "main/glheader.h" +#include "main/mtypes.h" +#include "main/imports.h" +#include "main/arrayobj.h" +#include "main/blend.h" +#include "main/bufferobj.h" +#include "main/buffers.h" +#include "main/colortab.h" +#include "main/convolve.h" +#include "main/depth.h" +#include "main/enable.h" +#include "main/fbobject.h" +#include "main/image.h" +#include "main/macros.h" +#include "main/matrix.h" +#include "main/mipmap.h" +#include "main/polygon.h" +#include "main/readpix.h" +#include "main/scissor.h" +#include "main/shaders.h" +#include "main/state.h" +#include "main/stencil.h" +#include "main/texobj.h" +#include "main/texenv.h" +#include "main/teximage.h" +#include "main/texparam.h" +#include "main/texstate.h" +#include "main/varray.h" +#include "main/viewport.h" +#include "shader/program.h" +#include "shader/arbprogram.h" +#include "swrast/swrast.h" +#include "drivers/common/meta.h" + + +/** Return offset in bytes of the field within a vertex struct */ +#define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD)) + + +/** + * Flags passed to _mesa_meta_begin(). 
+ */ +/*@{*/ +#define META_ALL ~0x0 /**< every bit set: all of the groups below */ +#define META_ALPHA_TEST 0x1 +#define META_BLEND 0x2 /**< includes logicop */ +#define META_COLOR_MASK 0x4 +#define META_DEPTH_TEST 0x8 +#define META_FOG 0x10 +#define META_PIXEL_STORE 0x20 +#define META_PIXEL_TRANSFER 0x40 +#define META_RASTERIZATION 0x80 +#define META_SCISSOR 0x100 +#define META_SHADER 0x200 +#define META_STENCIL_TEST 0x400 +#define META_TRANSFORM 0x800 /**< modelview, projection, unit[0] texture matrix, clip planes */ +#define META_TEXTURE 0x1000 +#define META_VERTEX 0x2000 +#define META_VIEWPORT 0x4000 +/*@}*/ + + +/** + * State which we may save/restore across meta ops. + * Written by _mesa_meta_begin() and read back by _mesa_meta_end(); the + * fields under each META_* heading are only saved when that flag is passed. + * XXX this may be incomplete... + */ +struct save_state +{ + GLbitfield SavedState; /**< bitmask of META_* flags */ + + /** META_ALPHA_TEST */ + GLboolean AlphaEnabled; + + /** META_BLEND */ + GLboolean BlendEnabled; + GLboolean ColorLogicOpEnabled; + + /** META_COLOR_MASK */ + GLubyte ColorMask[4]; + + /** META_DEPTH_TEST */ + struct gl_depthbuffer_attrib Depth; + + /** META_FOG */ + GLboolean Fog; + + /** META_PIXEL_STORE */ + struct gl_pixelstore_attrib Pack, Unpack; + + /** META_PIXEL_TRANSFER */ + GLfloat RedBias, RedScale; + GLfloat GreenBias, GreenScale; + GLfloat BlueBias, BlueScale; + GLfloat AlphaBias, AlphaScale; + GLfloat DepthBias, DepthScale; /**< not written by _mesa_meta_begin() or read by _mesa_meta_end() */ + GLboolean MapColorFlag; + GLboolean Convolution1DEnabled; + GLboolean Convolution2DEnabled; + GLboolean Separable2DEnabled; + + /** META_RASTERIZATION */ + GLenum FrontPolygonMode, BackPolygonMode; + GLboolean PolygonOffset; + GLboolean PolygonSmooth; + GLboolean PolygonStipple; + GLboolean PolygonCull; + + /** META_SCISSOR */ + struct gl_scissor_attrib Scissor; + + /** META_SHADER */ + GLboolean VertexProgramEnabled; + struct gl_vertex_program *VertexProgram; /**< plain pointer copy made in _mesa_meta_begin() */ + GLboolean FragmentProgramEnabled; + struct gl_fragment_program *FragmentProgram; /**< plain pointer copy made in _mesa_meta_begin() */ + GLuint Shader; /**< Name of ctx->Shader.CurrentProgram, or 0 if there was none */ + + /** META_STENCIL_TEST */ + struct gl_stencil_attrib Stencil; + + /** META_TRANSFORM */ + GLenum MatrixMode; + GLfloat ModelviewMatrix[16]; + GLfloat 
ProjectionMatrix[16]; + GLfloat TextureMatrix[16]; /**< texture matrix of unit[0] only */ + GLbitfield ClipPlanesEnabled; + + /** META_TEXTURE */ + GLuint ActiveUnit; + GLuint ClientActiveUnit; + /** for unit[0] only */ + struct gl_texture_object *CurrentTexture[NUM_TEXTURE_TARGETS]; + /** mask of TEXTURE_2D_BIT, etc */ + GLbitfield TexEnabled[MAX_TEXTURE_UNITS]; + GLbitfield TexGenEnabled[MAX_TEXTURE_UNITS]; /**< mask of S_BIT, T_BIT, R_BIT, Q_BIT */ + GLuint EnvMode; /* unit[0] only */ + + /** META_VERTEX */ + struct gl_array_object *ArrayObj; /**< referenced in _mesa_meta_begin(), released in _mesa_meta_end() */ + struct gl_buffer_object *ArrayBufferObj; /**< referenced in _mesa_meta_begin(), released in _mesa_meta_end() */ + + /** META_VIEWPORT */ + GLint ViewportX, ViewportY, ViewportW, ViewportH; + GLclampd DepthNear, DepthFar; + + /** Miscellaneous (always disabled) */ + GLboolean Lighting; /**< saved by _mesa_meta_begin() whatever flags are passed */ +}; + + +/** + * Temporary texture used for glBlitFramebuffer, glDrawPixels, etc. + * This is currently shared by all the meta ops except glBitmap, which keeps + * its own instance in struct bitmap_state. But we could create a + * separate one for each of glDrawPixels, glBlitFramebuffer, glCopyPixels, etc. + */ +struct temp_texture +{ + GLuint TexObj; /**< texture name; get_temp_texture() treats 0 as not created yet */ + GLenum Target; /**< GL_TEXTURE_2D or GL_TEXTURE_RECTANGLE */ + GLsizei MinSize; /**< Min texture size to allocate */ + GLsizei MaxSize; /**< Max possible texture size */ + GLboolean NPOT; /**< Non-power of two size OK? 
*/ + GLsizei Width, Height; /**< Current texture size */ + GLenum IntFormat; /**< internal format last recorded by alloc_texture() */ + GLfloat Sright, Ttop; /**< right, top texcoords */ +}; + + +/** + * State for glBlitFramebuffer() + */ +struct blit_state +{ + GLuint ArrayObj; + GLuint VBO; + GLuint DepthFP; /**< Fragment program created by init_blit_depth_pixels() */ +}; + + +/** + * State for glClear() + */ +struct clear_state +{ + GLuint ArrayObj; + GLuint VBO; +}; + + +/** + * State for glCopyPixels() + */ +struct copypix_state +{ + GLuint ArrayObj; + GLuint VBO; +}; + + +/** + * State for glDrawPixels() + */ +struct drawpix_state +{ + GLuint ArrayObj; + + GLuint StencilFP; /**< Fragment program for drawing stencil images */ + GLuint DepthFP; /**< Fragment program for drawing depth images */ +}; + + +/** + * State for glBitmap() + */ +struct bitmap_state +{ + GLuint ArrayObj; + GLuint VBO; + struct temp_texture Tex; /**< separate texture from other meta ops */ +}; + + +/** + * State for _mesa_meta_generate_mipmap() + */ +struct gen_mipmap_state +{ + GLuint ArrayObj; + GLuint VBO; + GLuint FBO; +}; + + +/** + * All per-context meta state. + * One instance hangs off ctx->Meta; see _mesa_meta_init() and + * _mesa_meta_free(). + */ +struct gl_meta_state +{ + struct save_state Save; /**< state saved during meta-ops */ + + struct temp_texture TempTex; /**< returned by get_temp_texture() */ + + struct blit_state Blit; /**< For _mesa_meta_BlitFramebuffer() */ + struct clear_state Clear; /**< For _mesa_meta_Clear() */ + struct copypix_state CopyPix; /**< For _mesa_meta_CopyPixels() */ + struct drawpix_state DrawPix; /**< For _mesa_meta_DrawPixels() */ + struct bitmap_state Bitmap; /**< For _mesa_meta_Bitmap() */ + struct gen_mipmap_state Mipmap; /**< For _mesa_meta_GenerateMipmap() */ +}; + + +/** + * Initialize meta-ops for a context. + * To be called once during context creation. + * Allocates ctx->Meta with CALLOC_STRUCT(); the result is not checked here. + */ +void +_mesa_meta_init(GLcontext *ctx) +{ + ASSERT(!ctx->Meta); + + ctx->Meta = CALLOC_STRUCT(gl_meta_state); +} + + +/** + * Free context meta-op state. + * To be called once during context destruction. 
+ */ +void +_mesa_meta_free(GLcontext *ctx) +{ + struct gl_meta_state *meta = ctx->Meta; + + if (_mesa_get_current_context()) { + /* if there's no current context, these textures, buffers, etc should + * still get freed by _mesa_free_context_data(). + */ + + /* the temporary texture */ + _mesa_DeleteTextures(1, &meta->TempTex.TexObj); + + /* glBlitFramebuffer */ + _mesa_DeleteBuffersARB(1, & meta->Blit.VBO); + _mesa_DeleteVertexArraysAPPLE(1, &meta->Blit.ArrayObj); + _mesa_DeletePrograms(1, &meta->Blit.DepthFP); + + /* glClear */ + _mesa_DeleteBuffersARB(1, & meta->Clear.VBO); + _mesa_DeleteVertexArraysAPPLE(1, &meta->Clear.ArrayObj); + + /* glCopyPixels */ + _mesa_DeleteBuffersARB(1, & meta->CopyPix.VBO); + _mesa_DeleteVertexArraysAPPLE(1, &meta->CopyPix.ArrayObj); + + /* glDrawPixels */ + _mesa_DeleteVertexArraysAPPLE(1, &meta->DrawPix.ArrayObj); + _mesa_DeletePrograms(1, &meta->DrawPix.DepthFP); + _mesa_DeletePrograms(1, &meta->DrawPix.StencilFP); + + /* glBitmap */ + _mesa_DeleteBuffersARB(1, & meta->Bitmap.VBO); + _mesa_DeleteVertexArraysAPPLE(1, &meta->Bitmap.ArrayObj); + _mesa_DeleteTextures(1, &meta->Bitmap.Tex.TexObj); + } + + _mesa_free(ctx->Meta); + ctx->Meta = NULL; +} + + +/** + * Enter meta state. This is like a light-weight version of glPushAttrib + * but it also resets most GL state back to default values. 
+ * + * \param state bitmask of META_* flags indicating which attribute groups + * to save and reset to their defaults + */ +static void +_mesa_meta_begin(GLcontext *ctx, GLbitfield state) +{ + struct save_state *save = &ctx->Meta->Save; + + save->SavedState = state; /* read back by _mesa_meta_end() to undo the same groups */ + + if (state & META_ALPHA_TEST) { + save->AlphaEnabled = ctx->Color.AlphaEnabled; + if (ctx->Color.AlphaEnabled) + _mesa_set_enable(ctx, GL_ALPHA_TEST, GL_FALSE); + } + + if (state & META_BLEND) { + save->BlendEnabled = ctx->Color.BlendEnabled; + if (ctx->Color.BlendEnabled) + _mesa_set_enable(ctx, GL_BLEND, GL_FALSE); + save->ColorLogicOpEnabled = ctx->Color.ColorLogicOpEnabled; + if (ctx->Color.ColorLogicOpEnabled) + _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, GL_FALSE); + } + + if (state & META_COLOR_MASK) { + COPY_4V(save->ColorMask, ctx->Color.ColorMask); + if (!ctx->Color.ColorMask[0] || + !ctx->Color.ColorMask[1] || + !ctx->Color.ColorMask[2] || + !ctx->Color.ColorMask[3]) + _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + } + + if (state & META_DEPTH_TEST) { + save->Depth = ctx->Depth; /* struct copy; only the depth test enable is reset below */ + if (ctx->Depth.Test) + _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE); + } + + if (state & META_FOG) { + save->Fog = ctx->Fog.Enabled; + if (ctx->Fog.Enabled) + _mesa_set_enable(ctx, GL_FOG, GL_FALSE); + } + + if (state & META_PIXEL_STORE) { + save->Pack = ctx->Pack; + save->Unpack = ctx->Unpack; + ctx->Pack = ctx->DefaultPacking; + ctx->Unpack = ctx->DefaultPacking; + } + + if (state & META_PIXEL_TRANSFER) { + save->RedScale = ctx->Pixel.RedScale; + save->RedBias = ctx->Pixel.RedBias; + save->GreenScale = ctx->Pixel.GreenScale; + save->GreenBias = ctx->Pixel.GreenBias; + save->BlueScale = ctx->Pixel.BlueScale; + save->BlueBias = ctx->Pixel.BlueBias; + save->AlphaScale = ctx->Pixel.AlphaScale; + save->AlphaBias = ctx->Pixel.AlphaBias; + save->MapColorFlag = ctx->Pixel.MapColorFlag; + save->Convolution1DEnabled = ctx->Pixel.Convolution1DEnabled; + save->Convolution2DEnabled = 
ctx->Pixel.Convolution2DEnabled; + save->Separable2DEnabled = ctx->Pixel.Separable2DEnabled; + ctx->Pixel.RedScale = 1.0F; + ctx->Pixel.RedBias = 0.0F; + ctx->Pixel.GreenScale = 1.0F; + ctx->Pixel.GreenBias = 0.0F; + ctx->Pixel.BlueScale = 1.0F; + ctx->Pixel.BlueBias = 0.0F; + ctx->Pixel.AlphaScale = 1.0F; + ctx->Pixel.AlphaBias = 0.0F; + ctx->Pixel.MapColorFlag = GL_FALSE; + ctx->Pixel.Convolution1DEnabled = GL_FALSE; + ctx->Pixel.Convolution2DEnabled = GL_FALSE; + ctx->Pixel.Separable2DEnabled = GL_FALSE; + /* XXX more state (depth scale/bias, for one, is not touched here) */ + ctx->NewState |=_NEW_PIXEL; + } + + if (state & META_RASTERIZATION) { + save->FrontPolygonMode = ctx->Polygon.FrontMode; + save->BackPolygonMode = ctx->Polygon.BackMode; + save->PolygonOffset = ctx->Polygon.OffsetFill; + save->PolygonSmooth = ctx->Polygon.SmoothFlag; + save->PolygonStipple = ctx->Polygon.StippleFlag; + save->PolygonCull = ctx->Polygon.CullFlag; + _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL); + _mesa_set_enable(ctx, GL_POLYGON_OFFSET_FILL, GL_FALSE); + _mesa_set_enable(ctx, GL_POLYGON_SMOOTH, GL_FALSE); + _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, GL_FALSE); + _mesa_set_enable(ctx, GL_CULL_FACE, GL_FALSE); + } + + if (state & META_SCISSOR) { + save->Scissor = ctx->Scissor; /* struct copy; NOTE(review): the scissor test itself is left as it was, unlike the other groups */ + } + + if (state & META_SHADER) { + if (ctx->Extensions.ARB_vertex_program) { + save->VertexProgramEnabled = ctx->VertexProgram.Enabled; + save->VertexProgram = ctx->VertexProgram.Current; /* pointer copy only */ + _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB, GL_FALSE); + } + + if (ctx->Extensions.ARB_fragment_program) { + save->FragmentProgramEnabled = ctx->FragmentProgram.Enabled; + save->FragmentProgram = ctx->FragmentProgram.Current; /* pointer copy only */ + _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_FALSE); + } + + if (ctx->Extensions.ARB_shader_objects) { + save->Shader = ctx->Shader.CurrentProgram ? 
+ ctx->Shader.CurrentProgram->Name : 0; + _mesa_UseProgramObjectARB(0); + } + } + + if (state & META_STENCIL_TEST) { + save->Stencil = ctx->Stencil; /* struct copy */ + if (ctx->Stencil.Enabled) + _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE); + /* NOTE: other stencil state not reset */ + } + + if (state & META_TEXTURE) { + GLuint u, tgt; + + save->ActiveUnit = ctx->Texture.CurrentUnit; + save->ClientActiveUnit = ctx->Array.ActiveTexture; + save->EnvMode = ctx->Texture.Unit[0].EnvMode; + + /* Record the enables of every unit, then disable any unit that has + * texturing or texgen switched on */ + for (u = 0; u < ctx->Const.MaxTextureUnits; u++) { + save->TexEnabled[u] = ctx->Texture.Unit[u].Enabled; + save->TexGenEnabled[u] = ctx->Texture.Unit[u].TexGenEnabled; + if (ctx->Texture.Unit[u].Enabled || + ctx->Texture.Unit[u].TexGenEnabled) { + _mesa_ActiveTextureARB(GL_TEXTURE0 + u); + _mesa_set_enable(ctx, GL_TEXTURE_1D, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_2D, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_3D, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_RECTANGLE, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_GEN_S, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_GEN_T, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_GEN_R, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_GEN_Q, GL_FALSE); + } + } + + /* save current texture objects for unit[0] only; each saved slot + * holds its own reference */ + for (tgt = 0; tgt < NUM_TEXTURE_TARGETS; tgt++) { + _mesa_reference_texobj(&save->CurrentTexture[tgt], + ctx->Texture.Unit[0].CurrentTex[tgt]); + } + + /* set defaults for unit[0]; unit 0 is left active and client-active */ + _mesa_ActiveTextureARB(GL_TEXTURE0); + _mesa_ClientActiveTextureARB(GL_TEXTURE0); + _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + } + + if (state & META_TRANSFORM) { + GLuint activeTexture = ctx->Texture.CurrentUnit; /* restored after the unit[0] texture matrix is reset */ + _mesa_memcpy(save->ModelviewMatrix, ctx->ModelviewMatrixStack.Top->m, + 16 * sizeof(GLfloat)); + _mesa_memcpy(save->ProjectionMatrix, ctx->ProjectionMatrixStack.Top->m, + 16 * sizeof(GLfloat)); + 
_mesa_memcpy(save->TextureMatrix, ctx->TextureMatrixStack[0].Top->m, + 16 * sizeof(GLfloat)); + save->MatrixMode = ctx->Transform.MatrixMode; + /* set 1:1 vertex:pixel coordinate transform: identity texture and + * modelview matrices, orthographic projection sized to the draw buffer */ + _mesa_ActiveTextureARB(GL_TEXTURE0); + _mesa_MatrixMode(GL_TEXTURE); + _mesa_LoadIdentity(); + _mesa_ActiveTextureARB(GL_TEXTURE0 + activeTexture); + _mesa_MatrixMode(GL_MODELVIEW); + _mesa_LoadIdentity(); + _mesa_MatrixMode(GL_PROJECTION); + _mesa_LoadIdentity(); + _mesa_Ortho(0.0F, ctx->DrawBuffer->Width, + 0.0F, ctx->DrawBuffer->Height, + -1.0F, 1.0F); + save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled; + if (ctx->Transform.ClipPlanesEnabled) { + GLuint i; + for (i = 0; i < ctx->Const.MaxClipPlanes; i++) { + _mesa_set_enable(ctx, GL_CLIP_PLANE0 + i, GL_FALSE); + } + } + } + + if (state & META_VERTEX) { + /* save vertex array object state; both references are released + * again in _mesa_meta_end() */ + _mesa_reference_array_object(ctx, &save->ArrayObj, + ctx->Array.ArrayObj); + _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj, + ctx->Array.ArrayBufferObj); + /* set some default state? */ + } + + if (state & META_VIEWPORT) { + /* save viewport state */ + save->ViewportX = ctx->Viewport.X; + save->ViewportY = ctx->Viewport.Y; + save->ViewportW = ctx->Viewport.Width; + save->ViewportH = ctx->Viewport.Height; + /* set viewport to match window size */ + if (ctx->Viewport.X != 0 || + ctx->Viewport.Y != 0 || + ctx->Viewport.Width != ctx->DrawBuffer->Width || + ctx->Viewport.Height != ctx->DrawBuffer->Height) { + _mesa_set_viewport(ctx, 0, 0, + ctx->DrawBuffer->Width, ctx->DrawBuffer->Height); + } + /* save depth range state */ + save->DepthNear = ctx->Viewport.Near; + save->DepthFar = ctx->Viewport.Far; + /* set depth range to default */ + _mesa_DepthRange(0.0, 1.0); + } + + /* misc: lighting is saved and disabled whatever flags were passed */ + { + save->Lighting = ctx->Light.Enabled; + if (ctx->Light.Enabled) + _mesa_set_enable(ctx, GL_LIGHTING, GL_FALSE); + } +} + + +/** + * Leave meta state. This is like a light-weight version of glPopAttrib(). 
+ * Restores the attribute groups recorded in SavedState by the matching + * _mesa_meta_begin() call and releases the object references taken there. + * Lighting is re-enabled if it was on, whatever flags were passed. + */ +static void +_mesa_meta_end(GLcontext *ctx) +{ + struct save_state *save = &ctx->Meta->Save; + const GLbitfield state = save->SavedState; + + if (state & META_ALPHA_TEST) { + if (ctx->Color.AlphaEnabled != save->AlphaEnabled) + _mesa_set_enable(ctx, GL_ALPHA_TEST, save->AlphaEnabled); + } + + if (state & META_BLEND) { + if (ctx->Color.BlendEnabled != save->BlendEnabled) + _mesa_set_enable(ctx, GL_BLEND, save->BlendEnabled); + if (ctx->Color.ColorLogicOpEnabled != save->ColorLogicOpEnabled) + _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, save->ColorLogicOpEnabled); + } + + if (state & META_COLOR_MASK) { + if (!TEST_EQ_4V(ctx->Color.ColorMask, save->ColorMask)) + _mesa_ColorMask(save->ColorMask[0], save->ColorMask[1], + save->ColorMask[2], save->ColorMask[3]); + } + + if (state & META_DEPTH_TEST) { + if (ctx->Depth.Test != save->Depth.Test) + _mesa_set_enable(ctx, GL_DEPTH_TEST, save->Depth.Test); + _mesa_DepthFunc(save->Depth.Func); + _mesa_DepthMask(save->Depth.Mask); + } + + if (state & META_FOG) { + _mesa_set_enable(ctx, GL_FOG, save->Fog); + } + + if (state & META_PIXEL_STORE) { + ctx->Pack = save->Pack; + ctx->Unpack = save->Unpack; + } + + if (state & META_PIXEL_TRANSFER) { + ctx->Pixel.RedScale = save->RedScale; + ctx->Pixel.RedBias = save->RedBias; + ctx->Pixel.GreenScale = save->GreenScale; + ctx->Pixel.GreenBias = save->GreenBias; + ctx->Pixel.BlueScale = save->BlueScale; + ctx->Pixel.BlueBias = save->BlueBias; + ctx->Pixel.AlphaScale = save->AlphaScale; + ctx->Pixel.AlphaBias = save->AlphaBias; + ctx->Pixel.MapColorFlag = save->MapColorFlag; + ctx->Pixel.Convolution1DEnabled = save->Convolution1DEnabled; + ctx->Pixel.Convolution2DEnabled = save->Convolution2DEnabled; + ctx->Pixel.Separable2DEnabled = save->Separable2DEnabled; + /* XXX more state */ + ctx->NewState |=_NEW_PIXEL; + } + + if (state & META_RASTERIZATION) { + _mesa_PolygonMode(GL_FRONT, save->FrontPolygonMode); + _mesa_PolygonMode(GL_BACK, save->BackPolygonMode); + _mesa_set_enable(ctx, 
GL_POLYGON_STIPPLE, save->PolygonStipple); + _mesa_set_enable(ctx, GL_POLYGON_OFFSET_FILL, save->PolygonOffset); + _mesa_set_enable(ctx, GL_POLYGON_SMOOTH, save->PolygonSmooth); + _mesa_set_enable(ctx, GL_CULL_FACE, save->PolygonCull); + } + + if (state & META_SCISSOR) { + _mesa_set_enable(ctx, GL_SCISSOR_TEST, save->Scissor.Enabled); + _mesa_Scissor(save->Scissor.X, save->Scissor.Y, + save->Scissor.Width, save->Scissor.Height); + } + + if (state & META_SHADER) { + if (ctx->Extensions.ARB_vertex_program) { + _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB, + save->VertexProgramEnabled); + _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, + save->VertexProgram); + } + + if (ctx->Extensions.ARB_fragment_program) { + _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, + save->FragmentProgramEnabled); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, + save->FragmentProgram); + } + + if (ctx->Extensions.ARB_shader_objects) { + _mesa_UseProgramObjectARB(save->Shader); + } + } + + if (state & META_STENCIL_TEST) { + const struct gl_stencil_attrib *stencil = &save->Stencil; + + _mesa_set_enable(ctx, GL_STENCIL_TEST, stencil->Enabled); + _mesa_ClearStencil(stencil->Clear); + if (ctx->Extensions.EXT_stencil_two_side) { + _mesa_set_enable(ctx, GL_STENCIL_TEST_TWO_SIDE_EXT, + stencil->TestTwoSide); + _mesa_ActiveStencilFaceEXT(stencil->ActiveFace + ? 
GL_BACK : GL_FRONT); + } + /* front state */ + _mesa_StencilFuncSeparate(GL_FRONT, + stencil->Function[0], + stencil->Ref[0], + stencil->ValueMask[0]); + _mesa_StencilMaskSeparate(GL_FRONT, stencil->WriteMask[0]); + _mesa_StencilOpSeparate(GL_FRONT, stencil->FailFunc[0], + stencil->ZFailFunc[0], + stencil->ZPassFunc[0]); + /* back state */ + _mesa_StencilFuncSeparate(GL_BACK, + stencil->Function[1], + stencil->Ref[1], + stencil->ValueMask[1]); + _mesa_StencilMaskSeparate(GL_BACK, stencil->WriteMask[1]); + _mesa_StencilOpSeparate(GL_BACK, stencil->FailFunc[1], + stencil->ZFailFunc[1], + stencil->ZPassFunc[1]); + } + + if (state & META_TEXTURE) { + GLuint u, tgt; + + ASSERT(ctx->Texture.CurrentUnit == 0); + + /* restore texenv for unit[0] */ + _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, save->EnvMode); + + /* restore texture objects for unit[0] only */ + for (tgt = 0; tgt < NUM_TEXTURE_TARGETS; tgt++) { + _mesa_reference_texobj(&ctx->Texture.Unit[0].CurrentTex[tgt], + save->CurrentTexture[tgt]); + /* Drop the reference taken in _mesa_meta_begin() so the saved + * slot does not keep the texture object alive after this op, + * mirroring what the META_VERTEX path below does. + */ + _mesa_reference_texobj(&save->CurrentTexture[tgt], NULL); + } + + /* Re-enable textures, texgen */ + for (u = 0; u < ctx->Const.MaxTextureUnits; u++) { + if (save->TexEnabled[u]) { + _mesa_ActiveTextureARB(GL_TEXTURE0 + u); + + if (save->TexEnabled[u] & TEXTURE_1D_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_1D, GL_TRUE); + if (save->TexEnabled[u] & TEXTURE_2D_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_2D, GL_TRUE); + if (save->TexEnabled[u] & TEXTURE_3D_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_3D, GL_TRUE); + if (save->TexEnabled[u] & TEXTURE_CUBE_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP, GL_TRUE); + if (save->TexEnabled[u] & TEXTURE_RECT_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_RECTANGLE, GL_TRUE); + } + + if (save->TexGenEnabled[u]) { + _mesa_ActiveTextureARB(GL_TEXTURE0 + u); + + if (save->TexGenEnabled[u] & S_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_GEN_S, GL_TRUE); + if (save->TexGenEnabled[u] & T_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_GEN_T, GL_TRUE); + if (save->TexGenEnabled[u] & R_BIT) + _mesa_set_enable(ctx, 
GL_TEXTURE_GEN_R, GL_TRUE); + if (save->TexGenEnabled[u] & Q_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_GEN_Q, GL_TRUE); + } + } + + /* restore current unit state */ + _mesa_ActiveTextureARB(GL_TEXTURE0 + save->ActiveUnit); + _mesa_ClientActiveTextureARB(GL_TEXTURE0 + save->ClientActiveUnit); + } + + if (state & META_TRANSFORM) { + GLuint activeTexture = ctx->Texture.CurrentUnit; + _mesa_ActiveTextureARB(GL_TEXTURE0); + _mesa_MatrixMode(GL_TEXTURE); + _mesa_LoadMatrixf(save->TextureMatrix); + _mesa_ActiveTextureARB(GL_TEXTURE0 + activeTexture); + + _mesa_MatrixMode(GL_MODELVIEW); + _mesa_LoadMatrixf(save->ModelviewMatrix); + + _mesa_MatrixMode(GL_PROJECTION); + _mesa_LoadMatrixf(save->ProjectionMatrix); + + _mesa_MatrixMode(save->MatrixMode); + + if (save->ClipPlanesEnabled) { + GLuint i; + for (i = 0; i < ctx->Const.MaxClipPlanes; i++) { + if (save->ClipPlanesEnabled & (1 << i)) { + _mesa_set_enable(ctx, GL_CLIP_PLANE0 + i, GL_TRUE); + } + } + } + } + + if (state & META_VERTEX) { + /* restore vertex buffer object */ + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, save->ArrayBufferObj->Name); + _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj, NULL); + + /* restore vertex array object */ + _mesa_BindVertexArray(save->ArrayObj->Name); + _mesa_reference_array_object(ctx, &save->ArrayObj, NULL); + } + + if (state & META_VIEWPORT) { + if (save->ViewportX != ctx->Viewport.X || + save->ViewportY != ctx->Viewport.Y || + save->ViewportW != ctx->Viewport.Width || + save->ViewportH != ctx->Viewport.Height) { + _mesa_set_viewport(ctx, save->ViewportX, save->ViewportY, + save->ViewportW, save->ViewportH); + } + _mesa_DepthRange(save->DepthNear, save->DepthFar); + } + + /* misc */ + if (save->Lighting) { + _mesa_set_enable(ctx, GL_LIGHTING, GL_TRUE); + } +} + + +/** + * One-time init for a temp_texture object. + * Choose tex target, compute max tex size, etc. 
+ */ +static void +init_temp_texture(GLcontext *ctx, struct temp_texture *tex) +{ + /* prefer texture rectangle */ + if (ctx->Extensions.NV_texture_rectangle) { + tex->Target = GL_TEXTURE_RECTANGLE; + tex->MaxSize = ctx->Const.MaxTextureRectSize; + tex->NPOT = GL_TRUE; + } + else { + /* use 2D texture, NPOT if possible */ + tex->Target = GL_TEXTURE_2D; + tex->MaxSize = 1 << (ctx->Const.MaxTextureLevels - 1); + tex->NPOT = ctx->Extensions.ARB_texture_non_power_of_two; + } + tex->MinSize = 16; /* 16 x 16 at least */ + assert(tex->MaxSize > 0); + + _mesa_GenTextures(1, &tex->TexObj); /* a non-zero name marks tex as initialized for the getters below */ + _mesa_BindTexture(tex->Target, tex->TexObj); /* the new texture is left bound to tex->Target */ +} + + +/** + * Return pointer to temp_texture info for non-bitmap ops. + * This does some one-time init if needed. + * A zero TexObj name is taken to mean init_temp_texture() has not run yet. + * The returned pointer refers to storage inside ctx->Meta. + */ +static struct temp_texture * +get_temp_texture(GLcontext *ctx) +{ + struct temp_texture *tex = &ctx->Meta->TempTex; + + if (!tex->TexObj) { + init_temp_texture(ctx, tex); + } + + return tex; +} + + +/** + * Return pointer to temp_texture info for _mesa_meta_bitmap(). + * We use a separate texture for bitmaps to reduce texture + * allocation/deallocation. + * Same lazy init as get_temp_texture(), but on ctx->Meta->Bitmap.Tex. + */ +static struct temp_texture * +get_bitmap_temp_texture(GLcontext *ctx) +{ + struct temp_texture *tex = &ctx->Meta->Bitmap.Tex; + + if (!tex->TexObj) { + init_temp_texture(ctx, tex); + } + + return tex; +} + + +/** + * Compute the width/height of texture needed to draw an image of the + * given size. Return a flag indicating whether the current texture + * can be re-used (glTexSubImage2D) or if a new texture needs to be + * allocated (glTexImage2D). + * Also, compute s/t texcoords for drawing. 
+ * + * \return GL_TRUE if new texture is needed, GL_FALSE otherwise + */ +static GLboolean +alloc_texture(struct temp_texture *tex, + GLsizei width, GLsizei height, GLenum intFormat) +{ + GLboolean newTex = GL_FALSE; + + ASSERT(width <= tex->MaxSize); /* size limits are the caller's job, e.g. _mesa_meta_BlitFramebuffer() falls back to swrast first */ + ASSERT(height <= tex->MaxSize); + + if (width > tex->Width || + height > tex->Height || + intFormat != tex->IntFormat) { + /* alloc new texture (larger or different format); only the + * bookkeeping in *tex is updated here, no GL calls are made */ + + if (tex->NPOT) { + /* use non-power of two size: the image size itself, but never + * smaller than MinSize */ + tex->Width = MAX2(tex->MinSize, width); + tex->Height = MAX2(tex->MinSize, height); + } + else { + /* find power of two size: keep doubling from MinSize until the + * image fits in each dimension */ + GLsizei w, h; + w = h = tex->MinSize; + while (w < width) + w *= 2; + while (h < height) + h *= 2; + tex->Width = w; + tex->Height = h; + } + + tex->IntFormat = intFormat; + + newTex = GL_TRUE; + } + + /* compute texcoords of the image's right/top edge: the image size itself + * for the rectangle target, otherwise the fraction of the allocated + * texture that the image covers; recomputed on every call */ + if (tex->Target == GL_TEXTURE_RECTANGLE) { + tex->Sright = (GLfloat) width; + tex->Ttop = (GLfloat) height; + } + else { + tex->Sright = (GLfloat) width / tex->Width; + tex->Ttop = (GLfloat) height / tex->Height; + } + + return newTex; +} + + +/** + * Setup/load texture for glCopyPixels or glBlitFramebuffer. 
+ */ +static void +setup_copypix_texture(struct temp_texture *tex, + GLboolean newTex, + GLint srcX, GLint srcY, + GLsizei width, GLsizei height, GLenum intFormat, + GLenum filter) +{ + _mesa_BindTexture(tex->Target, tex->TexObj); /* tex is still bound when this function returns */ + _mesa_TexParameteri(tex->Target, GL_TEXTURE_MIN_FILTER, filter); + _mesa_TexParameteri(tex->Target, GL_TEXTURE_MAG_FILTER, filter); + _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + + /* copy framebuffer image to texture; newTex is the flag returned by + * alloc_texture() -- presumably for the same tex/width/height, confirm + * against callers */ + if (newTex) { + /* create new tex image */ + if (tex->Width == width && tex->Height == height) { + /* create new tex with framebuffer data */ + _mesa_CopyTexImage2D(tex->Target, 0, tex->IntFormat, + srcX, srcY, width, height, 0); + } + else { + /* create empty texture of the full allocated size. + * NOTE(review): intFormat is passed as the client format argument + * here -- confirm callers only pass values valid in that role */ + _mesa_TexImage2D(tex->Target, 0, tex->IntFormat, + tex->Width, tex->Height, 0, + intFormat, GL_UNSIGNED_BYTE, NULL); + /* load image into the lower-left width x height corner */ + _mesa_CopyTexSubImage2D(tex->Target, 0, + 0, 0, srcX, srcY, width, height); + } + } + else { + /* replace existing tex image */ + _mesa_CopyTexSubImage2D(tex->Target, 0, + 0, 0, srcX, srcY, width, height); + } +} + + +/** + * Setup/load texture for glDrawPixels. 
+ */ +static void +setup_drawpix_texture(struct temp_texture *tex, + GLboolean newTex, + GLenum texIntFormat, /* not referenced in the body; tex->IntFormat is used instead */ + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const GLvoid *pixels) +{ + _mesa_BindTexture(tex->Target, tex->TexObj); /* tex is still bound when this function returns */ + _mesa_TexParameteri(tex->Target, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + _mesa_TexParameteri(tex->Target, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + + /* copy pixel data to texture; newTex is the flag returned by + * alloc_texture() -- presumably for the same tex/width/height, confirm + * against callers */ + if (newTex) { + /* create new tex image */ + if (tex->Width == width && tex->Height == height) { + /* create new tex and load image data */ + _mesa_TexImage2D(tex->Target, 0, tex->IntFormat, + tex->Width, tex->Height, 0, format, type, pixels); + } + else { + /* create empty texture of the full allocated size */ + _mesa_TexImage2D(tex->Target, 0, tex->IntFormat, + tex->Width, tex->Height, 0, format, type, NULL); + /* load image into the lower-left width x height corner */ + _mesa_TexSubImage2D(tex->Target, 0, + 0, 0, width, height, format, type, pixels); + } + } + else { + /* replace existing tex image */ + _mesa_TexSubImage2D(tex->Target, 0, + 0, 0, width, height, format, type, pixels); + } +} + + + +/** + * One-time init for drawing depth pixels. 
+ */ +static void +init_blit_depth_pixels(GLcontext *ctx) +{ + static const char *program = + "!!ARBfp1.0\n" + "TEX result.depth, fragment.texcoord[0], texture[0], %s; \n" + "END \n"; + char program2[200]; + struct blit_state *blit = &ctx->Meta->Blit; + struct temp_texture *tex = get_temp_texture(ctx); + const char *texTarget; + + assert(blit->DepthFP == 0); + + /* replace %s with "RECT" or "2D" */ + assert(strlen(program) + 4 < sizeof(program2)); + if (tex->Target == GL_TEXTURE_RECTANGLE) + texTarget = "RECT"; + else + texTarget = "2D"; + _mesa_snprintf(program2, sizeof(program2), program, texTarget); + + _mesa_GenPrograms(1, &blit->DepthFP); + _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, blit->DepthFP); + _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, + strlen(program2), (const GLubyte *) program2); +} + + +/** + * Meta implementation of ctx->Driver.BlitFramebuffer() in terms + * of texture mapping and polygon rendering. + */ +void +_mesa_meta_BlitFramebuffer(GLcontext *ctx, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + struct blit_state *blit = &ctx->Meta->Blit; + struct temp_texture *tex = get_temp_texture(ctx); + const GLsizei maxTexSize = tex->MaxSize; + const GLint srcX = MIN2(srcX0, srcX1); + const GLint srcY = MIN2(srcY0, srcY1); + const GLint srcW = abs(srcX1 - srcX0); + const GLint srcH = abs(srcY1 - srcY0); + const GLboolean srcFlipX = srcX1 < srcX0; + const GLboolean srcFlipY = srcY1 < srcY0; + struct vertex { + GLfloat x, y, s, t; + }; + struct vertex verts[4]; + GLboolean newTex; + + if (srcW > maxTexSize || srcH > maxTexSize) { + /* XXX avoid this fallback */ + _swrast_BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, mask, filter); + return; + } + + if (srcFlipX) { + GLint tmp = dstX0; + dstX0 = dstX1; + dstX1 = tmp; + } + + if (srcFlipY) { + GLint tmp = dstY0; + dstY0 = dstY1; + dstY1 = tmp; 
+ } + + /* only scissor effects blit so save/clear all other relevant state */ + _mesa_meta_begin(ctx, ~META_SCISSOR); + + if (blit->ArrayObj == 0) { + /* one-time setup */ + + /* create vertex array object */ + _mesa_GenVertexArrays(1, &blit->ArrayObj); + _mesa_BindVertexArray(blit->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &blit->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, blit->VBO); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), + NULL, GL_DYNAMIC_DRAW_ARB); + + /* setup vertex arrays */ + _mesa_VertexPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(x)); + _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(s)); + _mesa_EnableClientState(GL_VERTEX_ARRAY); + _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY); + } + else { + _mesa_BindVertexArray(blit->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, blit->VBO); + } + + newTex = alloc_texture(tex, srcW, srcH, GL_RGBA); + + /* vertex positions/texcoords (after texture allocation!) 
*/ + { + verts[0].x = (GLfloat) dstX0; + verts[0].y = (GLfloat) dstY0; + verts[1].x = (GLfloat) dstX1; + verts[1].y = (GLfloat) dstY0; + verts[2].x = (GLfloat) dstX1; + verts[2].y = (GLfloat) dstY1; + verts[3].x = (GLfloat) dstX0; + verts[3].y = (GLfloat) dstY1; + + verts[0].s = 0.0F; + verts[0].t = 0.0F; + verts[1].s = tex->Sright; + verts[1].t = 0.0F; + verts[2].s = tex->Sright; + verts[2].t = tex->Ttop; + verts[3].s = 0.0F; + verts[3].t = tex->Ttop; + + /* upload new vertex data */ + _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts); + } + + _mesa_set_enable(ctx, tex->Target, GL_TRUE); + + if (mask & GL_COLOR_BUFFER_BIT) { + setup_copypix_texture(tex, newTex, srcX, srcY, srcW, srcH, + GL_RGBA, filter); + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + mask &= ~GL_COLOR_BUFFER_BIT; + } + + if (mask & GL_DEPTH_BUFFER_BIT) { + GLuint *tmp = (GLuint *) _mesa_malloc(srcW * srcH * sizeof(GLuint)); + if (tmp) { + if (!blit->DepthFP) + init_blit_depth_pixels(ctx); + + /* maybe change tex format here */ + newTex = alloc_texture(tex, srcW, srcH, GL_DEPTH_COMPONENT); + + _mesa_ReadPixels(srcX, srcY, srcW, srcH, + GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp); + + setup_drawpix_texture(tex, newTex, GL_DEPTH_COMPONENT, srcW, srcH, + GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp); + + _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, blit->DepthFP); + _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_TRUE); + _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE); + _mesa_DepthFunc(GL_ALWAYS); + _mesa_DepthMask(GL_TRUE); + + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + mask &= ~GL_DEPTH_BUFFER_BIT; + + _mesa_free(tmp); + } + } + + if (mask & GL_STENCIL_BUFFER_BIT) { + /* XXX can't easily do stencil */ + } + + _mesa_set_enable(ctx, tex->Target, GL_FALSE); + + _mesa_meta_end(ctx); + + if (mask) { + _swrast_BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, mask, filter); + } +} + + +/** + * Meta 
implementation of ctx->Driver.Clear() in terms of polygon rendering. + */ +void +_mesa_meta_Clear(GLcontext *ctx, GLbitfield buffers) +{ + struct clear_state *clear = &ctx->Meta->Clear; + struct vertex { + GLfloat x, y, z, r, g, b, a; + }; + struct vertex verts[4]; + /* save all state but scissor, pixel pack/unpack */ + GLbitfield metaSave = META_ALL - META_SCISSOR - META_PIXEL_STORE; + + if (buffers & BUFFER_BITS_COLOR) { + /* if clearing color buffers, don't save/restore colormask */ + metaSave -= META_COLOR_MASK; + } + + _mesa_meta_begin(ctx, metaSave); + + if (clear->ArrayObj == 0) { + /* one-time setup */ + + /* create vertex array object */ + _mesa_GenVertexArrays(1, &clear->ArrayObj); + _mesa_BindVertexArray(clear->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &clear->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), + NULL, GL_DYNAMIC_DRAW_ARB); + + /* setup vertex arrays */ + _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x)); + _mesa_ColorPointer(4, GL_FLOAT, sizeof(struct vertex), OFFSET(r)); + _mesa_EnableClientState(GL_VERTEX_ARRAY); + _mesa_EnableClientState(GL_COLOR_ARRAY); + } + else { + _mesa_BindVertexArray(clear->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + } + + /* GL_COLOR_BUFFER_BIT */ + if (buffers & BUFFER_BITS_COLOR) { + /* leave colormask, glDrawBuffer state as-is */ + } + else { + ASSERT(metaSave & META_COLOR_MASK); + _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + } + + /* GL_DEPTH_BUFFER_BIT */ + if (buffers & BUFFER_BIT_DEPTH) { + _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE); + _mesa_DepthFunc(GL_ALWAYS); + _mesa_DepthMask(GL_TRUE); + } + else { + assert(!ctx->Depth.Test); + } + + /* GL_STENCIL_BUFFER_BIT */ + if (buffers & BUFFER_BIT_STENCIL) { + _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE); + _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, + GL_REPLACE, GL_REPLACE, GL_REPLACE); + 
_mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, + ctx->Stencil.Clear & 0x7fffffff, + ctx->Stencil.WriteMask[0]); + } + else { + assert(!ctx->Stencil.Enabled); + } + + /* vertex positions/colors */ + { + const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin; + const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin; + const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax; + const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax; + const GLfloat z = 1.0 - 2.0 * ctx->Depth.Clear; + GLuint i; + + verts[0].x = x0; + verts[0].y = y0; + verts[0].z = z; + verts[1].x = x1; + verts[1].y = y0; + verts[1].z = z; + verts[2].x = x1; + verts[2].y = y1; + verts[2].z = z; + verts[3].x = x0; + verts[3].y = y1; + verts[3].z = z; + + /* vertex colors */ + for (i = 0; i < 4; i++) { + verts[i].r = ctx->Color.ClearColor[0]; + verts[i].g = ctx->Color.ClearColor[1]; + verts[i].b = ctx->Color.ClearColor[2]; + verts[i].a = ctx->Color.ClearColor[3]; + } + + /* upload new vertex data */ + _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts); + } + + /* draw quad */ + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + _mesa_meta_end(ctx); +} + + +/** + * Meta implementation of ctx->Driver.CopyPixels() in terms + * of texture mapping and polygon rendering. 
+ */ +void +_mesa_meta_CopyPixels(GLcontext *ctx, GLint srcX, GLint srcY, + GLsizei width, GLsizei height, + GLint dstX, GLint dstY, GLenum type) +{ + struct copypix_state *copypix = &ctx->Meta->CopyPix; + struct temp_texture *tex = get_temp_texture(ctx); + struct vertex { + GLfloat x, y, z, s, t; + }; + struct vertex verts[4]; + GLboolean newTex; + GLenum intFormat = GL_RGBA; + + if (type != GL_COLOR || + ctx->_ImageTransferState || + ctx->Fog.Enabled || + width > tex->MaxSize || + height > tex->MaxSize) { + /* XXX avoid this fallback */ + _swrast_CopyPixels(ctx, srcX, srcY, width, height, dstX, dstY, type); + return; + } + + /* Most GL state applies to glCopyPixels, but a there's a few things + * we need to override: + */ + _mesa_meta_begin(ctx, (META_RASTERIZATION | + META_SHADER | + META_TEXTURE | + META_TRANSFORM | + META_VERTEX | + META_VIEWPORT)); + + if (copypix->ArrayObj == 0) { + /* one-time setup */ + + /* create vertex array object */ + _mesa_GenVertexArrays(1, ©pix->ArrayObj); + _mesa_BindVertexArray(copypix->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, ©pix->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, copypix->VBO); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), + NULL, GL_DYNAMIC_DRAW_ARB); + + /* setup vertex arrays */ + _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x)); + _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(s)); + _mesa_EnableClientState(GL_VERTEX_ARRAY); + _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY); + } + else { + _mesa_BindVertexArray(copypix->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, copypix->VBO); + } + + newTex = alloc_texture(tex, width, height, intFormat); + + /* vertex positions, texcoords (after texture allocation!) 
*/ + { + const GLfloat dstX0 = (GLfloat) dstX; + const GLfloat dstY0 = (GLfloat) dstY; + const GLfloat dstX1 = dstX + width * ctx->Pixel.ZoomX; + const GLfloat dstY1 = dstY + height * ctx->Pixel.ZoomY; + const GLfloat z = ctx->Current.RasterPos[2]; + + verts[0].x = dstX0; + verts[0].y = dstY0; + verts[0].z = z; + verts[0].s = 0.0F; + verts[0].t = 0.0F; + verts[1].x = dstX1; + verts[1].y = dstY0; + verts[1].z = z; + verts[1].s = tex->Sright; + verts[1].t = 0.0F; + verts[2].x = dstX1; + verts[2].y = dstY1; + verts[2].z = z; + verts[2].s = tex->Sright; + verts[2].t = tex->Ttop; + verts[3].x = dstX0; + verts[3].y = dstY1; + verts[3].z = z; + verts[3].s = 0.0F; + verts[3].t = tex->Ttop; + + /* upload new vertex data */ + _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts); + } + + /* Alloc/setup texture */ + setup_copypix_texture(tex, newTex, srcX, srcY, width, height, + GL_RGBA, GL_NEAREST); + + _mesa_set_enable(ctx, tex->Target, GL_TRUE); + + /* draw textured quad */ + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + _mesa_set_enable(ctx, tex->Target, GL_FALSE); + + _mesa_meta_end(ctx); +} + + + +/** + * When the glDrawPixels() image size is greater than the max rectangle + * texture size we use this function to break the glDrawPixels() image + * into tiles which fit into the max texture size. 
+ */ +static void +tiled_draw_pixels(GLcontext *ctx, + GLint tileSize, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels) +{ + struct gl_pixelstore_attrib tileUnpack = *unpack; + GLint i, j; + + if (tileUnpack.RowLength == 0) + tileUnpack.RowLength = width; + + for (i = 0; i < width; i += tileSize) { + const GLint tileWidth = MIN2(tileSize, width - i); + const GLint tileX = (GLint) (x + i * ctx->Pixel.ZoomX); + + tileUnpack.SkipPixels = unpack->SkipPixels + i; + + for (j = 0; j < height; j += tileSize) { + const GLint tileHeight = MIN2(tileSize, height - j); + const GLint tileY = (GLint) (y + j * ctx->Pixel.ZoomY); + + tileUnpack.SkipRows = unpack->SkipRows + j; + + _mesa_meta_DrawPixels(ctx, tileX, tileY, tileWidth, tileHeight, + format, type, &tileUnpack, pixels); + } + } +} + + +/** + * One-time init for drawing stencil pixels. + */ +static void +init_draw_stencil_pixels(GLcontext *ctx) +{ + /* This program is run eight times, once for each stencil bit. + * The stencil values to draw are found in an 8-bit alpha texture. + * We read the texture/stencil value and test if bit 'b' is set. + * If the bit is not set, use KIL to kill the fragment. + * Finally, we use the stencil test to update the stencil buffer. + * + * The basic algorithm for checking if a bit is set is: + * if (is_odd(value / (1 << bit))) + * result is one (or non-zero). + * else + * result is zero. + * The program parameter contains three values: + * parm.x = 255 / (1 << bit) + * parm.y = 0.5 + * parm.z = 0.0 + */ + static const char *program = + "!!ARBfp1.0\n" + "PARAM parm = program.local[0]; \n" + "TEMP t; \n" + "TEX t, fragment.texcoord[0], texture[0], %s; \n" /* NOTE %s here! 
*/ + "# t = t * 255 / bit \n" + "MUL t.x, t.a, parm.x; \n" + "# t = (int) t \n" + "FRC t.y, t.x; \n" + "SUB t.x, t.x, t.y; \n" + "# t = t * 0.5 \n" + "MUL t.x, t.x, parm.y; \n" + "# t = fract(t.x) \n" + "FRC t.x, t.x; # if t.x != 0, then the bit is set \n" + "# t.x = (t.x == 0 ? 1 : 0) \n" + "SGE t.x, -t.x, parm.z; \n" + "KIL -t.x; \n" + "# for debug only \n" + "#MOV result.color, t.x; \n" + "END \n"; + char program2[1000]; + struct drawpix_state *drawpix = &ctx->Meta->DrawPix; + struct temp_texture *tex = get_temp_texture(ctx); + const char *texTarget; + + assert(drawpix->StencilFP == 0); + + /* replace %s with "RECT" or "2D" */ + assert(strlen(program) + 4 < sizeof(program2)); + if (tex->Target == GL_TEXTURE_RECTANGLE) + texTarget = "RECT"; + else + texTarget = "2D"; + _mesa_snprintf(program2, sizeof(program2), program, texTarget); + + _mesa_GenPrograms(1, &drawpix->StencilFP); + _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, drawpix->StencilFP); + _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, + strlen(program2), (const GLubyte *) program2); +} + + +/** + * One-time init for drawing depth pixels. 
+ */ +static void +init_draw_depth_pixels(GLcontext *ctx) +{ + static const char *program = + "!!ARBfp1.0\n" + "PARAM color = program.local[0]; \n" + "TEX result.depth, fragment.texcoord[0], texture[0], %s; \n" + "MOV result.color, color; \n" + "END \n"; + char program2[200]; + struct drawpix_state *drawpix = &ctx->Meta->DrawPix; + struct temp_texture *tex = get_temp_texture(ctx); + const char *texTarget; + + assert(drawpix->DepthFP == 0); + + /* replace %s with "RECT" or "2D" */ + assert(strlen(program) + 4 < sizeof(program2)); + if (tex->Target == GL_TEXTURE_RECTANGLE) + texTarget = "RECT"; + else + texTarget = "2D"; + _mesa_snprintf(program2, sizeof(program2), program, texTarget); + + _mesa_GenPrograms(1, &drawpix->DepthFP); + _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, drawpix->DepthFP); + _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, + strlen(program2), (const GLubyte *) program2); +} + + +/** + * Meta implementation of ctx->Driver.DrawPixels() in terms + * of texture mapping and polygon rendering. + */ +void +_mesa_meta_DrawPixels(GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels) +{ + struct drawpix_state *drawpix = &ctx->Meta->DrawPix; + struct temp_texture *tex = get_temp_texture(ctx); + const struct gl_pixelstore_attrib unpackSave = ctx->Unpack; + const GLuint origStencilMask = ctx->Stencil.WriteMask[0]; + struct vertex { + GLfloat x, y, z, s, t; + }; + struct vertex verts[4]; + GLenum texIntFormat; + GLboolean fallback, newTex; + GLbitfield metaExtraSave = 0x0; + GLuint vbo; + + /* + * Determine if we can do the glDrawPixels with texture mapping. 
+ */ + fallback = GL_FALSE; + if (ctx->_ImageTransferState || + ctx->Fog.Enabled) { + fallback = GL_TRUE; + } + + if (_mesa_is_color_format(format)) { + /* use more compact format when possible */ + /* XXX disable special case for GL_LUMINANCE for now to work around + * apparent i965 driver bug (see bug #23670). + */ + if (/*format == GL_LUMINANCE ||*/ format == GL_LUMINANCE_ALPHA) + texIntFormat = format; + else + texIntFormat = GL_RGBA; + } + else if (_mesa_is_stencil_format(format)) { + if (ctx->Extensions.ARB_fragment_program && + ctx->Pixel.IndexShift == 0 && + ctx->Pixel.IndexOffset == 0 && + type == GL_UNSIGNED_BYTE) { + /* We'll store stencil as alpha. This only works for GLubyte + * image data because of how incoming values are mapped to alpha + * in [0,1]. + */ + texIntFormat = GL_ALPHA; + metaExtraSave = (META_COLOR_MASK | + META_DEPTH_TEST | + META_SHADER | + META_STENCIL_TEST); + } + else { + fallback = GL_TRUE; + } + } + else if (_mesa_is_depth_format(format)) { + if (ctx->Extensions.ARB_depth_texture && + ctx->Extensions.ARB_fragment_program) { + texIntFormat = GL_DEPTH_COMPONENT; + metaExtraSave = (META_SHADER); + } + else { + fallback = GL_TRUE; + } + } + else { + fallback = GL_TRUE; + } + + if (fallback) { + _swrast_DrawPixels(ctx, x, y, width, height, + format, type, unpack, pixels); + return; + } + + /* + * Check image size against max texture size, draw as tiles if needed. + */ + if (width > tex->MaxSize || height > tex->MaxSize) { + tiled_draw_pixels(ctx, tex->MaxSize, x, y, width, height, + format, type, unpack, pixels); + return; + } + + /* Most GL state applies to glDrawPixels (like blending, stencil, etc), + * but a there's a few things we need to override: + */ + _mesa_meta_begin(ctx, (META_RASTERIZATION | + META_SHADER | + META_TEXTURE | + META_TRANSFORM | + META_VERTEX | + META_VIEWPORT | + metaExtraSave)); + + newTex = alloc_texture(tex, width, height, texIntFormat); + + /* vertex positions, texcoords (after texture allocation!) 
*/ + { + const GLfloat x0 = (GLfloat) x; + const GLfloat y0 = (GLfloat) y; + const GLfloat x1 = x + width * ctx->Pixel.ZoomX; + const GLfloat y1 = y + height * ctx->Pixel.ZoomY; + const GLfloat z = ctx->Current.RasterPos[2]; + + verts[0].x = x0; + verts[0].y = y0; + verts[0].z = z; + verts[0].s = 0.0F; + verts[0].t = 0.0F; + verts[1].x = x1; + verts[1].y = y0; + verts[1].z = z; + verts[1].s = tex->Sright; + verts[1].t = 0.0F; + verts[2].x = x1; + verts[2].y = y1; + verts[2].z = z; + verts[2].s = tex->Sright; + verts[2].t = tex->Ttop; + verts[3].x = x0; + verts[3].y = y1; + verts[3].z = z; + verts[3].s = 0.0F; + verts[3].t = tex->Ttop; + } + + if (drawpix->ArrayObj == 0) { + /* one-time setup: create vertex array object */ + _mesa_GenVertexArrays(1, &drawpix->ArrayObj); + } + _mesa_BindVertexArray(drawpix->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &vbo); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, vbo); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), + verts, GL_DYNAMIC_DRAW_ARB); + + /* setup vertex arrays */ + _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x)); + _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(s)); + _mesa_EnableClientState(GL_VERTEX_ARRAY); + _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY); + + /* set given unpack params */ + ctx->Unpack = *unpack; + + _mesa_set_enable(ctx, tex->Target, GL_TRUE); + + if (_mesa_is_stencil_format(format)) { + /* Drawing stencil */ + GLint bit; + + if (!drawpix->StencilFP) + init_draw_stencil_pixels(ctx); + + setup_drawpix_texture(tex, newTex, texIntFormat, width, height, + GL_ALPHA, type, pixels); + + _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + + _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE); + + /* set all stencil bits to 0 */ + _mesa_StencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); + _mesa_StencilFunc(GL_ALWAYS, 0, 255); + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + /* set stencil bits to 1 where needed */ + 
_mesa_StencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); + + _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, drawpix->StencilFP); + _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_TRUE); + + for (bit = 0; bit < ctx->Visual.stencilBits; bit++) { + const GLuint mask = 1 << bit; + if (mask & origStencilMask) { + _mesa_StencilFunc(GL_ALWAYS, mask, mask); + _mesa_StencilMask(mask); + + _mesa_ProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, 0, + 255.0 / mask, 0.5, 0.0, 0.0); + + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + } + } + } + else if (_mesa_is_depth_format(format)) { + /* Drawing depth */ + if (!drawpix->DepthFP) + init_draw_depth_pixels(ctx); + + _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, drawpix->DepthFP); + _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_TRUE); + + /* polygon color = current raster color */ + _mesa_ProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0, + ctx->Current.RasterColor); + + setup_drawpix_texture(tex, newTex, texIntFormat, width, height, + format, type, pixels); + + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + } + else { + /* Drawing RGBA */ + setup_drawpix_texture(tex, newTex, texIntFormat, width, height, + format, type, pixels); + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + } + + _mesa_set_enable(ctx, tex->Target, GL_FALSE); + + _mesa_DeleteBuffersARB(1, &vbo); + + /* restore unpack params */ + ctx->Unpack = unpackSave; + + _mesa_meta_end(ctx); +} + + +/** + * Do glBitmap with a alpha texture quad. Use the alpha test to + * cull the 'off' bits. If alpha test is already enabled, fall back + * to swrast (should be a rare case). + * A bitmap cache as in the gallium/mesa state tracker would + * improve performance a lot. 
+ */ +void +_mesa_meta_Bitmap(GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap1) +{ + struct bitmap_state *bitmap = &ctx->Meta->Bitmap; + struct temp_texture *tex = get_bitmap_temp_texture(ctx); + const GLenum texIntFormat = GL_ALPHA; + const struct gl_pixelstore_attrib unpackSave = *unpack; + struct vertex { + GLfloat x, y, z, s, t, r, g, b, a; + }; + struct vertex verts[4]; + GLboolean newTex; + GLubyte *bitmap8; + + /* + * Check if swrast fallback is needed. + */ + if (ctx->_ImageTransferState || + ctx->Color.AlphaEnabled || + ctx->Fog.Enabled || + ctx->Texture._EnabledUnits || + width > tex->MaxSize || + height > tex->MaxSize) { + _swrast_Bitmap(ctx, x, y, width, height, unpack, bitmap1); + return; + } + + /* Most GL state applies to glBitmap (like blending, stencil, etc), + * but a there's a few things we need to override: + */ + _mesa_meta_begin(ctx, (META_ALPHA_TEST | + META_PIXEL_STORE | + META_RASTERIZATION | + META_SHADER | + META_TEXTURE | + META_TRANSFORM | + META_VERTEX | + META_VIEWPORT)); + + if (bitmap->ArrayObj == 0) { + /* one-time setup */ + + /* create vertex array object */ + _mesa_GenVertexArraysAPPLE(1, &bitmap->ArrayObj); + _mesa_BindVertexArrayAPPLE(bitmap->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &bitmap->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, bitmap->VBO); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), + NULL, GL_DYNAMIC_DRAW_ARB); + + /* setup vertex arrays */ + _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x)); + _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(s)); + _mesa_ColorPointer(4, GL_FLOAT, sizeof(struct vertex), OFFSET(r)); + _mesa_EnableClientState(GL_VERTEX_ARRAY); + _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY); + _mesa_EnableClientState(GL_COLOR_ARRAY); + } + else { + _mesa_BindVertexArray(bitmap->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 
bitmap->VBO); + } + + newTex = alloc_texture(tex, width, height, texIntFormat); + + /* vertex positions, texcoords, colors (after texture allocation!) */ + { + const GLfloat x0 = (GLfloat) x; + const GLfloat y0 = (GLfloat) y; + const GLfloat x1 = (GLfloat) (x + width); + const GLfloat y1 = (GLfloat) (y + height); + const GLfloat z = ctx->Current.RasterPos[2]; + GLuint i; + + verts[0].x = x0; + verts[0].y = y0; + verts[0].z = z; + verts[0].s = 0.0F; + verts[0].t = 0.0F; + verts[1].x = x1; + verts[1].y = y0; + verts[1].z = z; + verts[1].s = tex->Sright; + verts[1].t = 0.0F; + verts[2].x = x1; + verts[2].y = y1; + verts[2].z = z; + verts[2].s = tex->Sright; + verts[2].t = tex->Ttop; + verts[3].x = x0; + verts[3].y = y1; + verts[3].z = z; + verts[3].s = 0.0F; + verts[3].t = tex->Ttop; + + for (i = 0; i < 4; i++) { + verts[i].r = ctx->Current.RasterColor[0]; + verts[i].g = ctx->Current.RasterColor[1]; + verts[i].b = ctx->Current.RasterColor[2]; + verts[i].a = ctx->Current.RasterColor[3]; + } + + /* upload new vertex data */ + _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts); + } + + bitmap1 = _mesa_map_pbo_source(ctx, &unpackSave, bitmap1); + if (!bitmap1) + return; + + bitmap8 = (GLubyte *) _mesa_calloc(width * height); + if (bitmap8) { + _mesa_expand_bitmap(width, height, &unpackSave, bitmap1, + bitmap8, width, 0xff); + + _mesa_set_enable(ctx, tex->Target, GL_TRUE); + + _mesa_set_enable(ctx, GL_ALPHA_TEST, GL_TRUE); + _mesa_AlphaFunc(GL_GREATER, 0.0); + + setup_drawpix_texture(tex, newTex, texIntFormat, width, height, + GL_ALPHA, GL_UNSIGNED_BYTE, bitmap8); + + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + _mesa_set_enable(ctx, tex->Target, GL_FALSE); + + _mesa_free(bitmap8); + } + + _mesa_unmap_pbo_source(ctx, &unpackSave); + + _mesa_meta_end(ctx); +} + + +void +_mesa_meta_GenerateMipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct gen_mipmap_state *mipmap = &ctx->Meta->Mipmap; + struct vertex { + GLfloat x, y, s, 
t, r; + }; + struct vertex verts[4]; + const GLuint baseLevel = texObj->BaseLevel; + const GLuint maxLevel = texObj->MaxLevel; + const GLenum minFilterSave = texObj->MinFilter; + const GLenum magFilterSave = texObj->MagFilter; + const GLuint fboSave = ctx->DrawBuffer->Name; + GLenum faceTarget; + GLuint level; + GLuint border = 0; + + /* check for fallbacks */ + if (!ctx->Extensions.EXT_framebuffer_object) { + _mesa_generate_mipmap(ctx, target, texObj); + return; + } + + if (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X && + target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z) { + faceTarget = target; + target = GL_TEXTURE_CUBE_MAP; + } + else { + faceTarget = target; + } + + _mesa_meta_begin(ctx, META_ALL); + + if (mipmap->ArrayObj == 0) { + /* one-time setup */ + + /* create vertex array object */ + _mesa_GenVertexArraysAPPLE(1, &mipmap->ArrayObj); + _mesa_BindVertexArrayAPPLE(mipmap->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &mipmap->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, mipmap->VBO); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), + NULL, GL_DYNAMIC_DRAW_ARB); + + /* setup vertex arrays */ + _mesa_VertexPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(x)); + _mesa_TexCoordPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(s)); + _mesa_EnableClientState(GL_VERTEX_ARRAY); + _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY); + } + else { + _mesa_BindVertexArray(mipmap->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, mipmap->VBO); + } + + if (!mipmap->FBO) { + /* Bind the new renderbuffer to the color attachment point. */ + _mesa_GenFramebuffersEXT(1, &mipmap->FBO); + } + + _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, mipmap->FBO); + + _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + _mesa_TexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + _mesa_set_enable(ctx, target, GL_TRUE); + + /* setup texcoords once (XXX what about border?) 
*/ + switch (faceTarget) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + break; + case GL_TEXTURE_2D: + verts[0].s = 0.0F; + verts[0].t = 0.0F; + verts[0].r = 0.0F; + verts[1].s = 1.0F; + verts[1].t = 0.0F; + verts[1].r = 0.0F; + verts[2].s = 1.0F; + verts[2].t = 1.0F; + verts[2].r = 0.0F; + verts[3].s = 0.0F; + verts[3].t = 1.0F; + verts[3].r = 0.0F; + break; + } + + + for (level = baseLevel + 1; level <= maxLevel; level++) { + const struct gl_texture_image *srcImage; + const GLuint srcLevel = level - 1; + GLsizei srcWidth, srcHeight; + GLsizei newWidth, newHeight; + GLenum status; + + srcImage = _mesa_select_tex_image(ctx, texObj, target, srcLevel); + assert(srcImage->Border == 0); /* XXX we can fix this */ + + srcWidth = srcImage->Width - 2 * border; + srcHeight = srcImage->Height - 2 * border; + + newWidth = MAX2(1, srcWidth / 2) + 2 * border; + newHeight = MAX2(1, srcHeight / 2) + 2 * border; + + if (newWidth == srcImage->Width && newHeight == srcImage->Height) { + break; + } + + /* Create empty image */ + _mesa_TexImage2D(GL_TEXTURE_2D, level, srcImage->InternalFormat, + newWidth, newHeight, border, + GL_RGBA, GL_UNSIGNED_BYTE, NULL); + + /* vertex positions */ + { + verts[0].x = 0.0F; + verts[0].y = 0.0F; + verts[1].x = (GLfloat) newWidth; + verts[1].y = 0.0F; + verts[2].x = (GLfloat) newWidth; + verts[2].y = (GLfloat) newHeight; + verts[3].x = 0.0F; + verts[3].y = (GLfloat) newHeight; + + /* upload new vertex data */ + _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts); + } + + /* limit sampling to src level */ + _mesa_TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, srcLevel); + _mesa_TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, srcLevel); + + /* Set to draw into the current level */ + _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, + GL_COLOR_ATTACHMENT0_EXT, + target, + texObj->Name, + level); + + /* Choose to render to the color attachment. 
*/ + _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + + status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT); + if (status != GL_FRAMEBUFFER_COMPLETE_EXT) { + abort(); + break; + } + + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + } + + _mesa_meta_end(ctx); + + _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, minFilterSave); + _mesa_TexParameteri(target, GL_TEXTURE_MAG_FILTER, magFilterSave); + + /* restore (XXX add to meta_begin/end()? */ + _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fboSave); +} + + +/** + * Determine the GL data type to use for the temporary image read with + * ReadPixels() and passed to Tex[Sub]Image(). + */ +static GLenum +get_temp_image_type(GLcontext *ctx, GLenum baseFormat) +{ + switch (baseFormat) { + case GL_RGBA: + case GL_RGB: + case GL_ALPHA: + case GL_LUMINANCE: + case GL_LUMINANCE_ALPHA: + case GL_INTENSITY: + if (ctx->DrawBuffer->Visual.redBits <= 8) + return GL_UNSIGNED_BYTE; + else if (ctx->DrawBuffer->Visual.redBits <= 8) + return GL_UNSIGNED_SHORT; + else + return GL_FLOAT; + case GL_DEPTH_COMPONENT: + return GL_UNSIGNED_INT; + case GL_DEPTH_STENCIL: + return GL_UNSIGNED_INT_24_8; + default: + _mesa_problem(ctx, "Unexpected format in get_temp_image_type()"); + return 0; + } +} + + +/** + * Helper for _mesa_meta_CopyTexImage1/2D() functions. + * Have to be careful with locking and meta state for pixel transfer. 
+ */ +static void +copy_tex_image(GLcontext *ctx, GLuint dims, GLenum target, GLint level, + GLenum internalFormat, GLint x, GLint y, + GLsizei width, GLsizei height, GLint border) +{ + struct gl_texture_unit *texUnit; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + GLsizei postConvWidth = width, postConvHeight = height; + GLenum format, type; + GLint bpp; + void *buf; + + texUnit = _mesa_get_current_tex_unit(ctx); + texObj = _mesa_select_tex_object(ctx, texUnit, target); + texImage = _mesa_get_tex_image(ctx, texObj, target, level); + + format = _mesa_base_tex_format(ctx, internalFormat); + type = get_temp_image_type(ctx, format); + bpp = _mesa_bytes_per_pixel(format, type); + if (bpp <= 0) { + _mesa_problem(ctx, "Bad bpp in meta copy_tex_image()"); + return; + } + + /* + * Alloc image buffer (XXX could use a PBO) + */ + buf = _mesa_malloc(width * height * bpp); + if (!buf) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); + return; + } + + _mesa_unlock_texture(ctx, texObj); /* need to unlock first */ + + /* + * Read image from framebuffer (disable pixel transfer ops) + */ + _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); + ctx->Driver.ReadPixels(ctx, x, y, width, height, + format, type, &ctx->Pack, buf); + _mesa_meta_end(ctx); + + /* + * Prepare for new texture image size/data + */ +#if FEATURE_convolve + if (_mesa_is_color_format(internalFormat)) { + _mesa_adjust_image_for_convolution(ctx, 2, + &postConvWidth, &postConvHeight); + } +#endif + + if (texImage->Data) { + ctx->Driver.FreeTexImageData(ctx, texImage); + } + + _mesa_init_teximage_fields(ctx, target, texImage, + postConvWidth, postConvHeight, 1, + border, internalFormat); + + /* + * Store texture data (with pixel transfer ops) + */ + _mesa_meta_begin(ctx, META_PIXEL_STORE); + + _mesa_update_state(ctx); /* to update pixel transfer state */ + + if (target == GL_TEXTURE_1D) { + ctx->Driver.TexImage1D(ctx, target, level, internalFormat, + width, 
border, format, type, + buf, &ctx->Unpack, texObj, texImage); + } + else { + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, format, type, + buf, &ctx->Unpack, texObj, texImage); + } + _mesa_meta_end(ctx); + + _mesa_lock_texture(ctx, texObj); /* re-lock */ + + _mesa_free(buf); +} + + +void +_mesa_meta_CopyTexImage1D(GLcontext *ctx, GLenum target, GLint level, + GLenum internalFormat, GLint x, GLint y, + GLsizei width, GLint border) +{ + copy_tex_image(ctx, 1, target, level, internalFormat, x, y, + width, 1, border); +} + + +void +_mesa_meta_CopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, + GLenum internalFormat, GLint x, GLint y, + GLsizei width, GLsizei height, GLint border) +{ + copy_tex_image(ctx, 2, target, level, internalFormat, x, y, + width, height, border); +} + + + +/** + * Helper for _mesa_meta_CopyTexSubImage1/2/3D() functions. + * Have to be careful with locking and meta state for pixel transfer. + */ +static void +copy_tex_sub_image(GLcontext *ctx, GLuint dims, GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + struct gl_texture_unit *texUnit; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + GLenum format, type; + GLint bpp; + void *buf; + + texUnit = _mesa_get_current_tex_unit(ctx); + texObj = _mesa_select_tex_object(ctx, texUnit, target); + texImage = _mesa_select_tex_image(ctx, texObj, target, level); + + format = texImage->TexFormat->BaseFormat; + type = get_temp_image_type(ctx, format); + bpp = _mesa_bytes_per_pixel(format, type); + if (bpp <= 0) { + _mesa_problem(ctx, "Bad bpp in meta copy_tex_sub_image()"); + return; + } + + /* + * Alloc image buffer (XXX could use a PBO) + */ + buf = _mesa_malloc(width * height * bpp); + if (!buf) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage%uD", dims); + return; + } + + _mesa_unlock_texture(ctx, texObj); /* need to unlock first */ + + /* + * 
Read image from framebuffer (disable pixel transfer ops) + */ + _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); + ctx->Driver.ReadPixels(ctx, x, y, width, height, + format, type, &ctx->Pack, buf); + _mesa_meta_end(ctx); + + _mesa_update_state(ctx); /* to update pixel transfer state */ + + /* + * Store texture data (with pixel transfer ops) + */ + _mesa_meta_begin(ctx, META_PIXEL_STORE); + if (target == GL_TEXTURE_1D) { + ctx->Driver.TexSubImage1D(ctx, target, level, xoffset, + width, format, type, buf, + &ctx->Unpack, texObj, texImage); + } + else if (target == GL_TEXTURE_3D) { + ctx->Driver.TexSubImage3D(ctx, target, level, xoffset, yoffset, zoffset, + width, height, 1, format, type, buf, + &ctx->Unpack, texObj, texImage); + } + else { + ctx->Driver.TexSubImage2D(ctx, target, level, xoffset, yoffset, + width, height, format, type, buf, + &ctx->Unpack, texObj, texImage); + } + _mesa_meta_end(ctx); + + _mesa_lock_texture(ctx, texObj); /* re-lock */ + + _mesa_free(buf); +} + + +void +_mesa_meta_CopyTexSubImage1D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, + GLint x, GLint y, GLsizei width) +{ + copy_tex_sub_image(ctx, 1, target, level, xoffset, 0, 0, + x, y, width, 1); +} + + +void +_mesa_meta_CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + copy_tex_sub_image(ctx, 2, target, level, xoffset, yoffset, 0, + x, y, width, height); +} + + +void +_mesa_meta_CopyTexSubImage3D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + copy_tex_sub_image(ctx, 3, target, level, xoffset, yoffset, zoffset, + x, y, width, height); +} + + +void +_mesa_meta_CopyColorTable(GLcontext *ctx, + GLenum target, GLenum internalformat, + GLint x, GLint y, GLsizei width) +{ + GLfloat *buf; + + buf = (GLfloat *) _mesa_malloc(width * 4 * sizeof(GLfloat)); + if (!buf) { + 
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyColorTable"); + return; + } + + /* + * Read image from framebuffer (disable pixel transfer ops) + */ + _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); + ctx->Driver.ReadPixels(ctx, x, y, width, 1, + GL_RGBA, GL_FLOAT, &ctx->Pack, buf); + + _mesa_ColorTable(target, internalformat, width, GL_RGBA, GL_FLOAT, buf); + + _mesa_meta_end(ctx); + + _mesa_free(buf); +} + + +void +_mesa_meta_CopyColorSubTable(GLcontext *ctx,GLenum target, GLsizei start, + GLint x, GLint y, GLsizei width) +{ + GLfloat *buf; + + buf = (GLfloat *) _mesa_malloc(width * 4 * sizeof(GLfloat)); + if (!buf) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyColorSubTable"); + return; + } + + /* + * Read image from framebuffer (disable pixel transfer ops) + */ + _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); + ctx->Driver.ReadPixels(ctx, x, y, width, 1, + GL_RGBA, GL_FLOAT, &ctx->Pack, buf); + + _mesa_ColorSubTable(target, start, width, GL_RGBA, GL_FLOAT, buf); + + _mesa_meta_end(ctx); + + _mesa_free(buf); +} + + +void +_mesa_meta_CopyConvolutionFilter1D(GLcontext *ctx, GLenum target, + GLenum internalFormat, + GLint x, GLint y, GLsizei width) +{ + GLfloat *buf; + + buf = (GLfloat *) _mesa_malloc(width * 4 * sizeof(GLfloat)); + if (!buf) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyConvolutionFilter1D"); + return; + } + + /* + * Read image from framebuffer (disable pixel transfer ops) + */ + _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); + _mesa_update_state(ctx); + ctx->Driver.ReadPixels(ctx, x, y, width, 1, + GL_RGBA, GL_FLOAT, &ctx->Pack, buf); + + _mesa_ConvolutionFilter1D(target, internalFormat, width, + GL_RGBA, GL_FLOAT, buf); + + _mesa_meta_end(ctx); + + _mesa_free(buf); +} + + +void +_mesa_meta_CopyConvolutionFilter2D(GLcontext *ctx, GLenum target, + GLenum internalFormat, GLint x, GLint y, + GLsizei width, GLsizei height) +{ + GLfloat *buf; + + buf = (GLfloat *) _mesa_malloc(width * height * 4 * 
sizeof(GLfloat)); + if (!buf) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyConvolutionFilter2D"); + return; + } + + /* + * Read image from framebuffer (disable pixel transfer ops) + */ + _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); + _mesa_update_state(ctx); + + ctx->Driver.ReadPixels(ctx, x, y, width, height, + GL_RGBA, GL_FLOAT, &ctx->Pack, buf); + + _mesa_ConvolutionFilter2D(target, internalFormat, width, height, + GL_RGBA, GL_FLOAT, buf); + + _mesa_meta_end(ctx); + + _mesa_free(buf); +} diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h new file mode 100644 index 0000000000..7f659528dc --- /dev/null +++ b/src/mesa/drivers/common/meta.h @@ -0,0 +1,114 @@ +/* + * Mesa 3-D graphics library + * Version: 7.6 + * + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + + +#ifndef META_H +#define META_H + + +extern void +_mesa_meta_init(GLcontext *ctx); + +extern void +_mesa_meta_free(GLcontext *ctx); + +extern void +_mesa_meta_BlitFramebuffer(GLcontext *ctx, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter); + +extern void +_mesa_meta_Clear(GLcontext *ctx, GLbitfield buffers); + +extern void +_mesa_meta_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint dstx, GLint dsty, GLenum type); + +extern void +_mesa_meta_DrawPixels(GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels); + +extern void +_mesa_meta_Bitmap(GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap); + +extern void +_mesa_meta_GenerateMipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj); + +extern void +_mesa_meta_CopyTexImage1D(GLcontext *ctx, GLenum target, GLint level, + GLenum internalFormat, GLint x, GLint y, + GLsizei width, GLint border); + +extern void +_mesa_meta_CopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, + GLenum internalFormat, GLint x, GLint y, + GLsizei width, GLsizei height, GLint border); + +extern void +_mesa_meta_CopyTexSubImage1D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, + GLint x, GLint y, GLsizei width); + +extern void +_mesa_meta_CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, + GLsizei width, GLsizei height); + +extern void +_mesa_meta_CopyTexSubImage3D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLint x, GLint y, + GLsizei width, GLsizei height); + +extern void +_mesa_meta_CopyColorTable(GLcontext *ctx, + GLenum target, GLenum internalformat, + 
GLint x, GLint y, GLsizei width); + +extern void +_mesa_meta_CopyColorSubTable(GLcontext *ctx,GLenum target, GLsizei start, + GLint x, GLint y, GLsizei width); + +extern void +_mesa_meta_CopyConvolutionFilter1D(GLcontext *ctx, GLenum target, + GLenum internalFormat, + GLint x, GLint y, GLsizei width); + +extern void +_mesa_meta_CopyConvolutionFilter2D(GLcontext *ctx, GLenum target, + GLenum internalFormat, GLint x, GLint y, + GLsizei width, GLsizei height); + + +#endif /* META_H */ diff --git a/src/mesa/drivers/dri/common/.gitignore b/src/mesa/drivers/dri/common/.gitignore new file mode 100644 index 0000000000..1edeb79fd1 --- /dev/null +++ b/src/mesa/drivers/dri/common/.gitignore @@ -0,0 +1 @@ +*.os diff --git a/src/mesa/drivers/dri/common/dri_metaops.c b/src/mesa/drivers/dri/common/dri_metaops.c index fe183c2e87..c7bea07dc9 100644 --- a/src/mesa/drivers/dri/common/dri_metaops.c +++ b/src/mesa/drivers/dri/common/dri_metaops.c @@ -287,247 +287,6 @@ meta_restore_texcoords(struct dri_metaops *meta) } -/** - * Perform glClear where mask contains only color, depth, and/or stencil. - * - * The implementation is based on calling into Mesa to set GL state and - * performing normal triangle rendering. The intent of this path is to - * have as generic a path as possible, so that any driver could make use of - * it. - */ - -/** - * Per-context one-time init of things for intl_clear_tris(). - * Basically set up a private array object for vertex/color arrays. 
- */ -static void -meta_init_clear(struct dri_metaops *meta) -{ - GLcontext *ctx = meta->ctx; - struct gl_array_object *arraySave = NULL; - const GLuint arrayBuffer = ctx->Array.ArrayBufferObj->Name; - const GLuint elementBuffer = ctx->Array.ElementArrayBufferObj->Name; - - /* create new array object */ - meta->clear.arrayObj = _mesa_new_array_object(ctx, ~0); - - /* save current array object, bind new one */ - _mesa_reference_array_object(ctx, &arraySave, ctx->Array.ArrayObj); - ctx->NewState |= _NEW_ARRAY; - ctx->Array.NewState |= _NEW_ARRAY_ALL; - _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, meta->clear.arrayObj); - - /* one-time setup of vertex arrays (pos, color) */ - _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); - _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0); - _mesa_ColorPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), meta->clear.color); - _mesa_VertexPointer(3, GL_FLOAT, 3 * sizeof(GLfloat), meta->clear.vertices); - _mesa_Enable(GL_COLOR_ARRAY); - _mesa_Enable(GL_VERTEX_ARRAY); - - /* restore original array object */ - ctx->NewState |= _NEW_ARRAY; - ctx->Array.NewState |= _NEW_ARRAY_ALL; - _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, arraySave); - _mesa_reference_array_object(ctx, &arraySave, NULL); - - /* restore original buffer objects */ - _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, arrayBuffer); - _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuffer); -} - - - -/** - * Perform glClear where mask contains only color, depth, and/or stencil. - * - * The implementation is based on calling into Mesa to set GL state and - * performing normal triangle rendering. The intent of this path is to - * have as generic a path as possible, so that any driver could make use of - * it. 
- */ -void -meta_clear_tris(struct dri_metaops *meta, GLbitfield mask) -{ - GLcontext *ctx = meta->ctx; - GLfloat dst_z; - struct gl_framebuffer *fb = ctx->DrawBuffer; - int i; - GLboolean saved_fp_enable = GL_FALSE, saved_vp_enable = GL_FALSE; - GLuint saved_shader_program = 0; - unsigned int saved_active_texture; - struct gl_array_object *arraySave = NULL; - - if (!meta->clear.arrayObj) - meta_init_clear(meta); - - assert((mask & ~(TRI_CLEAR_COLOR_BITS | BUFFER_BIT_DEPTH | - BUFFER_BIT_STENCIL)) == 0); - - _mesa_PushAttrib(GL_COLOR_BUFFER_BIT | - GL_DEPTH_BUFFER_BIT | - GL_ENABLE_BIT | - GL_POLYGON_BIT | - GL_STENCIL_BUFFER_BIT | - GL_TRANSFORM_BIT | - GL_CURRENT_BIT | - GL_VIEWPORT_BIT); - saved_active_texture = ctx->Texture.CurrentUnit; - - /* Disable existing GL state we don't want to apply to a clear. */ - _mesa_Disable(GL_ALPHA_TEST); - _mesa_Disable(GL_BLEND); - _mesa_Disable(GL_CULL_FACE); - _mesa_Disable(GL_FOG); - _mesa_Disable(GL_POLYGON_SMOOTH); - _mesa_Disable(GL_POLYGON_STIPPLE); - _mesa_Disable(GL_POLYGON_OFFSET_FILL); - _mesa_Disable(GL_LIGHTING); - _mesa_Disable(GL_CLIP_PLANE0); - _mesa_Disable(GL_CLIP_PLANE1); - _mesa_Disable(GL_CLIP_PLANE2); - _mesa_Disable(GL_CLIP_PLANE3); - _mesa_Disable(GL_CLIP_PLANE4); - _mesa_Disable(GL_CLIP_PLANE5); - _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL); - if (ctx->Extensions.ARB_fragment_program && ctx->FragmentProgram.Enabled) { - saved_fp_enable = GL_TRUE; - _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); - } - if (ctx->Extensions.ARB_vertex_program && ctx->VertexProgram.Enabled) { - saved_vp_enable = GL_TRUE; - _mesa_Disable(GL_VERTEX_PROGRAM_ARB); - } - if (ctx->Extensions.ARB_shader_objects && ctx->Shader.CurrentProgram) { - saved_shader_program = ctx->Shader.CurrentProgram->Name; - _mesa_UseProgramObjectARB(0); - } - - if (ctx->Texture._EnabledUnits != 0) { - int i; - - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - _mesa_ActiveTextureARB(GL_TEXTURE0 + i); - _mesa_Disable(GL_TEXTURE_1D); - 
_mesa_Disable(GL_TEXTURE_2D); - _mesa_Disable(GL_TEXTURE_3D); - if (ctx->Extensions.ARB_texture_cube_map) - _mesa_Disable(GL_TEXTURE_CUBE_MAP_ARB); - if (ctx->Extensions.NV_texture_rectangle) - _mesa_Disable(GL_TEXTURE_RECTANGLE_NV); - if (ctx->Extensions.MESA_texture_array) { - _mesa_Disable(GL_TEXTURE_1D_ARRAY_EXT); - _mesa_Disable(GL_TEXTURE_2D_ARRAY_EXT); - } - } - } - - /* save current array object, bind our private one */ - _mesa_reference_array_object(ctx, &arraySave, ctx->Array.ArrayObj); - ctx->NewState |= _NEW_ARRAY; - ctx->Array.NewState |= _NEW_ARRAY_ALL; - _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, meta->clear.arrayObj); - - meta_set_passthrough_transform(meta); - - for (i = 0; i < 4; i++) { - COPY_4FV(meta->clear.color[i], ctx->Color.ClearColor); - } - - /* convert clear Z from [0,1] to NDC coord in [-1,1] */ - dst_z = -1.0 + 2.0 * ctx->Depth.Clear; - - /* The ClearDepth value is unaffected by DepthRange, so do a default - * mapping. - */ - _mesa_DepthRange(0.0, 1.0); - - /* Prepare the vertices, which are the same regardless of which buffer we're - * drawing to. - */ - meta->clear.vertices[0][0] = fb->_Xmin; - meta->clear.vertices[0][1] = fb->_Ymin; - meta->clear.vertices[0][2] = dst_z; - meta->clear.vertices[1][0] = fb->_Xmax; - meta->clear.vertices[1][1] = fb->_Ymin; - meta->clear.vertices[1][2] = dst_z; - meta->clear.vertices[2][0] = fb->_Xmax; - meta->clear.vertices[2][1] = fb->_Ymax; - meta->clear.vertices[2][2] = dst_z; - meta->clear.vertices[3][0] = fb->_Xmin; - meta->clear.vertices[3][1] = fb->_Ymax; - meta->clear.vertices[3][2] = dst_z; - - while (mask != 0) { - GLuint this_mask = 0; - GLuint color_bit; - - color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS); - if (color_bit != 0) - this_mask |= (1 << (color_bit - 1)); - - /* Clear depth/stencil in the same pass as color. 
*/ - this_mask |= (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)); - - /* Select the current color buffer and use the color write mask if - * we have one, otherwise don't write any color channels. - */ - if (this_mask & BUFFER_BIT_FRONT_LEFT) - _mesa_DrawBuffer(GL_FRONT_LEFT); - else if (this_mask & BUFFER_BIT_BACK_LEFT) - _mesa_DrawBuffer(GL_BACK_LEFT); - else if (color_bit != 0) - _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0 + - (color_bit - BUFFER_COLOR0 - 1)); - else - _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - - /* Control writing of the depth clear value to depth. */ - if (this_mask & BUFFER_BIT_DEPTH) { - _mesa_DepthFunc(GL_ALWAYS); - _mesa_Enable(GL_DEPTH_TEST); - } else { - _mesa_Disable(GL_DEPTH_TEST); - _mesa_DepthMask(GL_FALSE); - } - - /* Control writing of the stencil clear value to stencil. */ - if (this_mask & BUFFER_BIT_STENCIL) { - _mesa_Enable(GL_STENCIL_TEST); - _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, - GL_REPLACE, GL_REPLACE, GL_REPLACE); - _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, - ctx->Stencil.Clear & 0x7fffffff, - ctx->Stencil.WriteMask[0]); - } else { - _mesa_Disable(GL_STENCIL_TEST); - } - - _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); - - mask &= ~this_mask; - } - - meta_restore_transform(meta); - - _mesa_ActiveTextureARB(GL_TEXTURE0 + saved_active_texture); - if (saved_fp_enable) - _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB); - if (saved_vp_enable) - _mesa_Enable(GL_VERTEX_PROGRAM_ARB); - - if (saved_shader_program) - _mesa_UseProgramObjectARB(saved_shader_program); - - _mesa_PopAttrib(); - - /* restore current array object */ - ctx->NewState |= _NEW_ARRAY; - ctx->Array.NewState |= _NEW_ARRAY_ALL; - _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, arraySave); - _mesa_reference_array_object(ctx, &arraySave, NULL); -} - void meta_init_metaops(GLcontext *ctx, struct dri_metaops *meta) { meta->ctx = ctx; @@ -535,7 +294,5 @@ void meta_init_metaops(GLcontext *ctx, struct dri_metaops *meta) void 
meta_destroy_metaops(struct dri_metaops *meta) { - if (meta->clear.arrayObj) - _mesa_delete_array_object(meta->ctx, meta->clear.arrayObj); } diff --git a/src/mesa/drivers/dri/common/dri_metaops.h b/src/mesa/drivers/dri/common/dri_metaops.h index bb4079d535..2487145326 100644 --- a/src/mesa/drivers/dri/common/dri_metaops.h +++ b/src/mesa/drivers/dri/common/dri_metaops.h @@ -25,25 +25,10 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ + #ifndef DRI_METAOPS_H #define DRI_METAOPS_H -#define TRI_CLEAR_COLOR_BITS (BUFFER_BIT_BACK_LEFT | \ - BUFFER_BIT_FRONT_LEFT | \ - BUFFER_BIT_COLOR0 | \ - BUFFER_BIT_COLOR1 | \ - BUFFER_BIT_COLOR2 | \ - BUFFER_BIT_COLOR3 | \ - BUFFER_BIT_COLOR4 | \ - BUFFER_BIT_COLOR5 | \ - BUFFER_BIT_COLOR6 | \ - BUFFER_BIT_COLOR7) - -struct dri_meta_clear { - struct gl_array_object *arrayObj; - GLfloat vertices[4][3]; - GLfloat color[4][4]; -}; struct dri_metaops { GLcontext *ctx; @@ -69,8 +54,6 @@ struct dri_metaops { GLint saved_vp_x, saved_vp_y; GLsizei saved_vp_width, saved_vp_height; GLenum saved_matrix_mode; - - struct dri_meta_clear clear; }; @@ -91,9 +74,8 @@ void meta_restore_fragment_program(struct dri_metaops *meta); void meta_set_default_texrect(struct dri_metaops *meta); void meta_restore_texcoords(struct dri_metaops *meta); -void meta_clear_tris(struct dri_metaops *meta, GLbitfield mask); void meta_init_metaops(GLcontext *ctx, struct dri_metaops *meta); void meta_destroy_metaops(struct dri_metaops *meta); -#endif +#endif diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index 1d940603fa..e48e10d7c0 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -778,7 +778,7 @@ dri2CreateNewScreen(int scrn, int fd, if (driDriverAPI.InitScreen2 == NULL) return NULL; - psp = _mesa_malloc(sizeof(*psp)); + psp = _mesa_calloc(sizeof(*psp)); if (!psp) return NULL; diff --git 
a/src/mesa/drivers/dri/common/extension_helper.h b/src/mesa/drivers/dri/common/extension_helper.h index e308fd2831..5e86324eec 100644 --- a/src/mesa/drivers/dri/common/extension_helper.h +++ b/src/mesa/drivers/dri/common/extension_helper.h @@ -33,7 +33,7 @@ #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char UniformMatrix3fvARB_names[] = +static const char UniformMatrix3fvARB_names[] = "iiip\0" /* Parameter signature */ "glUniformMatrix3fv\0" "glUniformMatrix3fvARB\0" @@ -41,7 +41,7 @@ static const char UniformMatrix3fvARB_names[] = #endif #if defined(need_GL_VERSION_1_3) || defined(need_GL_ARB_multisample) -static const char SampleCoverageARB_names[] = +static const char SampleCoverageARB_names[] = "fi\0" /* Parameter signature */ "glSampleCoverage\0" "glSampleCoverageARB\0" @@ -49,7 +49,7 @@ static const char SampleCoverageARB_names[] = #endif #if defined(need_GL_EXT_convolution) -static const char ConvolutionFilter1D_names[] = +static const char ConvolutionFilter1D_names[] = "iiiiip\0" /* Parameter signature */ "glConvolutionFilter1D\0" "glConvolutionFilter1DEXT\0" @@ -57,7 +57,7 @@ static const char ConvolutionFilter1D_names[] = #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_occlusion_query) -static const char BeginQueryARB_names[] = +static const char BeginQueryARB_names[] = "ii\0" /* Parameter signature */ "glBeginQuery\0" "glBeginQueryARB\0" @@ -65,7 +65,7 @@ static const char BeginQueryARB_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_NV_point_sprite) -static const char PointParameteriNV_names[] = +static const char PointParameteriNV_names[] = "ii\0" /* Parameter signature */ "glPointParameteri\0" "glPointParameteriNV\0" @@ -73,14 +73,14 @@ static const char PointParameteriNV_names[] = #endif #if defined(need_GL_VERSION_2_0) -static const char GetProgramiv_names[] = +static const char GetProgramiv_names[] = "iip\0" /* Parameter signature */ "glGetProgramiv\0" ""; #endif #if 
defined(need_GL_VERSION_1_3) -static const char MultiTexCoord3sARB_names[] = +static const char MultiTexCoord3sARB_names[] = "iiii\0" /* Parameter signature */ "glMultiTexCoord3s\0" "glMultiTexCoord3sARB\0" @@ -88,7 +88,7 @@ static const char MultiTexCoord3sARB_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3iEXT_names[] = +static const char SecondaryColor3iEXT_names[] = "iii\0" /* Parameter signature */ "glSecondaryColor3i\0" "glSecondaryColor3iEXT\0" @@ -96,7 +96,7 @@ static const char SecondaryColor3iEXT_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos3fMESA_names[] = +static const char WindowPos3fMESA_names[] = "fff\0" /* Parameter signature */ "glWindowPos3f\0" "glWindowPos3fARB\0" @@ -105,14 +105,14 @@ static const char WindowPos3fMESA_names[] = #endif #if defined(need_GL_SGIS_pixel_texture) -static const char PixelTexGenParameterfvSGIS_names[] = +static const char PixelTexGenParameterfvSGIS_names[] = "ip\0" /* Parameter signature */ "glPixelTexGenParameterfvSGIS\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char ActiveTextureARB_names[] = +static const char ActiveTextureARB_names[] = "i\0" /* Parameter signature */ "glActiveTexture\0" "glActiveTextureARB\0" @@ -120,7 +120,7 @@ static const char ActiveTextureARB_names[] = #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_blit) -static const char BlitFramebufferEXT_names[] = +static const char BlitFramebufferEXT_names[] = "iiiiiiiiii\0" /* Parameter signature */ "glBlitFramebuffer\0" "glBlitFramebufferEXT\0" @@ -128,21 +128,21 @@ static const char BlitFramebufferEXT_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib4ubvNV_names[] = +static const char VertexAttrib4ubvNV_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4ubvNV\0" ""; #endif #if 
defined(need_GL_NV_fragment_program) -static const char GetProgramNamedParameterdvNV_names[] = +static const char GetProgramNamedParameterdvNV_names[] = "iipp\0" /* Parameter signature */ "glGetProgramNamedParameterdvNV\0" ""; #endif #if defined(need_GL_EXT_histogram) -static const char Histogram_names[] = +static const char Histogram_names[] = "iiii\0" /* Parameter signature */ "glHistogram\0" "glHistogramEXT\0" @@ -150,14 +150,14 @@ static const char Histogram_names[] = #endif #if defined(need_GL_SGIS_texture4D) -static const char TexImage4DSGIS_names[] = +static const char TexImage4DSGIS_names[] = "iiiiiiiiiip\0" /* Parameter signature */ "glTexImage4DSGIS\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos2dvMESA_names[] = +static const char WindowPos2dvMESA_names[] = "p\0" /* Parameter signature */ "glWindowPos2dv\0" "glWindowPos2dvARB\0" @@ -166,14 +166,14 @@ static const char WindowPos2dvMESA_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiColor3fVertex3fvSUN_names[] = +static const char ReplacementCodeuiColor3fVertex3fvSUN_names[] = "ppp\0" /* Parameter signature */ "glReplacementCodeuiColor3fVertex3fvSUN\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_EXT_blend_equation_separate) || defined(need_GL_ATI_blend_equation_separate) -static const char BlendEquationSeparateEXT_names[] = +static const char BlendEquationSeparateEXT_names[] = "ii\0" /* Parameter signature */ "glBlendEquationSeparate\0" "glBlendEquationSeparateEXT\0" @@ -182,14 +182,14 @@ static const char BlendEquationSeparateEXT_names[] = #endif #if defined(need_GL_SGIX_list_priority) -static const char ListParameterfSGIX_names[] = +static const char ListParameterfSGIX_names[] = "iif\0" /* Parameter signature */ "glListParameterfSGIX\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char 
SecondaryColor3bEXT_names[] = +static const char SecondaryColor3bEXT_names[] = "iii\0" /* Parameter signature */ "glSecondaryColor3b\0" "glSecondaryColor3bEXT\0" @@ -197,21 +197,21 @@ static const char SecondaryColor3bEXT_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord4fColor4fNormal3fVertex4fvSUN_names[] = +static const char TexCoord4fColor4fNormal3fVertex4fvSUN_names[] = "pppp\0" /* Parameter signature */ "glTexCoord4fColor4fNormal3fVertex4fvSUN\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib4svNV_names[] = +static const char VertexAttrib4svNV_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4svNV\0" ""; #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) -static const char GetBufferSubDataARB_names[] = +static const char GetBufferSubDataARB_names[] = "iiip\0" /* Parameter signature */ "glGetBufferSubData\0" "glGetBufferSubDataARB\0" @@ -219,7 +219,7 @@ static const char GetBufferSubDataARB_names[] = #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) -static const char BufferSubDataARB_names[] = +static const char BufferSubDataARB_names[] = "iiip\0" /* Parameter signature */ "glBufferSubData\0" "glBufferSubDataARB\0" @@ -227,21 +227,21 @@ static const char BufferSubDataARB_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord2fColor4ubVertex3fvSUN_names[] = +static const char TexCoord2fColor4ubVertex3fvSUN_names[] = "ppp\0" /* Parameter signature */ "glTexCoord2fColor4ubVertex3fvSUN\0" ""; #endif #if defined(need_GL_VERSION_2_0) -static const char AttachShader_names[] = +static const char AttachShader_names[] = "ii\0" /* Parameter signature */ "glAttachShader\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib2fARB_names[] = +static const char VertexAttrib2fARB_names[] = "iff\0" /* Parameter signature */ "glVertexAttrib2f\0" 
"glVertexAttrib2fARB\0" @@ -249,14 +249,14 @@ static const char VertexAttrib2fARB_names[] = #endif #if defined(need_GL_MESA_shader_debug) -static const char GetDebugLogLengthMESA_names[] = +static const char GetDebugLogLengthMESA_names[] = "iii\0" /* Parameter signature */ "glGetDebugLogLengthMESA\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib3fARB_names[] = +static const char VertexAttrib3fARB_names[] = "ifff\0" /* Parameter signature */ "glVertexAttrib3f\0" "glVertexAttrib3fARB\0" @@ -264,7 +264,7 @@ static const char VertexAttrib3fARB_names[] = #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_occlusion_query) -static const char GetQueryivARB_names[] = +static const char GetQueryivARB_names[] = "iip\0" /* Parameter signature */ "glGetQueryiv\0" "glGetQueryivARB\0" @@ -272,7 +272,7 @@ static const char GetQueryivARB_names[] = #endif #if defined(need_GL_EXT_texture3D) -static const char TexImage3D_names[] = +static const char TexImage3D_names[] = "iiiiiiiiip\0" /* Parameter signature */ "glTexImage3D\0" "glTexImage3DEXT\0" @@ -280,14 +280,14 @@ static const char TexImage3D_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiVertex3fvSUN_names[] = +static const char ReplacementCodeuiVertex3fvSUN_names[] = "pp\0" /* Parameter signature */ "glReplacementCodeuiVertex3fvSUN\0" ""; #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_occlusion_query) -static const char GetQueryObjectivARB_names[] = +static const char GetQueryObjectivARB_names[] = "iip\0" /* Parameter signature */ "glGetQueryObjectiv\0" "glGetQueryObjectivARB\0" @@ -295,14 +295,14 @@ static const char GetQueryObjectivARB_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiTexCoord2fVertex3fvSUN_names[] = +static const char ReplacementCodeuiTexCoord2fVertex3fvSUN_names[] = "ppp\0" /* Parameter signature */ 
"glReplacementCodeuiTexCoord2fVertex3fvSUN\0" ""; #endif #if defined(need_GL_VERSION_1_3) || defined(need_GL_ARB_texture_compression) -static const char CompressedTexSubImage2DARB_names[] = +static const char CompressedTexSubImage2DARB_names[] = "iiiiiiiip\0" /* Parameter signature */ "glCompressedTexSubImage2D\0" "glCompressedTexSubImage2DARB\0" @@ -310,14 +310,21 @@ static const char CompressedTexSubImage2DARB_names[] = #endif #if defined(need_GL_NV_register_combiners) -static const char CombinerOutputNV_names[] = +static const char CombinerOutputNV_names[] = "iiiiiiiiii\0" /* Parameter signature */ "glCombinerOutputNV\0" ""; #endif +#if defined(need_GL_NV_vertex_program) +static const char VertexAttribs3fvNV_names[] = + "iip\0" /* Parameter signature */ + "glVertexAttribs3fvNV\0" + ""; +#endif + #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform2fARB_names[] = +static const char Uniform2fARB_names[] = "iff\0" /* Parameter signature */ "glUniform2f\0" "glUniform2fARB\0" @@ -325,7 +332,7 @@ static const char Uniform2fARB_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib1svARB_names[] = +static const char VertexAttrib1svARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib1sv\0" "glVertexAttrib1svARB\0" @@ -333,14 +340,14 @@ static const char VertexAttrib1svARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribs1dvNV_names[] = +static const char VertexAttribs1dvNV_names[] = "iip\0" /* Parameter signature */ "glVertexAttribs1dvNV\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform2ivARB_names[] = +static const char Uniform2ivARB_names[] = "iip\0" /* Parameter signature */ "glUniform2iv\0" "glUniform2ivARB\0" @@ -348,28 +355,28 @@ static const char Uniform2ivARB_names[] = #endif #if defined(need_GL_HP_image_transform) -static const char 
GetImageTransformParameterfvHP_names[] = +static const char GetImageTransformParameterfvHP_names[] = "iip\0" /* Parameter signature */ "glGetImageTransformParameterfvHP\0" ""; #endif #if defined(need_GL_ARB_vertex_blend) -static const char WeightubvARB_names[] = +static const char WeightubvARB_names[] = "ip\0" /* Parameter signature */ "glWeightubvARB\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib1fvNV_names[] = +static const char VertexAttrib1fvNV_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib1fvNV\0" ""; #endif #if defined(need_GL_EXT_convolution) -static const char CopyConvolutionFilter1D_names[] = +static const char CopyConvolutionFilter1D_names[] = "iiiii\0" /* Parameter signature */ "glCopyConvolutionFilter1D\0" "glCopyConvolutionFilter1DEXT\0" @@ -377,21 +384,28 @@ static const char CopyConvolutionFilter1D_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiNormal3fVertex3fSUN_names[] = +static const char ReplacementCodeuiNormal3fVertex3fSUN_names[] = "iffffff\0" /* Parameter signature */ "glReplacementCodeuiNormal3fVertex3fSUN\0" ""; #endif +#if defined(need_GL_ARB_sync) +static const char DeleteSync_names[] = + "i\0" /* Parameter signature */ + "glDeleteSync\0" + ""; +#endif + #if defined(need_GL_SGIX_fragment_lighting) -static const char FragmentMaterialfvSGIX_names[] = +static const char FragmentMaterialfvSGIX_names[] = "iip\0" /* Parameter signature */ "glFragmentMaterialfvSGIX\0" ""; #endif #if defined(need_GL_EXT_blend_color) -static const char BlendColor_names[] = +static const char BlendColor_names[] = "ffff\0" /* Parameter signature */ "glBlendColor\0" "glBlendColorEXT\0" @@ -399,7 +413,7 @@ static const char BlendColor_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char UniformMatrix4fvARB_names[] = +static const char UniformMatrix4fvARB_names[] = "iiip\0" /* Parameter signature */ "glUniformMatrix4fv\0" 
"glUniformMatrix4fvARB\0" @@ -407,7 +421,7 @@ static const char UniformMatrix4fvARB_names[] = #endif #if defined(need_GL_ARB_vertex_array_object) || defined(need_GL_APPLE_vertex_array_object) -static const char DeleteVertexArraysAPPLE_names[] = +static const char DeleteVertexArraysAPPLE_names[] = "ip\0" /* Parameter signature */ "glDeleteVertexArrays\0" "glDeleteVertexArraysAPPLE\0" @@ -415,28 +429,28 @@ static const char DeleteVertexArraysAPPLE_names[] = #endif #if defined(need_GL_SGIX_instruments) -static const char ReadInstrumentsSGIX_names[] = +static const char ReadInstrumentsSGIX_names[] = "i\0" /* Parameter signature */ "glReadInstrumentsSGIX\0" ""; #endif #if defined(need_GL_VERSION_2_1) -static const char UniformMatrix2x4fv_names[] = +static const char UniformMatrix2x4fv_names[] = "iiip\0" /* Parameter signature */ "glUniformMatrix2x4fv\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char Color4ubVertex3fvSUN_names[] = +static const char Color4ubVertex3fvSUN_names[] = "pp\0" /* Parameter signature */ "glColor4ubVertex3fvSUN\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_texture_array) -static const char FramebufferTextureLayerEXT_names[] = +static const char FramebufferTextureLayerEXT_names[] = "iiiii\0" /* Parameter signature */ "glFramebufferTextureLayer\0" "glFramebufferTextureLayerEXT\0" @@ -444,14 +458,14 @@ static const char FramebufferTextureLayerEXT_names[] = #endif #if defined(need_GL_SGIX_list_priority) -static const char GetListParameterfvSGIX_names[] = +static const char GetListParameterfvSGIX_names[] = "iip\0" /* Parameter signature */ "glGetListParameterfvSGIX\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4NusvARB_names[] = +static const char VertexAttrib4NusvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4Nusv\0" "glVertexAttrib4NusvARB\0" @@ -459,35 +473,35 @@ static const char VertexAttrib4NusvARB_names[] = 
#endif #if defined(need_GL_MESA_window_pos) -static const char WindowPos4svMESA_names[] = +static const char WindowPos4svMESA_names[] = "p\0" /* Parameter signature */ "glWindowPos4svMESA\0" ""; #endif #if defined(need_GL_ARB_shader_objects) -static const char CreateProgramObjectARB_names[] = +static const char CreateProgramObjectARB_names[] = "\0" /* Parameter signature */ "glCreateProgramObjectARB\0" ""; #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char FragmentLightModelivSGIX_names[] = +static const char FragmentLightModelivSGIX_names[] = "ip\0" /* Parameter signature */ "glFragmentLightModelivSGIX\0" ""; #endif #if defined(need_GL_VERSION_2_1) -static const char UniformMatrix4x3fv_names[] = +static const char UniformMatrix4x3fv_names[] = "iiip\0" /* Parameter signature */ "glUniformMatrix4x3fv\0" ""; #endif #if defined(need_GL_EXT_texture_object) -static const char PrioritizeTextures_names[] = +static const char PrioritizeTextures_names[] = "ipp\0" /* Parameter signature */ "glPrioritizeTextures\0" "glPrioritizeTexturesEXT\0" @@ -495,28 +509,28 @@ static const char PrioritizeTextures_names[] = #endif #if defined(need_GL_SGIX_async) -static const char AsyncMarkerSGIX_names[] = +static const char AsyncMarkerSGIX_names[] = "i\0" /* Parameter signature */ "glAsyncMarkerSGIX\0" ""; #endif #if defined(need_GL_SUN_global_alpha) -static const char GlobalAlphaFactorubSUN_names[] = +static const char GlobalAlphaFactorubSUN_names[] = "i\0" /* Parameter signature */ "glGlobalAlphaFactorubSUN\0" ""; #endif #if defined(need_GL_MESA_shader_debug) -static const char ClearDebugLogMESA_names[] = +static const char ClearDebugLogMESA_names[] = "iii\0" /* Parameter signature */ "glClearDebugLogMESA\0" ""; #endif #if defined(need_GL_EXT_histogram) -static const char ResetHistogram_names[] = +static const char ResetHistogram_names[] = "i\0" /* Parameter signature */ "glResetHistogram\0" "glResetHistogramEXT\0" @@ -524,14 +538,14 @@ static const char 
ResetHistogram_names[] = #endif #if defined(need_GL_NV_fragment_program) -static const char GetProgramNamedParameterfvNV_names[] = +static const char GetProgramNamedParameterfvNV_names[] = "iipp\0" /* Parameter signature */ "glGetProgramNamedParameterfvNV\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_point_parameters) || defined(need_GL_EXT_point_parameters) || defined(need_GL_SGIS_point_parameters) -static const char PointParameterfEXT_names[] = +static const char PointParameterfEXT_names[] = "if\0" /* Parameter signature */ "glPointParameterf\0" "glPointParameterfARB\0" @@ -541,35 +555,42 @@ static const char PointParameterfEXT_names[] = #endif #if defined(need_GL_SGIX_polynomial_ffd) -static const char LoadIdentityDeformationMapSGIX_names[] = +static const char LoadIdentityDeformationMapSGIX_names[] = "i\0" /* Parameter signature */ "glLoadIdentityDeformationMapSGIX\0" ""; #endif #if defined(need_GL_NV_fence) -static const char GenFencesNV_names[] = +static const char GenFencesNV_names[] = "ip\0" /* Parameter signature */ "glGenFencesNV\0" ""; #endif #if defined(need_GL_HP_image_transform) -static const char ImageTransformParameterfHP_names[] = +static const char ImageTransformParameterfHP_names[] = "iif\0" /* Parameter signature */ "glImageTransformParameterfHP\0" ""; #endif #if defined(need_GL_ARB_matrix_palette) -static const char MatrixIndexusvARB_names[] = +static const char MatrixIndexusvARB_names[] = "ip\0" /* Parameter signature */ "glMatrixIndexusvARB\0" ""; #endif +#if defined(need_GL_ARB_draw_elements_base_vertex) +static const char DrawElementsBaseVertex_names[] = + "iiipi\0" /* Parameter signature */ + "glDrawElementsBaseVertex\0" + ""; +#endif + #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char DisableVertexAttribArrayARB_names[] = +static const char DisableVertexAttribArrayARB_names[] = "i\0" /* Parameter signature */ "glDisableVertexAttribArray\0" "glDisableVertexAttribArrayARB\0" 
@@ -577,21 +598,21 @@ static const char DisableVertexAttribArrayARB_names[] = #endif #if defined(need_GL_VERSION_2_0) -static const char StencilMaskSeparate_names[] = +static const char StencilMaskSeparate_names[] = "ii\0" /* Parameter signature */ "glStencilMaskSeparate\0" ""; #endif #if defined(need_GL_ARB_vertex_program) -static const char ProgramLocalParameter4dARB_names[] = +static const char ProgramLocalParameter4dARB_names[] = "iidddd\0" /* Parameter signature */ "glProgramLocalParameter4dARB\0" ""; #endif #if defined(need_GL_VERSION_1_3) || defined(need_GL_ARB_texture_compression) -static const char CompressedTexImage3DARB_names[] = +static const char CompressedTexImage3DARB_names[] = "iiiiiiiip\0" /* Parameter signature */ "glCompressedTexImage3D\0" "glCompressedTexImage3DARB\0" @@ -599,7 +620,7 @@ static const char CompressedTexImage3DARB_names[] = #endif #if defined(need_GL_EXT_convolution) -static const char GetConvolutionParameteriv_names[] = +static const char GetConvolutionParameteriv_names[] = "iip\0" /* Parameter signature */ "glGetConvolutionParameteriv\0" "glGetConvolutionParameterivEXT\0" @@ -607,7 +628,7 @@ static const char GetConvolutionParameteriv_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib1fARB_names[] = +static const char VertexAttrib1fARB_names[] = "if\0" /* Parameter signature */ "glVertexAttrib1f\0" "glVertexAttrib1fARB\0" @@ -615,14 +636,14 @@ static const char VertexAttrib1fARB_names[] = #endif #if defined(need_GL_NV_fence) -static const char TestFenceNV_names[] = +static const char TestFenceNV_names[] = "i\0" /* Parameter signature */ "glTestFenceNV\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord1fvARB_names[] = +static const char MultiTexCoord1fvARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord1fv\0" "glMultiTexCoord1fvARB\0" @@ -630,56 +651,56 @@ static const char MultiTexCoord1fvARB_names[] = #endif #if 
defined(need_GL_ATI_fragment_shader) -static const char ColorFragmentOp2ATI_names[] = +static const char ColorFragmentOp2ATI_names[] = "iiiiiiiiii\0" /* Parameter signature */ "glColorFragmentOp2ATI\0" ""; #endif #if defined(need_GL_IBM_vertex_array_lists) -static const char SecondaryColorPointerListIBM_names[] = +static const char SecondaryColorPointerListIBM_names[] = "iiipi\0" /* Parameter signature */ "glSecondaryColorPointerListIBM\0" ""; #endif #if defined(need_GL_SGIS_pixel_texture) -static const char GetPixelTexGenParameterivSGIS_names[] = +static const char GetPixelTexGenParameterivSGIS_names[] = "ip\0" /* Parameter signature */ "glGetPixelTexGenParameterivSGIS\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib4fNV_names[] = +static const char VertexAttrib4fNV_names[] = "iffff\0" /* Parameter signature */ "glVertexAttrib4fNV\0" ""; #endif #if defined(need_GL_SUN_triangle_list) -static const char ReplacementCodeubSUN_names[] = +static const char ReplacementCodeubSUN_names[] = "i\0" /* Parameter signature */ "glReplacementCodeubSUN\0" ""; #endif #if defined(need_GL_SGIX_async) -static const char FinishAsyncSGIX_names[] = +static const char FinishAsyncSGIX_names[] = "p\0" /* Parameter signature */ "glFinishAsyncSGIX\0" ""; #endif #if defined(need_GL_MESA_shader_debug) -static const char GetDebugLogMESA_names[] = +static const char GetDebugLogMESA_names[] = "iiiipp\0" /* Parameter signature */ "glGetDebugLogMESA\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_fog_coord) -static const char FogCoorddEXT_names[] = +static const char FogCoorddEXT_names[] = "d\0" /* Parameter signature */ "glFogCoordd\0" "glFogCoorddEXT\0" @@ -687,14 +708,14 @@ static const char FogCoorddEXT_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char Color4ubVertex3fSUN_names[] = +static const char Color4ubVertex3fSUN_names[] = "iiiifff\0" /* Parameter signature */ "glColor4ubVertex3fSUN\0" ""; #endif #if 
defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_fog_coord) -static const char FogCoordfEXT_names[] = +static const char FogCoordfEXT_names[] = "f\0" /* Parameter signature */ "glFogCoordf\0" "glFogCoordfEXT\0" @@ -702,35 +723,35 @@ static const char FogCoordfEXT_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord2fVertex3fSUN_names[] = +static const char TexCoord2fVertex3fSUN_names[] = "fffff\0" /* Parameter signature */ "glTexCoord2fVertex3fSUN\0" ""; #endif #if defined(need_GL_SUN_global_alpha) -static const char GlobalAlphaFactoriSUN_names[] = +static const char GlobalAlphaFactoriSUN_names[] = "i\0" /* Parameter signature */ "glGlobalAlphaFactoriSUN\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib2dNV_names[] = +static const char VertexAttrib2dNV_names[] = "idd\0" /* Parameter signature */ "glVertexAttrib2dNV\0" ""; #endif #if defined(need_GL_VERSION_2_0) -static const char GetProgramInfoLog_names[] = +static const char GetProgramInfoLog_names[] = "iipp\0" /* Parameter signature */ "glGetProgramInfoLog\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4NbvARB_names[] = +static const char VertexAttrib4NbvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4Nbv\0" "glVertexAttrib4NbvARB\0" @@ -738,7 +759,7 @@ static const char VertexAttrib4NbvARB_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_shader) -static const char GetActiveAttribARB_names[] = +static const char GetActiveAttribARB_names[] = "iiipppp\0" /* Parameter signature */ "glGetActiveAttrib\0" "glGetActiveAttribARB\0" @@ -746,84 +767,91 @@ static const char GetActiveAttribARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib4ubNV_names[] = +static const char VertexAttrib4ubNV_names[] = "iiiii\0" /* Parameter signature */ "glVertexAttrib4ubNV\0" ""; #endif +#if 
defined(need_GL_APPLE_texture_range) +static const char TextureRangeAPPLE_names[] = + "iip\0" /* Parameter signature */ + "glTextureRangeAPPLE\0" + ""; +#endif + #if defined(need_GL_SUN_vertex) -static const char TexCoord2fColor4fNormal3fVertex3fSUN_names[] = +static const char TexCoord2fColor4fNormal3fVertex3fSUN_names[] = "ffffffffffff\0" /* Parameter signature */ "glTexCoord2fColor4fNormal3fVertex3fSUN\0" ""; #endif #if defined(need_GL_NV_register_combiners) -static const char CombinerParameterfvNV_names[] = +static const char CombinerParameterfvNV_names[] = "ip\0" /* Parameter signature */ "glCombinerParameterfvNV\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribs3dvNV_names[] = +static const char VertexAttribs3dvNV_names[] = "iip\0" /* Parameter signature */ "glVertexAttribs3dvNV\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribs4fvNV_names[] = +static const char VertexAttribs4fvNV_names[] = "iip\0" /* Parameter signature */ "glVertexAttribs4fvNV\0" ""; #endif #if defined(need_GL_NV_vertex_array_range) -static const char VertexArrayRangeNV_names[] = +static const char VertexArrayRangeNV_names[] = "ip\0" /* Parameter signature */ "glVertexArrayRangeNV\0" ""; #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char FragmentLightiSGIX_names[] = +static const char FragmentLightiSGIX_names[] = "iii\0" /* Parameter signature */ "glFragmentLightiSGIX\0" ""; #endif #if defined(need_GL_EXT_polygon_offset) -static const char PolygonOffsetEXT_names[] = +static const char PolygonOffsetEXT_names[] = "ff\0" /* Parameter signature */ "glPolygonOffsetEXT\0" ""; #endif #if defined(need_GL_SGIX_async) -static const char PollAsyncSGIX_names[] = +static const char PollAsyncSGIX_names[] = "p\0" /* Parameter signature */ "glPollAsyncSGIX\0" ""; #endif #if defined(need_GL_ATI_fragment_shader) -static const char DeleteFragmentShaderATI_names[] = +static const char DeleteFragmentShaderATI_names[] = 
"i\0" /* Parameter signature */ "glDeleteFragmentShaderATI\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord2fNormal3fVertex3fvSUN_names[] = +static const char TexCoord2fNormal3fVertex3fvSUN_names[] = "ppp\0" /* Parameter signature */ "glTexCoord2fNormal3fVertex3fvSUN\0" ""; #endif #if defined(need_GL_VERSION_1_3) || defined(need_GL_ARB_transpose_matrix) -static const char MultTransposeMatrixdARB_names[] = +static const char MultTransposeMatrixdARB_names[] = "p\0" /* Parameter signature */ "glMultTransposeMatrixd\0" "glMultTransposeMatrixdARB\0" @@ -831,7 +859,7 @@ static const char MultTransposeMatrixdARB_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos2svMESA_names[] = +static const char WindowPos2svMESA_names[] = "p\0" /* Parameter signature */ "glWindowPos2sv\0" "glWindowPos2svARB\0" @@ -840,7 +868,7 @@ static const char WindowPos2svMESA_names[] = #endif #if defined(need_GL_VERSION_1_3) || defined(need_GL_ARB_texture_compression) -static const char CompressedTexImage1DARB_names[] = +static const char CompressedTexImage1DARB_names[] = "iiiiiip\0" /* Parameter signature */ "glCompressedTexImage1D\0" "glCompressedTexImage1DARB\0" @@ -848,35 +876,35 @@ static const char CompressedTexImage1DARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib2sNV_names[] = +static const char VertexAttrib2sNV_names[] = "iii\0" /* Parameter signature */ "glVertexAttrib2sNV\0" ""; #endif #if defined(need_GL_IBM_vertex_array_lists) -static const char NormalPointerListIBM_names[] = +static const char NormalPointerListIBM_names[] = "iipi\0" /* Parameter signature */ "glNormalPointerListIBM\0" ""; #endif #if defined(need_GL_EXT_vertex_array) -static const char IndexPointerEXT_names[] = +static const char IndexPointerEXT_names[] = "iiip\0" /* Parameter signature */ "glIndexPointerEXT\0" ""; #endif #if 
defined(need_GL_EXT_vertex_array) -static const char NormalPointerEXT_names[] = +static const char NormalPointerEXT_names[] = "iiip\0" /* Parameter signature */ "glNormalPointerEXT\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord3dARB_names[] = +static const char MultiTexCoord3dARB_names[] = "iddd\0" /* Parameter signature */ "glMultiTexCoord3d\0" "glMultiTexCoord3dARB\0" @@ -884,7 +912,7 @@ static const char MultiTexCoord3dARB_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord2iARB_names[] = +static const char MultiTexCoord2iARB_names[] = "iii\0" /* Parameter signature */ "glMultiTexCoord2i\0" "glMultiTexCoord2iARB\0" @@ -892,14 +920,14 @@ static const char MultiTexCoord2iARB_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiTexCoord2fNormal3fVertex3fSUN_names[] = +static const char ReplacementCodeuiTexCoord2fNormal3fVertex3fSUN_names[] = "iffffffff\0" /* Parameter signature */ "glReplacementCodeuiTexCoord2fNormal3fVertex3fSUN\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord2svARB_names[] = +static const char MultiTexCoord2svARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord2sv\0" "glMultiTexCoord2svARB\0" @@ -907,14 +935,14 @@ static const char MultiTexCoord2svARB_names[] = #endif #if defined(need_GL_SUN_triangle_list) -static const char ReplacementCodeubvSUN_names[] = +static const char ReplacementCodeubvSUN_names[] = "p\0" /* Parameter signature */ "glReplacementCodeubvSUN\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform3iARB_names[] = +static const char Uniform3iARB_names[] = "iiii\0" /* Parameter signature */ "glUniform3i\0" "glUniform3iARB\0" @@ -922,49 +950,49 @@ static const char Uniform3iARB_names[] = #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char GetFragmentMaterialfvSGIX_names[] = +static const char 
GetFragmentMaterialfvSGIX_names[] = "iip\0" /* Parameter signature */ "glGetFragmentMaterialfvSGIX\0" ""; #endif #if defined(need_GL_VERSION_2_0) -static const char GetShaderInfoLog_names[] = +static const char GetShaderInfoLog_names[] = "iipp\0" /* Parameter signature */ "glGetShaderInfoLog\0" ""; #endif #if defined(need_GL_ARB_vertex_blend) -static const char WeightivARB_names[] = +static const char WeightivARB_names[] = "ip\0" /* Parameter signature */ "glWeightivARB\0" ""; #endif #if defined(need_GL_SGIX_instruments) -static const char PollInstrumentsSGIX_names[] = +static const char PollInstrumentsSGIX_names[] = "p\0" /* Parameter signature */ "glPollInstrumentsSGIX\0" ""; #endif #if defined(need_GL_SUN_global_alpha) -static const char GlobalAlphaFactordSUN_names[] = +static const char GlobalAlphaFactordSUN_names[] = "d\0" /* Parameter signature */ "glGlobalAlphaFactordSUN\0" ""; #endif -#if defined(need_GL_NV_vertex_program) -static const char VertexAttribs3fvNV_names[] = +#if defined(need_GL_NV_register_combiners) +static const char GetFinalCombinerInputParameterfvNV_names[] = "iip\0" /* Parameter signature */ - "glVertexAttribs3fvNV\0" + "glGetFinalCombinerInputParameterfvNV\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char GenerateMipmapEXT_names[] = +static const char GenerateMipmapEXT_names[] = "i\0" /* Parameter signature */ "glGenerateMipmap\0" "glGenerateMipmapEXT\0" @@ -972,35 +1000,35 @@ static const char GenerateMipmapEXT_names[] = #endif #if defined(need_GL_ATI_fragment_shader) -static const char SetFragmentShaderConstantATI_names[] = +static const char SetFragmentShaderConstantATI_names[] = "ip\0" /* Parameter signature */ "glSetFragmentShaderConstantATI\0" ""; #endif #if defined(need_GL_NV_evaluators) -static const char GetMapAttribParameterivNV_names[] = +static const char GetMapAttribParameterivNV_names[] = "iiip\0" /* Parameter signature */ "glGetMapAttribParameterivNV\0" 
""; #endif #if defined(need_GL_ARB_shader_objects) -static const char CreateShaderObjectARB_names[] = +static const char CreateShaderObjectARB_names[] = "i\0" /* Parameter signature */ "glCreateShaderObjectARB\0" ""; #endif #if defined(need_GL_SGIS_sharpen_texture) -static const char GetSharpenTexFuncSGIS_names[] = +static const char GetSharpenTexFuncSGIS_names[] = "ip\0" /* Parameter signature */ "glGetSharpenTexFuncSGIS\0" ""; #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) -static const char BufferDataARB_names[] = +static const char BufferDataARB_names[] = "iipi\0" /* Parameter signature */ "glBufferData\0" "glBufferDataARB\0" @@ -1008,42 +1036,42 @@ static const char BufferDataARB_names[] = #endif #if defined(need_GL_NV_vertex_array_range) -static const char FlushVertexArrayRangeNV_names[] = +static const char FlushVertexArrayRangeNV_names[] = "\0" /* Parameter signature */ "glFlushVertexArrayRangeNV\0" ""; #endif #if defined(need_GL_ATI_fragment_shader) -static const char SampleMapATI_names[] = +static const char SampleMapATI_names[] = "iii\0" /* Parameter signature */ "glSampleMapATI\0" ""; #endif #if defined(need_GL_EXT_vertex_array) -static const char VertexPointerEXT_names[] = +static const char VertexPointerEXT_names[] = "iiiip\0" /* Parameter signature */ "glVertexPointerEXT\0" ""; #endif #if defined(need_GL_SGIS_texture_filter4) -static const char GetTexFilterFuncSGIS_names[] = +static const char GetTexFilterFuncSGIS_names[] = "iip\0" /* Parameter signature */ "glGetTexFilterFuncSGIS\0" ""; #endif #if defined(need_GL_NV_register_combiners) -static const char GetCombinerOutputParameterfvNV_names[] = +static const char GetCombinerOutputParameterfvNV_names[] = "iiip\0" /* Parameter signature */ "glGetCombinerOutputParameterfvNV\0" ""; #endif #if defined(need_GL_EXT_subtexture) -static const char TexSubImage1D_names[] = +static const char TexSubImage1D_names[] = "iiiiiip\0" /* Parameter signature */ "glTexSubImage1D\0" 
"glTexSubImage1DEXT\0" @@ -1051,36 +1079,43 @@ static const char TexSubImage1D_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib1sARB_names[] = +static const char VertexAttrib1sARB_names[] = "ii\0" /* Parameter signature */ "glVertexAttrib1s\0" "glVertexAttrib1sARB\0" ""; #endif +#if defined(need_GL_ARB_sync) +static const char FenceSync_names[] = + "ii\0" /* Parameter signature */ + "glFenceSync\0" + ""; +#endif + #if defined(need_GL_NV_register_combiners) -static const char FinalCombinerInputNV_names[] = +static const char FinalCombinerInputNV_names[] = "iiii\0" /* Parameter signature */ "glFinalCombinerInputNV\0" ""; #endif #if defined(need_GL_SGIX_flush_raster) -static const char FlushRasterSGIX_names[] = +static const char FlushRasterSGIX_names[] = "\0" /* Parameter signature */ "glFlushRasterSGIX\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiTexCoord2fVertex3fSUN_names[] = +static const char ReplacementCodeuiTexCoord2fVertex3fSUN_names[] = "ifffff\0" /* Parameter signature */ "glReplacementCodeuiTexCoord2fVertex3fSUN\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform1fARB_names[] = +static const char Uniform1fARB_names[] = "if\0" /* Parameter signature */ "glUniform1f\0" "glUniform1fARB\0" @@ -1088,7 +1123,7 @@ static const char Uniform1fARB_names[] = #endif #if defined(need_GL_EXT_texture_object) -static const char AreTexturesResident_names[] = +static const char AreTexturesResident_names[] = "ipp\0" /* Parameter signature */ "glAreTexturesResident\0" "glAreTexturesResidentEXT\0" @@ -1096,7 +1131,7 @@ static const char AreTexturesResident_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ATI_separate_stencil) -static const char StencilOpSeparate_names[] = +static const char StencilOpSeparate_names[] = "iiii\0" /* Parameter signature */ "glStencilOpSeparate\0" 
"glStencilOpSeparateATI\0" @@ -1104,7 +1139,7 @@ static const char StencilOpSeparate_names[] = #endif #if defined(need_GL_SGI_color_table) -static const char ColorTableParameteriv_names[] = +static const char ColorTableParameteriv_names[] = "iip\0" /* Parameter signature */ "glColorTableParameteriv\0" "glColorTableParameterivSGI\0" @@ -1112,14 +1147,14 @@ static const char ColorTableParameteriv_names[] = #endif #if defined(need_GL_IBM_vertex_array_lists) -static const char FogCoordPointerListIBM_names[] = +static const char FogCoordPointerListIBM_names[] = "iipi\0" /* Parameter signature */ "glFogCoordPointerListIBM\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos3dMESA_names[] = +static const char WindowPos3dMESA_names[] = "ddd\0" /* Parameter signature */ "glWindowPos3d\0" "glWindowPos3dARB\0" @@ -1128,7 +1163,7 @@ static const char WindowPos3dMESA_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_point_parameters) || defined(need_GL_EXT_point_parameters) || defined(need_GL_SGIS_point_parameters) -static const char PointParameterfvEXT_names[] = +static const char PointParameterfvEXT_names[] = "ip\0" /* Parameter signature */ "glPointParameterfv\0" "glPointParameterfvARB\0" @@ -1138,7 +1173,7 @@ static const char PointParameterfvEXT_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos2fvMESA_names[] = +static const char WindowPos2fvMESA_names[] = "p\0" /* Parameter signature */ "glWindowPos2fv\0" "glWindowPos2fvARB\0" @@ -1147,7 +1182,7 @@ static const char WindowPos2fvMESA_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3bvEXT_names[] = +static const char SecondaryColor3bvEXT_names[] = "p\0" /* Parameter signature */ "glSecondaryColor3bv\0" "glSecondaryColor3bvEXT\0" @@ 
-1155,35 +1190,35 @@ static const char SecondaryColor3bvEXT_names[] = #endif #if defined(need_GL_IBM_vertex_array_lists) -static const char VertexPointerListIBM_names[] = +static const char VertexPointerListIBM_names[] = "iiipi\0" /* Parameter signature */ "glVertexPointerListIBM\0" ""; #endif #if defined(need_GL_ARB_vertex_program) -static const char GetProgramLocalParameterfvARB_names[] = +static const char GetProgramLocalParameterfvARB_names[] = "iip\0" /* Parameter signature */ "glGetProgramLocalParameterfvARB\0" ""; #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char FragmentMaterialfSGIX_names[] = +static const char FragmentMaterialfSGIX_names[] = "iif\0" /* Parameter signature */ "glFragmentMaterialfSGIX\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord2fNormal3fVertex3fSUN_names[] = +static const char TexCoord2fNormal3fVertex3fSUN_names[] = "ffffffff\0" /* Parameter signature */ "glTexCoord2fNormal3fVertex3fSUN\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char RenderbufferStorageEXT_names[] = +static const char RenderbufferStorageEXT_names[] = "iiii\0" /* Parameter signature */ "glRenderbufferStorage\0" "glRenderbufferStorageEXT\0" @@ -1191,28 +1226,28 @@ static const char RenderbufferStorageEXT_names[] = #endif #if defined(need_GL_NV_fence) -static const char IsFenceNV_names[] = +static const char IsFenceNV_names[] = "i\0" /* Parameter signature */ "glIsFenceNV\0" ""; #endif #if defined(need_GL_ARB_shader_objects) -static const char AttachObjectARB_names[] = +static const char AttachObjectARB_names[] = "ii\0" /* Parameter signature */ "glAttachObjectARB\0" ""; #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char GetFragmentLightivSGIX_names[] = +static const char GetFragmentLightivSGIX_names[] = "iip\0" /* Parameter signature */ "glGetFragmentLightivSGIX\0" ""; #endif #if defined(need_GL_VERSION_2_0) || 
defined(need_GL_ARB_shader_objects) -static const char UniformMatrix2fvARB_names[] = +static const char UniformMatrix2fvARB_names[] = "iiip\0" /* Parameter signature */ "glUniformMatrix2fv\0" "glUniformMatrix2fvARB\0" @@ -1220,7 +1255,7 @@ static const char UniformMatrix2fvARB_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord2fARB_names[] = +static const char MultiTexCoord2fARB_names[] = "iff\0" /* Parameter signature */ "glMultiTexCoord2f\0" "glMultiTexCoord2fARB\0" @@ -1228,7 +1263,7 @@ static const char MultiTexCoord2fARB_names[] = #endif #if defined(need_GL_SGI_color_table) || defined(need_GL_EXT_paletted_texture) -static const char ColorTable_names[] = +static const char ColorTable_names[] = "iiiiip\0" /* Parameter signature */ "glColorTable\0" "glColorTableSGI\0" @@ -1237,14 +1272,14 @@ static const char ColorTable_names[] = #endif #if defined(need_GL_NV_evaluators) -static const char MapControlPointsNV_names[] = +static const char MapControlPointsNV_names[] = "iiiiiiiip\0" /* Parameter signature */ "glMapControlPointsNV\0" ""; #endif #if defined(need_GL_EXT_convolution) -static const char ConvolutionFilter2D_names[] = +static const char ConvolutionFilter2D_names[] = "iiiiiip\0" /* Parameter signature */ "glConvolutionFilter2D\0" "glConvolutionFilter2DEXT\0" @@ -1252,14 +1287,14 @@ static const char ConvolutionFilter2D_names[] = #endif #if defined(need_GL_NV_evaluators) -static const char MapParameterfvNV_names[] = +static const char MapParameterfvNV_names[] = "iip\0" /* Parameter signature */ "glMapParameterfvNV\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib3dvARB_names[] = +static const char VertexAttrib3dvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib3dv\0" "glVertexAttrib3dvARB\0" @@ -1267,14 +1302,14 @@ static const char VertexAttrib3dvARB_names[] = #endif #if defined(need_GL_PGI_misc_hints) -static const char HintPGI_names[] = +static 
const char HintPGI_names[] = "ii\0" /* Parameter signature */ "glHintPGI\0" ""; #endif #if defined(need_GL_EXT_convolution) -static const char ConvolutionParameteriv_names[] = +static const char ConvolutionParameteriv_names[] = "iip\0" /* Parameter signature */ "glConvolutionParameteriv\0" "glConvolutionParameterivEXT\0" @@ -1282,28 +1317,28 @@ static const char ConvolutionParameteriv_names[] = #endif #if defined(need_GL_EXT_cull_vertex) -static const char CullParameterdvEXT_names[] = +static const char CullParameterdvEXT_names[] = "ip\0" /* Parameter signature */ "glCullParameterdvEXT\0" ""; #endif #if defined(need_GL_NV_fragment_program) -static const char ProgramNamedParameter4fNV_names[] = +static const char ProgramNamedParameter4fNV_names[] = "iipffff\0" /* Parameter signature */ "glProgramNamedParameter4fNV\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char Color3fVertex3fSUN_names[] = +static const char Color3fVertex3fSUN_names[] = "ffffff\0" /* Parameter signature */ "glColor3fVertex3fSUN\0" ""; #endif #if defined(need_GL_ARB_vertex_program) || defined(need_GL_NV_vertex_program) -static const char ProgramEnvParameter4fvARB_names[] = +static const char ProgramEnvParameter4fvARB_names[] = "iip\0" /* Parameter signature */ "glProgramEnvParameter4fvARB\0" "glProgramParameter4fvNV\0" @@ -1311,14 +1346,14 @@ static const char ProgramEnvParameter4fvARB_names[] = #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char FragmentLightModeliSGIX_names[] = +static const char FragmentLightModeliSGIX_names[] = "ii\0" /* Parameter signature */ "glFragmentLightModeliSGIX\0" ""; #endif #if defined(need_GL_EXT_convolution) -static const char ConvolutionParameterfv_names[] = +static const char ConvolutionParameterfv_names[] = "iip\0" /* Parameter signature */ "glConvolutionParameterfv\0" "glConvolutionParameterfvEXT\0" @@ -1326,35 +1361,42 @@ static const char ConvolutionParameterfv_names[] = #endif #if defined(need_GL_3DFX_tbuffer) -static const 
char TbufferMask3DFX_names[] = +static const char TbufferMask3DFX_names[] = "i\0" /* Parameter signature */ "glTbufferMask3DFX\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char LoadProgramNV_names[] = +static const char LoadProgramNV_names[] = "iiip\0" /* Parameter signature */ "glLoadProgramNV\0" ""; #endif +#if defined(need_GL_ARB_sync) +static const char WaitSync_names[] = + "iii\0" /* Parameter signature */ + "glWaitSync\0" + ""; +#endif + #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib4fvNV_names[] = +static const char VertexAttrib4fvNV_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4fvNV\0" ""; #endif #if defined(need_GL_ARB_shader_objects) -static const char GetAttachedObjectsARB_names[] = +static const char GetAttachedObjectsARB_names[] = "iipp\0" /* Parameter signature */ "glGetAttachedObjectsARB\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform3fvARB_names[] = +static const char Uniform3fvARB_names[] = "iip\0" /* Parameter signature */ "glUniform3fv\0" "glUniform3fvARB\0" @@ -1362,7 +1404,7 @@ static const char Uniform3fvARB_names[] = #endif #if defined(need_GL_EXT_draw_range_elements) -static const char DrawRangeElements_names[] = +static const char DrawRangeElements_names[] = "iiiiip\0" /* Parameter signature */ "glDrawRangeElements\0" "glDrawRangeElementsEXT\0" @@ -1370,14 +1412,14 @@ static const char DrawRangeElements_names[] = #endif #if defined(need_GL_SGIX_sprite) -static const char SpriteParameterfvSGIX_names[] = +static const char SpriteParameterfvSGIX_names[] = "ip\0" /* Parameter signature */ "glSpriteParameterfvSGIX\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char CheckFramebufferStatusEXT_names[] = +static const char CheckFramebufferStatusEXT_names[] = "i\0" /* Parameter signature */ "glCheckFramebufferStatus\0" "glCheckFramebufferStatusEXT\0" @@ -1385,21 
+1427,21 @@ static const char CheckFramebufferStatusEXT_names[] = #endif #if defined(need_GL_SUN_global_alpha) -static const char GlobalAlphaFactoruiSUN_names[] = +static const char GlobalAlphaFactoruiSUN_names[] = "i\0" /* Parameter signature */ "glGlobalAlphaFactoruiSUN\0" ""; #endif #if defined(need_GL_ARB_shader_objects) -static const char GetHandleARB_names[] = +static const char GetHandleARB_names[] = "i\0" /* Parameter signature */ "glGetHandleARB\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char GetVertexAttribivARB_names[] = +static const char GetVertexAttribivARB_names[] = "iip\0" /* Parameter signature */ "glGetVertexAttribiv\0" "glGetVertexAttribivARB\0" @@ -1407,21 +1449,21 @@ static const char GetVertexAttribivARB_names[] = #endif #if defined(need_GL_NV_register_combiners) -static const char GetCombinerInputParameterfvNV_names[] = +static const char GetCombinerInputParameterfvNV_names[] = "iiiip\0" /* Parameter signature */ "glGetCombinerInputParameterfvNV\0" ""; #endif #if defined(need_GL_VERSION_2_0) -static const char CreateProgram_names[] = +static const char CreateProgram_names[] = "\0" /* Parameter signature */ "glCreateProgram\0" ""; #endif #if defined(need_GL_VERSION_1_3) || defined(need_GL_ARB_transpose_matrix) -static const char LoadTransposeMatrixdARB_names[] = +static const char LoadTransposeMatrixdARB_names[] = "p\0" /* Parameter signature */ "glLoadTransposeMatrixd\0" "glLoadTransposeMatrixdARB\0" @@ -1429,7 +1471,7 @@ static const char LoadTransposeMatrixdARB_names[] = #endif #if defined(need_GL_EXT_histogram) -static const char GetMinmax_names[] = +static const char GetMinmax_names[] = "iiiip\0" /* Parameter signature */ "glGetMinmax\0" "glGetMinmaxEXT\0" @@ -1437,14 +1479,14 @@ static const char GetMinmax_names[] = #endif #if defined(need_GL_VERSION_2_0) -static const char StencilFuncSeparate_names[] = +static const char StencilFuncSeparate_names[] = "iiii\0" /* Parameter 
signature */ "glStencilFuncSeparate\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3sEXT_names[] = +static const char SecondaryColor3sEXT_names[] = "iii\0" /* Parameter signature */ "glSecondaryColor3s\0" "glSecondaryColor3sEXT\0" @@ -1452,28 +1494,28 @@ static const char SecondaryColor3sEXT_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char Color3fVertex3fvSUN_names[] = +static const char Color3fVertex3fvSUN_names[] = "pp\0" /* Parameter signature */ "glColor3fVertex3fvSUN\0" ""; #endif #if defined(need_GL_SUN_global_alpha) -static const char GlobalAlphaFactorbSUN_names[] = +static const char GlobalAlphaFactorbSUN_names[] = "i\0" /* Parameter signature */ "glGlobalAlphaFactorbSUN\0" ""; #endif #if defined(need_GL_HP_image_transform) -static const char ImageTransformParameterfvHP_names[] = +static const char ImageTransformParameterfvHP_names[] = "iip\0" /* Parameter signature */ "glImageTransformParameterfvHP\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4ivARB_names[] = +static const char VertexAttrib4ivARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4iv\0" "glVertexAttrib4ivARB\0" @@ -1481,28 +1523,28 @@ static const char VertexAttrib4ivARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib3fNV_names[] = +static const char VertexAttrib3fNV_names[] = "ifff\0" /* Parameter signature */ "glVertexAttrib3fNV\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribs2dvNV_names[] = +static const char VertexAttribs2dvNV_names[] = "iip\0" /* Parameter signature */ "glVertexAttribs2dvNV\0" ""; #endif #if defined(need_GL_EXT_timer_query) -static const char GetQueryObjectui64vEXT_names[] = +static const char GetQueryObjectui64vEXT_names[] = "iip\0" /* Parameter signature */ "glGetQueryObjectui64vEXT\0" ""; #endif #if 
defined(need_GL_VERSION_1_3) -static const char MultiTexCoord3fvARB_names[] = +static const char MultiTexCoord3fvARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord3fv\0" "glMultiTexCoord3fvARB\0" @@ -1510,7 +1552,7 @@ static const char MultiTexCoord3fvARB_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3dEXT_names[] = +static const char SecondaryColor3dEXT_names[] = "ddd\0" /* Parameter signature */ "glSecondaryColor3d\0" "glSecondaryColor3dEXT\0" @@ -1518,42 +1560,42 @@ static const char SecondaryColor3dEXT_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char GetProgramParameterfvNV_names[] = +static const char GetProgramParameterfvNV_names[] = "iiip\0" /* Parameter signature */ "glGetProgramParameterfvNV\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char TangentPointerEXT_names[] = +static const char TangentPointerEXT_names[] = "iip\0" /* Parameter signature */ "glTangentPointerEXT\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char Color4fNormal3fVertex3fvSUN_names[] = +static const char Color4fNormal3fVertex3fvSUN_names[] = "ppp\0" /* Parameter signature */ "glColor4fNormal3fVertex3fvSUN\0" ""; #endif #if defined(need_GL_SGIX_instruments) -static const char GetInstrumentsSGIX_names[] = +static const char GetInstrumentsSGIX_names[] = "\0" /* Parameter signature */ "glGetInstrumentsSGIX\0" ""; #endif #if defined(need_GL_NV_evaluators) -static const char EvalMapsNV_names[] = +static const char EvalMapsNV_names[] = "ii\0" /* Parameter signature */ "glEvalMapsNV\0" ""; #endif #if defined(need_GL_EXT_subtexture) -static const char TexSubImage2D_names[] = +static const char TexSubImage2D_names[] = "iiiiiiiip\0" /* Parameter signature */ "glTexSubImage2D\0" "glTexSubImage2DEXT\0" @@ -1561,21 +1603,28 @@ static const char TexSubImage2D_names[] = #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char 
FragmentLightivSGIX_names[] = +static const char FragmentLightivSGIX_names[] = "iip\0" /* Parameter signature */ "glFragmentLightivSGIX\0" ""; #endif +#if defined(need_GL_APPLE_texture_range) +static const char GetTexParameterPointervAPPLE_names[] = + "iip\0" /* Parameter signature */ + "glGetTexParameterPointervAPPLE\0" + ""; +#endif + #if defined(need_GL_EXT_pixel_transform) -static const char PixelTransformParameterfvEXT_names[] = +static const char PixelTransformParameterfvEXT_names[] = "iip\0" /* Parameter signature */ "glPixelTransformParameterfvEXT\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4bvARB_names[] = +static const char VertexAttrib4bvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4bv\0" "glVertexAttrib4bvARB\0" @@ -1583,14 +1632,14 @@ static const char VertexAttrib4bvARB_names[] = #endif #if defined(need_GL_ATI_fragment_shader) -static const char AlphaFragmentOp2ATI_names[] = +static const char AlphaFragmentOp2ATI_names[] = "iiiiiiiii\0" /* Parameter signature */ "glAlphaFragmentOp2ATI\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord4sARB_names[] = +static const char MultiTexCoord4sARB_names[] = "iiiii\0" /* Parameter signature */ "glMultiTexCoord4s\0" "glMultiTexCoord4sARB\0" @@ -1598,28 +1647,28 @@ static const char MultiTexCoord4sARB_names[] = #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char GetFragmentMaterialivSGIX_names[] = +static const char GetFragmentMaterialivSGIX_names[] = "iip\0" /* Parameter signature */ "glGetFragmentMaterialivSGIX\0" ""; #endif #if defined(need_GL_MESA_window_pos) -static const char WindowPos4dMESA_names[] = +static const char WindowPos4dMESA_names[] = "dddd\0" /* Parameter signature */ "glWindowPos4dMESA\0" ""; #endif #if defined(need_GL_ARB_vertex_blend) -static const char WeightPointerARB_names[] = +static const char WeightPointerARB_names[] = "iiip\0" /* Parameter signature */ 
"glWeightPointerARB\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos2dMESA_names[] = +static const char WindowPos2dMESA_names[] = "dd\0" /* Parameter signature */ "glWindowPos2d\0" "glWindowPos2dARB\0" @@ -1628,7 +1677,7 @@ static const char WindowPos2dMESA_names[] = #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char FramebufferTexture3DEXT_names[] = +static const char FramebufferTexture3DEXT_names[] = "iiiiii\0" /* Parameter signature */ "glFramebufferTexture3D\0" "glFramebufferTexture3DEXT\0" @@ -1636,7 +1685,7 @@ static const char FramebufferTexture3DEXT_names[] = #endif #if defined(need_GL_EXT_blend_minmax) -static const char BlendEquation_names[] = +static const char BlendEquation_names[] = "i\0" /* Parameter signature */ "glBlendEquation\0" "glBlendEquationEXT\0" @@ -1644,14 +1693,14 @@ static const char BlendEquation_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib3dNV_names[] = +static const char VertexAttrib3dNV_names[] = "iddd\0" /* Parameter signature */ "glVertexAttrib3dNV\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib3dARB_names[] = +static const char VertexAttrib3dARB_names[] = "iddd\0" /* Parameter signature */ "glVertexAttrib3d\0" "glVertexAttrib3dARB\0" @@ -1659,14 +1708,14 @@ static const char VertexAttrib3dARB_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN_names[] = +static const char ReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN_names[] = "ppppp\0" /* Parameter signature */ "glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4fARB_names[] = +static const char 
VertexAttrib4fARB_names[] = "iffff\0" /* Parameter signature */ "glVertexAttrib4f\0" "glVertexAttrib4fARB\0" @@ -1674,14 +1723,14 @@ static const char VertexAttrib4fARB_names[] = #endif #if defined(need_GL_EXT_index_func) -static const char IndexFuncEXT_names[] = +static const char IndexFuncEXT_names[] = "if\0" /* Parameter signature */ "glIndexFuncEXT\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char FramebufferTexture2DEXT_names[] = +static const char FramebufferTexture2DEXT_names[] = "iiiii\0" /* Parameter signature */ "glFramebufferTexture2D\0" "glFramebufferTexture2DEXT\0" @@ -1689,7 +1738,7 @@ static const char FramebufferTexture2DEXT_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord2dvARB_names[] = +static const char MultiTexCoord2dvARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord2dv\0" "glMultiTexCoord2dvARB\0" @@ -1697,21 +1746,21 @@ static const char MultiTexCoord2dvARB_names[] = #endif #if defined(need_GL_EXT_cull_vertex) -static const char CullParameterfvEXT_names[] = +static const char CullParameterfvEXT_names[] = "ip\0" /* Parameter signature */ "glCullParameterfvEXT\0" ""; #endif #if defined(need_GL_NV_fragment_program) -static const char ProgramNamedParameter4fvNV_names[] = +static const char ProgramNamedParameter4fvNV_names[] = "iipp\0" /* Parameter signature */ "glProgramNamedParameter4fvNV\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColorPointerEXT_names[] = +static const char SecondaryColorPointerEXT_names[] = "iiip\0" /* Parameter signature */ "glSecondaryColorPointer\0" "glSecondaryColorPointerEXT\0" @@ -1719,7 +1768,7 @@ static const char SecondaryColorPointerEXT_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4fvARB_names[] = +static const char VertexAttrib4fvARB_names[] = "ip\0" /* 
Parameter signature */ "glVertexAttrib4fv\0" "glVertexAttrib4fvARB\0" @@ -1727,14 +1776,14 @@ static const char VertexAttrib4fvARB_names[] = #endif #if defined(need_GL_IBM_vertex_array_lists) -static const char ColorPointerListIBM_names[] = +static const char ColorPointerListIBM_names[] = "iiipi\0" /* Parameter signature */ "glColorPointerListIBM\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char GetActiveUniformARB_names[] = +static const char GetActiveUniformARB_names[] = "iiipppp\0" /* Parameter signature */ "glGetActiveUniform\0" "glGetActiveUniformARB\0" @@ -1742,14 +1791,14 @@ static const char GetActiveUniformARB_names[] = #endif #if defined(need_GL_HP_image_transform) -static const char ImageTransformParameteriHP_names[] = +static const char ImageTransformParameteriHP_names[] = "iii\0" /* Parameter signature */ "glImageTransformParameteriHP\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord1svARB_names[] = +static const char MultiTexCoord1svARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord1sv\0" "glMultiTexCoord1svARB\0" @@ -1757,7 +1806,7 @@ static const char MultiTexCoord1svARB_names[] = #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_occlusion_query) -static const char EndQueryARB_names[] = +static const char EndQueryARB_names[] = "i\0" /* Parameter signature */ "glEndQuery\0" "glEndQueryARB\0" @@ -1765,42 +1814,42 @@ static const char EndQueryARB_names[] = #endif #if defined(need_GL_NV_fence) -static const char DeleteFencesNV_names[] = +static const char DeleteFencesNV_names[] = "ip\0" /* Parameter signature */ "glDeleteFencesNV\0" ""; #endif #if defined(need_GL_SGIX_polynomial_ffd) -static const char DeformationMap3dSGIX_names[] = +static const char DeformationMap3dSGIX_names[] = "iddiiddiiddiip\0" /* Parameter signature */ "glDeformationMap3dSGIX\0" ""; #endif #if defined(need_GL_VERSION_2_0) -static const char IsShader_names[] = +static 
const char IsShader_names[] = "i\0" /* Parameter signature */ "glIsShader\0" ""; #endif #if defined(need_GL_HP_image_transform) -static const char GetImageTransformParameterivHP_names[] = +static const char GetImageTransformParameterivHP_names[] = "iip\0" /* Parameter signature */ "glGetImageTransformParameterivHP\0" ""; #endif #if defined(need_GL_MESA_window_pos) -static const char WindowPos4ivMESA_names[] = +static const char WindowPos4ivMESA_names[] = "p\0" /* Parameter signature */ "glWindowPos4ivMESA\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord3svARB_names[] = +static const char MultiTexCoord3svARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord3sv\0" "glMultiTexCoord3svARB\0" @@ -1808,7 +1857,7 @@ static const char MultiTexCoord3svARB_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord4iARB_names[] = +static const char MultiTexCoord4iARB_names[] = "iiiii\0" /* Parameter signature */ "glMultiTexCoord4i\0" "glMultiTexCoord4iARB\0" @@ -1816,21 +1865,21 @@ static const char MultiTexCoord4iARB_names[] = #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Binormal3ivEXT_names[] = +static const char Binormal3ivEXT_names[] = "p\0" /* Parameter signature */ "glBinormal3ivEXT\0" ""; #endif #if defined(need_GL_MESA_resize_buffers) -static const char ResizeBuffersMESA_names[] = +static const char ResizeBuffersMESA_names[] = "\0" /* Parameter signature */ "glResizeBuffersMESA\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char GetUniformivARB_names[] = +static const char GetUniformivARB_names[] = "iip\0" /* Parameter signature */ "glGetUniformiv\0" "glGetUniformivARB\0" @@ -1838,28 +1887,28 @@ static const char GetUniformivARB_names[] = #endif #if defined(need_GL_SGIS_pixel_texture) -static const char PixelTexGenParameteriSGIS_names[] = +static const char PixelTexGenParameteriSGIS_names[] = "ii\0" /* Parameter signature */ 
"glPixelTexGenParameteriSGIS\0" ""; #endif #if defined(need_GL_INTEL_parallel_arrays) -static const char VertexPointervINTEL_names[] = +static const char VertexPointervINTEL_names[] = "iip\0" /* Parameter signature */ "glVertexPointervINTEL\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiColor4fNormal3fVertex3fvSUN_names[] = +static const char ReplacementCodeuiColor4fNormal3fVertex3fvSUN_names[] = "pppp\0" /* Parameter signature */ "glReplacementCodeuiColor4fNormal3fVertex3fvSUN\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3uiEXT_names[] = +static const char SecondaryColor3uiEXT_names[] = "iii\0" /* Parameter signature */ "glSecondaryColor3ui\0" "glSecondaryColor3uiEXT\0" @@ -1867,14 +1916,14 @@ static const char SecondaryColor3uiEXT_names[] = #endif #if defined(need_GL_SGIX_instruments) -static const char StartInstrumentsSGIX_names[] = +static const char StartInstrumentsSGIX_names[] = "\0" /* Parameter signature */ "glStartInstrumentsSGIX\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3usvEXT_names[] = +static const char SecondaryColor3usvEXT_names[] = "p\0" /* Parameter signature */ "glSecondaryColor3usv\0" "glSecondaryColor3usvEXT\0" @@ -1882,49 +1931,49 @@ static const char SecondaryColor3usvEXT_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib2fvNV_names[] = +static const char VertexAttrib2fvNV_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib2fvNV\0" ""; #endif #if defined(need_GL_ARB_vertex_program) -static const char ProgramLocalParameter4dvARB_names[] = +static const char ProgramLocalParameter4dvARB_names[] = "iip\0" /* Parameter signature */ "glProgramLocalParameter4dvARB\0" ""; #endif #if defined(need_GL_ARB_matrix_palette) -static const char MatrixIndexuivARB_names[] = +static const char MatrixIndexuivARB_names[] = "ip\0" /* 
Parameter signature */ "glMatrixIndexuivARB\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) -static const char RenderbufferStorageMultisample_names[] = +static const char RenderbufferStorageMultisample_names[] = "iiiii\0" /* Parameter signature */ "glRenderbufferStorageMultisample\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Tangent3sEXT_names[] = +static const char Tangent3sEXT_names[] = "iii\0" /* Parameter signature */ "glTangent3sEXT\0" ""; #endif #if defined(need_GL_SUN_global_alpha) -static const char GlobalAlphaFactorfSUN_names[] = +static const char GlobalAlphaFactorfSUN_names[] = "f\0" /* Parameter signature */ "glGlobalAlphaFactorfSUN\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord3iARB_names[] = +static const char MultiTexCoord3iARB_names[] = "iiii\0" /* Parameter signature */ "glMultiTexCoord3i\0" "glMultiTexCoord3iARB\0" @@ -1932,35 +1981,35 @@ static const char MultiTexCoord3iARB_names[] = #endif #if defined(need_GL_VERSION_2_0) -static const char IsProgram_names[] = +static const char IsProgram_names[] = "i\0" /* Parameter signature */ "glIsProgram\0" ""; #endif #if defined(need_GL_IBM_vertex_array_lists) -static const char TexCoordPointerListIBM_names[] = +static const char TexCoordPointerListIBM_names[] = "iiipi\0" /* Parameter signature */ "glTexCoordPointerListIBM\0" ""; #endif #if defined(need_GL_SUN_global_alpha) -static const char GlobalAlphaFactorusSUN_names[] = +static const char GlobalAlphaFactorusSUN_names[] = "i\0" /* Parameter signature */ "glGlobalAlphaFactorusSUN\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib2dvNV_names[] = +static const char VertexAttrib2dvNV_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib2dvNV\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char FramebufferRenderbufferEXT_names[] = +static const char 
FramebufferRenderbufferEXT_names[] = "iiii\0" /* Parameter signature */ "glFramebufferRenderbuffer\0" "glFramebufferRenderbufferEXT\0" @@ -1968,14 +2017,14 @@ static const char FramebufferRenderbufferEXT_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib1dvNV_names[] = +static const char VertexAttrib1dvNV_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib1dvNV\0" ""; #endif #if defined(need_GL_EXT_texture_object) -static const char GenTextures_names[] = +static const char GenTextures_names[] = "ip\0" /* Parameter signature */ "glGenTextures\0" "glGenTexturesEXT\0" @@ -1983,14 +2032,14 @@ static const char GenTextures_names[] = #endif #if defined(need_GL_NV_fence) -static const char SetFenceNV_names[] = +static const char SetFenceNV_names[] = "ii\0" /* Parameter signature */ "glSetFenceNV\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char FramebufferTexture1DEXT_names[] = +static const char FramebufferTexture1DEXT_names[] = "iiiii\0" /* Parameter signature */ "glFramebufferTexture1D\0" "glFramebufferTexture1DEXT\0" @@ -1998,49 +2047,49 @@ static const char FramebufferTexture1DEXT_names[] = #endif #if defined(need_GL_NV_register_combiners) -static const char GetCombinerOutputParameterivNV_names[] = +static const char GetCombinerOutputParameterivNV_names[] = "iiip\0" /* Parameter signature */ "glGetCombinerOutputParameterivNV\0" ""; #endif #if defined(need_GL_SGIS_pixel_texture) -static const char PixelTexGenParameterivSGIS_names[] = +static const char PixelTexGenParameterivSGIS_names[] = "ip\0" /* Parameter signature */ "glPixelTexGenParameterivSGIS\0" ""; #endif #if defined(need_GL_EXT_texture_perturb_normal) -static const char TextureNormalEXT_names[] = +static const char TextureNormalEXT_names[] = "i\0" /* Parameter signature */ "glTextureNormalEXT\0" ""; #endif #if defined(need_GL_IBM_vertex_array_lists) -static const char IndexPointerListIBM_names[] 
= +static const char IndexPointerListIBM_names[] = "iipi\0" /* Parameter signature */ "glIndexPointerListIBM\0" ""; #endif #if defined(need_GL_ARB_vertex_blend) -static const char WeightfvARB_names[] = +static const char WeightfvARB_names[] = "ip\0" /* Parameter signature */ "glWeightfvARB\0" ""; #endif #if defined(need_GL_MESA_window_pos) -static const char WindowPos4fMESA_names[] = +static const char WindowPos4fMESA_names[] = "ffff\0" /* Parameter signature */ "glWindowPos4fMESA\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos3dvMESA_names[] = +static const char WindowPos3dvMESA_names[] = "p\0" /* Parameter signature */ "glWindowPos3dv\0" "glWindowPos3dvARB\0" @@ -2049,14 +2098,14 @@ static const char WindowPos3dvMESA_names[] = #endif #if defined(need_GL_EXT_timer_query) -static const char GetQueryObjecti64vEXT_names[] = +static const char GetQueryObjecti64vEXT_names[] = "iip\0" /* Parameter signature */ "glGetQueryObjecti64vEXT\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord1dARB_names[] = +static const char MultiTexCoord1dARB_names[] = "id\0" /* Parameter signature */ "glMultiTexCoord1d\0" "glMultiTexCoord1dARB\0" @@ -2064,7 +2113,7 @@ static const char MultiTexCoord1dARB_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_NV_point_sprite) -static const char PointParameterivNV_names[] = +static const char PointParameterivNV_names[] = "ip\0" /* Parameter signature */ "glPointParameteriv\0" "glPointParameterivNV\0" @@ -2072,15 +2121,22 @@ static const char PointParameterivNV_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform2fvARB_names[] = +static const char Uniform2fvARB_names[] = "iip\0" /* Parameter signature */ "glUniform2fv\0" "glUniform2fvARB\0" ""; #endif +#if defined(need_GL_APPLE_flush_buffer_range) +static const char 
BufferParameteriAPPLE_names[] = + "iii\0" /* Parameter signature */ + "glBufferParameteriAPPLE\0" + ""; +#endif + #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord3dvARB_names[] = +static const char MultiTexCoord3dvARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord3dv\0" "glMultiTexCoord3dvARB\0" @@ -2088,49 +2144,49 @@ static const char MultiTexCoord3dvARB_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN_names[] = +static const char ReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN_names[] = "pppp\0" /* Parameter signature */ "glReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN\0" ""; #endif #if defined(need_GL_ARB_shader_objects) -static const char DeleteObjectARB_names[] = +static const char DeleteObjectARB_names[] = "i\0" /* Parameter signature */ "glDeleteObjectARB\0" ""; #endif #if defined(need_GL_ARB_matrix_palette) -static const char MatrixIndexPointerARB_names[] = +static const char MatrixIndexPointerARB_names[] = "iiip\0" /* Parameter signature */ "glMatrixIndexPointerARB\0" ""; #endif #if defined(need_GL_NV_fragment_program) -static const char ProgramNamedParameter4dvNV_names[] = +static const char ProgramNamedParameter4dvNV_names[] = "iipp\0" /* Parameter signature */ "glProgramNamedParameter4dvNV\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Tangent3fvEXT_names[] = +static const char Tangent3fvEXT_names[] = "p\0" /* Parameter signature */ "glTangent3fvEXT\0" ""; #endif #if defined(need_GL_ARB_vertex_array_object) -static const char GenVertexArrays_names[] = +static const char GenVertexArrays_names[] = "ip\0" /* Parameter signature */ "glGenVertexArrays\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char BindFramebufferEXT_names[] = +static const char BindFramebufferEXT_names[] = "ii\0" /* Parameter signature */ "glBindFramebuffer\0" "glBindFramebufferEXT\0" @@ 
-2138,14 +2194,14 @@ static const char BindFramebufferEXT_names[] = #endif #if defined(need_GL_SGIX_reference_plane) -static const char ReferencePlaneSGIX_names[] = +static const char ReferencePlaneSGIX_names[] = "p\0" /* Parameter signature */ "glReferencePlaneSGIX\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char ValidateProgramARB_names[] = +static const char ValidateProgramARB_names[] = "i\0" /* Parameter signature */ "glValidateProgram\0" "glValidateProgramARB\0" @@ -2153,21 +2209,21 @@ static const char ValidateProgramARB_names[] = #endif #if defined(need_GL_EXT_compiled_vertex_array) -static const char UnlockArraysEXT_names[] = +static const char UnlockArraysEXT_names[] = "\0" /* Parameter signature */ "glUnlockArraysEXT\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord2fColor3fVertex3fSUN_names[] = +static const char TexCoord2fColor3fVertex3fSUN_names[] = "ffffffff\0" /* Parameter signature */ "glTexCoord2fColor3fVertex3fSUN\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos3fvMESA_names[] = +static const char WindowPos3fvMESA_names[] = "p\0" /* Parameter signature */ "glWindowPos3fv\0" "glWindowPos3fvARB\0" @@ -2176,14 +2232,14 @@ static const char WindowPos3fvMESA_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib1svNV_names[] = +static const char VertexAttrib1svNV_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib1svNV\0" ""; #endif #if defined(need_GL_EXT_copy_texture) -static const char CopyTexSubImage3D_names[] = +static const char CopyTexSubImage3D_names[] = "iiiiiiiii\0" /* Parameter signature */ "glCopyTexSubImage3D\0" "glCopyTexSubImage3DEXT\0" @@ -2191,22 +2247,29 @@ static const char CopyTexSubImage3D_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char 
VertexAttrib2dARB_names[] = +static const char VertexAttrib2dARB_names[] = "idd\0" /* Parameter signature */ "glVertexAttrib2d\0" "glVertexAttrib2dARB\0" ""; #endif +#if defined(need_GL_ARB_sync) +static const char GetInteger64v_names[] = + "ip\0" /* Parameter signature */ + "glGetInteger64v\0" + ""; +#endif + #if defined(need_GL_SGIS_texture_color_mask) -static const char TextureColorMaskSGIS_names[] = +static const char TextureColorMaskSGIS_names[] = "iiii\0" /* Parameter signature */ "glTextureColorMaskSGIS\0" ""; #endif #if defined(need_GL_SGI_color_table) || defined(need_GL_EXT_paletted_texture) -static const char GetColorTable_names[] = +static const char GetColorTable_names[] = "iiip\0" /* Parameter signature */ "glGetColorTable\0" "glGetColorTableSGI\0" @@ -2215,7 +2278,7 @@ static const char GetColorTable_names[] = #endif #if defined(need_GL_SGI_color_table) -static const char CopyColorTable_names[] = +static const char CopyColorTable_names[] = "iiiii\0" /* Parameter signature */ "glCopyColorTable\0" "glCopyColorTableSGI\0" @@ -2223,7 +2286,7 @@ static const char CopyColorTable_names[] = #endif #if defined(need_GL_EXT_histogram) -static const char GetHistogramParameterfv_names[] = +static const char GetHistogramParameterfv_names[] = "iip\0" /* Parameter signature */ "glGetHistogramParameterfv\0" "glGetHistogramParameterfvEXT\0" @@ -2231,21 +2294,21 @@ static const char GetHistogramParameterfv_names[] = #endif #if defined(need_GL_INTEL_parallel_arrays) -static const char ColorPointervINTEL_names[] = +static const char ColorPointervINTEL_names[] = "iip\0" /* Parameter signature */ "glColorPointervINTEL\0" ""; #endif #if defined(need_GL_ATI_fragment_shader) -static const char AlphaFragmentOp1ATI_names[] = +static const char AlphaFragmentOp1ATI_names[] = "iiiiii\0" /* Parameter signature */ "glAlphaFragmentOp1ATI\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord3ivARB_names[] = +static const char MultiTexCoord3ivARB_names[] = 
"ip\0" /* Parameter signature */ "glMultiTexCoord3iv\0" "glMultiTexCoord3ivARB\0" @@ -2253,7 +2316,7 @@ static const char MultiTexCoord3ivARB_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord2sARB_names[] = +static const char MultiTexCoord2sARB_names[] = "iii\0" /* Parameter signature */ "glMultiTexCoord2s\0" "glMultiTexCoord2sARB\0" @@ -2261,7 +2324,7 @@ static const char MultiTexCoord2sARB_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib1dvARB_names[] = +static const char VertexAttrib1dvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib1dv\0" "glVertexAttrib1dvARB\0" @@ -2269,7 +2332,7 @@ static const char VertexAttrib1dvARB_names[] = #endif #if defined(need_GL_EXT_texture_object) -static const char DeleteTextures_names[] = +static const char DeleteTextures_names[] = "ip\0" /* Parameter signature */ "glDeleteTextures\0" "glDeleteTexturesEXT\0" @@ -2277,49 +2340,49 @@ static const char DeleteTextures_names[] = #endif #if defined(need_GL_EXT_vertex_array) -static const char TexCoordPointerEXT_names[] = +static const char TexCoordPointerEXT_names[] = "iiiip\0" /* Parameter signature */ "glTexCoordPointerEXT\0" ""; #endif #if defined(need_GL_SGIS_texture4D) -static const char TexSubImage4DSGIS_names[] = +static const char TexSubImage4DSGIS_names[] = "iiiiiiiiiiiip\0" /* Parameter signature */ "glTexSubImage4DSGIS\0" ""; #endif #if defined(need_GL_NV_register_combiners2) -static const char CombinerStageParameterfvNV_names[] = +static const char CombinerStageParameterfvNV_names[] = "iip\0" /* Parameter signature */ "glCombinerStageParameterfvNV\0" ""; #endif #if defined(need_GL_SGIX_instruments) -static const char StopInstrumentsSGIX_names[] = +static const char StopInstrumentsSGIX_names[] = "i\0" /* Parameter signature */ "glStopInstrumentsSGIX\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord4fColor4fNormal3fVertex4fSUN_names[] 
= +static const char TexCoord4fColor4fNormal3fVertex4fSUN_names[] = "fffffffffffffff\0" /* Parameter signature */ "glTexCoord4fColor4fNormal3fVertex4fSUN\0" ""; #endif #if defined(need_GL_SGIX_polynomial_ffd) -static const char DeformSGIX_names[] = +static const char DeformSGIX_names[] = "i\0" /* Parameter signature */ "glDeformSGIX\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char GetVertexAttribfvARB_names[] = +static const char GetVertexAttribfvARB_names[] = "iip\0" /* Parameter signature */ "glGetVertexAttribfv\0" "glGetVertexAttribfvARB\0" @@ -2327,7 +2390,7 @@ static const char GetVertexAttribfvARB_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3ivEXT_names[] = +static const char SecondaryColor3ivEXT_names[] = "p\0" /* Parameter signature */ "glSecondaryColor3iv\0" "glSecondaryColor3ivEXT\0" @@ -2335,49 +2398,49 @@ static const char SecondaryColor3ivEXT_names[] = #endif #if defined(need_GL_VERSION_2_1) -static const char UniformMatrix4x2fv_names[] = +static const char UniformMatrix4x2fv_names[] = "iiip\0" /* Parameter signature */ "glUniformMatrix4x2fv\0" ""; #endif #if defined(need_GL_SGIS_detail_texture) -static const char GetDetailTexFuncSGIS_names[] = +static const char GetDetailTexFuncSGIS_names[] = "ip\0" /* Parameter signature */ "glGetDetailTexFuncSGIS\0" ""; #endif #if defined(need_GL_NV_register_combiners2) -static const char GetCombinerStageParameterfvNV_names[] = +static const char GetCombinerStageParameterfvNV_names[] = "iip\0" /* Parameter signature */ "glGetCombinerStageParameterfvNV\0" ""; #endif #if defined(need_GL_ARB_vertex_array_object) -static const char BindVertexArray_names[] = +static const char BindVertexArray_names[] = "i\0" /* Parameter signature */ "glBindVertexArray\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char Color4ubVertex2fvSUN_names[] = +static const char 
Color4ubVertex2fvSUN_names[] = "pp\0" /* Parameter signature */ "glColor4ubVertex2fvSUN\0" ""; #endif #if defined(need_GL_SGIS_texture_filter4) -static const char TexFilterFuncSGIS_names[] = +static const char TexFilterFuncSGIS_names[] = "iiip\0" /* Parameter signature */ "glTexFilterFuncSGIS\0" ""; #endif #if defined(need_GL_SGIS_multisample) || defined(need_GL_EXT_multisample) -static const char SampleMaskSGIS_names[] = +static const char SampleMaskSGIS_names[] = "fi\0" /* Parameter signature */ "glSampleMaskSGIS\0" "glSampleMaskEXT\0" @@ -2385,7 +2448,7 @@ static const char SampleMaskSGIS_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_shader) -static const char GetAttribLocationARB_names[] = +static const char GetAttribLocationARB_names[] = "ip\0" /* Parameter signature */ "glGetAttribLocation\0" "glGetAttribLocationARB\0" @@ -2393,7 +2456,7 @@ static const char GetAttribLocationARB_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4ubvARB_names[] = +static const char VertexAttrib4ubvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4ubv\0" "glVertexAttrib4ubvARB\0" @@ -2401,21 +2464,21 @@ static const char VertexAttrib4ubvARB_names[] = #endif #if defined(need_GL_SGIS_detail_texture) -static const char DetailTexFuncSGIS_names[] = +static const char DetailTexFuncSGIS_names[] = "iip\0" /* Parameter signature */ "glDetailTexFuncSGIS\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char Normal3fVertex3fSUN_names[] = +static const char Normal3fVertex3fSUN_names[] = "ffffff\0" /* Parameter signature */ "glNormal3fVertex3fSUN\0" ""; #endif #if defined(need_GL_EXT_copy_texture) -static const char CopyTexImage2D_names[] = +static const char CopyTexImage2D_names[] = "iiiiiiii\0" /* Parameter signature */ "glCopyTexImage2D\0" "glCopyTexImage2DEXT\0" @@ -2423,7 +2486,7 @@ static const char CopyTexImage2D_names[] = #endif #if 
defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) -static const char GetBufferPointervARB_names[] = +static const char GetBufferPointervARB_names[] = "iip\0" /* Parameter signature */ "glGetBufferPointerv\0" "glGetBufferPointervARB\0" @@ -2431,7 +2494,7 @@ static const char GetBufferPointervARB_names[] = #endif #if defined(need_GL_ARB_vertex_program) || defined(need_GL_NV_vertex_program) -static const char ProgramEnvParameter4fARB_names[] = +static const char ProgramEnvParameter4fARB_names[] = "iiffff\0" /* Parameter signature */ "glProgramEnvParameter4fARB\0" "glProgramParameter4fNV\0" @@ -2439,7 +2502,7 @@ static const char ProgramEnvParameter4fARB_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform3ivARB_names[] = +static const char Uniform3ivARB_names[] = "iip\0" /* Parameter signature */ "glUniform3iv\0" "glUniform3ivARB\0" @@ -2447,21 +2510,21 @@ static const char Uniform3ivARB_names[] = #endif #if defined(need_GL_NV_fence) -static const char GetFenceivNV_names[] = +static const char GetFenceivNV_names[] = "iip\0" /* Parameter signature */ "glGetFenceivNV\0" ""; #endif #if defined(need_GL_MESA_window_pos) -static const char WindowPos4dvMESA_names[] = +static const char WindowPos4dvMESA_names[] = "p\0" /* Parameter signature */ "glWindowPos4dvMESA\0" ""; #endif #if defined(need_GL_EXT_color_subtable) -static const char ColorSubTable_names[] = +static const char ColorSubTable_names[] = "iiiiip\0" /* Parameter signature */ "glColorSubTable\0" "glColorSubTableEXT\0" @@ -2469,7 +2532,7 @@ static const char ColorSubTable_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord4ivARB_names[] = +static const char MultiTexCoord4ivARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord4iv\0" "glMultiTexCoord4ivARB\0" @@ -2477,21 +2540,21 @@ static const char MultiTexCoord4ivARB_names[] = #endif #if defined(need_GL_EXT_gpu_program_parameters) -static 
const char ProgramLocalParameters4fvEXT_names[] = +static const char ProgramLocalParameters4fvEXT_names[] = "iiip\0" /* Parameter signature */ "glProgramLocalParameters4fvEXT\0" ""; #endif #if defined(need_GL_NV_evaluators) -static const char GetMapAttribParameterfvNV_names[] = +static const char GetMapAttribParameterfvNV_names[] = "iiip\0" /* Parameter signature */ "glGetMapAttribParameterfvNV\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4sARB_names[] = +static const char VertexAttrib4sARB_names[] = "iiiii\0" /* Parameter signature */ "glVertexAttrib4s\0" "glVertexAttrib4sARB\0" @@ -2499,7 +2562,7 @@ static const char VertexAttrib4sARB_names[] = #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_occlusion_query) -static const char GetQueryObjectuivARB_names[] = +static const char GetQueryObjectuivARB_names[] = "iip\0" /* Parameter signature */ "glGetQueryObjectuiv\0" "glGetQueryObjectuivARB\0" @@ -2507,14 +2570,14 @@ static const char GetQueryObjectuivARB_names[] = #endif #if defined(need_GL_NV_evaluators) -static const char MapParameterivNV_names[] = +static const char MapParameterivNV_names[] = "iip\0" /* Parameter signature */ "glMapParameterivNV\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char GenRenderbuffersEXT_names[] = +static const char GenRenderbuffersEXT_names[] = "ip\0" /* Parameter signature */ "glGenRenderbuffers\0" "glGenRenderbuffersEXT\0" @@ -2522,7 +2585,7 @@ static const char GenRenderbuffersEXT_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib2dvARB_names[] = +static const char VertexAttrib2dvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib2dv\0" "glVertexAttrib2dvARB\0" @@ -2530,28 +2593,28 @@ static const char VertexAttrib2dvARB_names[] = #endif #if defined(need_GL_EXT_vertex_array) -static const char 
EdgeFlagPointerEXT_names[] = +static const char EdgeFlagPointerEXT_names[] = "iip\0" /* Parameter signature */ "glEdgeFlagPointerEXT\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribs2svNV_names[] = +static const char VertexAttribs2svNV_names[] = "iip\0" /* Parameter signature */ "glVertexAttribs2svNV\0" ""; #endif #if defined(need_GL_ARB_vertex_blend) -static const char WeightbvARB_names[] = +static const char WeightbvARB_names[] = "ip\0" /* Parameter signature */ "glWeightbvARB\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib2fvARB_names[] = +static const char VertexAttrib2fvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib2fv\0" "glVertexAttrib2fvARB\0" @@ -2559,7 +2622,7 @@ static const char VertexAttrib2fvARB_names[] = #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) -static const char GetBufferParameterivARB_names[] = +static const char GetBufferParameterivARB_names[] = "iip\0" /* Parameter signature */ "glGetBufferParameteriv\0" "glGetBufferParameterivARB\0" @@ -2567,28 +2630,28 @@ static const char GetBufferParameterivARB_names[] = #endif #if defined(need_GL_SGIX_list_priority) -static const char ListParameteriSGIX_names[] = +static const char ListParameteriSGIX_names[] = "iii\0" /* Parameter signature */ "glListParameteriSGIX\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiColor4fNormal3fVertex3fSUN_names[] = +static const char ReplacementCodeuiColor4fNormal3fVertex3fSUN_names[] = "iffffffffff\0" /* Parameter signature */ "glReplacementCodeuiColor4fNormal3fVertex3fSUN\0" ""; #endif #if defined(need_GL_SGIX_instruments) -static const char InstrumentsBufferSGIX_names[] = +static const char InstrumentsBufferSGIX_names[] = "ip\0" /* Parameter signature */ "glInstrumentsBufferSGIX\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static 
const char VertexAttrib4NivARB_names[] = +static const char VertexAttrib4NivARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4Niv\0" "glVertexAttrib4NivARB\0" @@ -2596,35 +2659,35 @@ static const char VertexAttrib4NivARB_names[] = #endif #if defined(need_GL_VERSION_2_0) -static const char GetAttachedShaders_names[] = +static const char GetAttachedShaders_names[] = "iipp\0" /* Parameter signature */ "glGetAttachedShaders\0" ""; #endif #if defined(need_GL_APPLE_vertex_array_object) -static const char GenVertexArraysAPPLE_names[] = +static const char GenVertexArraysAPPLE_names[] = "ip\0" /* Parameter signature */ "glGenVertexArraysAPPLE\0" ""; #endif #if defined(need_GL_EXT_gpu_program_parameters) -static const char ProgramEnvParameters4fvEXT_names[] = +static const char ProgramEnvParameters4fvEXT_names[] = "iiip\0" /* Parameter signature */ "glProgramEnvParameters4fvEXT\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord2fColor4fNormal3fVertex3fvSUN_names[] = +static const char TexCoord2fColor4fNormal3fVertex3fvSUN_names[] = "pppp\0" /* Parameter signature */ "glTexCoord2fColor4fNormal3fVertex3fvSUN\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos2iMESA_names[] = +static const char WindowPos2iMESA_names[] = "ii\0" /* Parameter signature */ "glWindowPos2i\0" "glWindowPos2iARB\0" @@ -2633,7 +2696,7 @@ static const char WindowPos2iMESA_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3fvEXT_names[] = +static const char SecondaryColor3fvEXT_names[] = "p\0" /* Parameter signature */ "glSecondaryColor3fv\0" "glSecondaryColor3fvEXT\0" @@ -2641,7 +2704,7 @@ static const char SecondaryColor3fvEXT_names[] = #endif #if defined(need_GL_VERSION_1_3) || defined(need_GL_ARB_texture_compression) -static const char CompressedTexSubImage1DARB_names[] = +static const char 
CompressedTexSubImage1DARB_names[] = "iiiiiip\0" /* Parameter signature */ "glCompressedTexSubImage1D\0" "glCompressedTexSubImage1DARB\0" @@ -2649,28 +2712,28 @@ static const char CompressedTexSubImage1DARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char GetVertexAttribivNV_names[] = +static const char GetVertexAttribivNV_names[] = "iip\0" /* Parameter signature */ "glGetVertexAttribivNV\0" ""; #endif #if defined(need_GL_ARB_vertex_program) -static const char GetProgramStringARB_names[] = +static const char GetProgramStringARB_names[] = "iip\0" /* Parameter signature */ "glGetProgramStringARB\0" ""; #endif #if defined(need_GL_ATI_envmap_bumpmap) -static const char TexBumpParameterfvATI_names[] = +static const char TexBumpParameterfvATI_names[] = "ip\0" /* Parameter signature */ "glTexBumpParameterfvATI\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char CompileShaderARB_names[] = +static const char CompileShaderARB_names[] = "i\0" /* Parameter signature */ "glCompileShader\0" "glCompileShaderARB\0" @@ -2678,14 +2741,14 @@ static const char CompileShaderARB_names[] = #endif #if defined(need_GL_VERSION_2_0) -static const char DeleteShader_names[] = +static const char DeleteShader_names[] = "i\0" /* Parameter signature */ "glDeleteShader\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform3fARB_names[] = +static const char Uniform3fARB_names[] = "ifff\0" /* Parameter signature */ "glUniform3f\0" "glUniform3fARB\0" @@ -2693,28 +2756,28 @@ static const char Uniform3fARB_names[] = #endif #if defined(need_GL_SGIX_list_priority) -static const char ListParameterfvSGIX_names[] = +static const char ListParameterfvSGIX_names[] = "iip\0" /* Parameter signature */ "glListParameterfvSGIX\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Tangent3dvEXT_names[] = +static const char Tangent3dvEXT_names[] = "p\0" /* 
Parameter signature */ "glTangent3dvEXT\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char GetVertexAttribfvNV_names[] = +static const char GetVertexAttribfvNV_names[] = "iip\0" /* Parameter signature */ "glGetVertexAttribfvNV\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos3sMESA_names[] = +static const char WindowPos3sMESA_names[] = "iii\0" /* Parameter signature */ "glWindowPos3s\0" "glWindowPos3sARB\0" @@ -2723,35 +2786,35 @@ static const char WindowPos3sMESA_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib2svNV_names[] = +static const char VertexAttrib2svNV_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib2svNV\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribs1fvNV_names[] = +static const char VertexAttribs1fvNV_names[] = "iip\0" /* Parameter signature */ "glVertexAttribs1fvNV\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord2fVertex3fvSUN_names[] = +static const char TexCoord2fVertex3fvSUN_names[] = "pp\0" /* Parameter signature */ "glTexCoord2fVertex3fvSUN\0" ""; #endif #if defined(need_GL_MESA_window_pos) -static const char WindowPos4sMESA_names[] = +static const char WindowPos4sMESA_names[] = "iiii\0" /* Parameter signature */ "glWindowPos4sMESA\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4NuivARB_names[] = +static const char VertexAttrib4NuivARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4Nuiv\0" "glVertexAttrib4NuivARB\0" @@ -2759,7 +2822,7 @@ static const char VertexAttrib4NuivARB_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char ClientActiveTextureARB_names[] = +static const char ClientActiveTextureARB_names[] = "i\0" /* Parameter signature */ "glClientActiveTexture\0" "glClientActiveTextureARB\0" @@ -2767,21 +2830,21 @@ 
static const char ClientActiveTextureARB_names[] = #endif #if defined(need_GL_SGIX_pixel_texture) -static const char PixelTexGenSGIX_names[] = +static const char PixelTexGenSGIX_names[] = "i\0" /* Parameter signature */ "glPixelTexGenSGIX\0" ""; #endif #if defined(need_GL_SUN_triangle_list) -static const char ReplacementCodeusvSUN_names[] = +static const char ReplacementCodeusvSUN_names[] = "p\0" /* Parameter signature */ "glReplacementCodeusvSUN\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform4fARB_names[] = +static const char Uniform4fARB_names[] = "iffff\0" /* Parameter signature */ "glUniform4f\0" "glUniform4fARB\0" @@ -2789,29 +2852,36 @@ static const char Uniform4fARB_names[] = #endif #if defined(need_GL_ARB_map_buffer_range) -static const char FlushMappedBufferRange_names[] = +static const char FlushMappedBufferRange_names[] = "iii\0" /* Parameter signature */ "glFlushMappedBufferRange\0" ""; #endif #if defined(need_GL_ARB_vertex_program) || defined(need_GL_NV_vertex_program) -static const char IsProgramNV_names[] = +static const char IsProgramNV_names[] = "i\0" /* Parameter signature */ "glIsProgramARB\0" "glIsProgramNV\0" ""; #endif +#if defined(need_GL_APPLE_flush_buffer_range) +static const char FlushMappedBufferRangeAPPLE_names[] = + "iii\0" /* Parameter signature */ + "glFlushMappedBufferRangeAPPLE\0" + ""; +#endif + #if defined(need_GL_SUN_triangle_list) -static const char ReplacementCodePointerSUN_names[] = +static const char ReplacementCodePointerSUN_names[] = "iip\0" /* Parameter signature */ "glReplacementCodePointerSUN\0" ""; #endif #if defined(need_GL_ARB_vertex_program) || defined(need_GL_NV_vertex_program) -static const char ProgramEnvParameter4dARB_names[] = +static const char ProgramEnvParameter4dARB_names[] = "iidddd\0" /* Parameter signature */ "glProgramEnvParameter4dARB\0" "glProgramParameter4dNV\0" @@ -2819,7 +2889,7 @@ static const char ProgramEnvParameter4dARB_names[] = 
#endif #if defined(need_GL_SGI_color_table) -static const char ColorTableParameterfv_names[] = +static const char ColorTableParameterfv_names[] = "iip\0" /* Parameter signature */ "glColorTableParameterfv\0" "glColorTableParameterfvSGI\0" @@ -2827,21 +2897,21 @@ static const char ColorTableParameterfv_names[] = #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char FragmentLightModelfSGIX_names[] = +static const char FragmentLightModelfSGIX_names[] = "if\0" /* Parameter signature */ "glFragmentLightModelfSGIX\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Binormal3bvEXT_names[] = +static const char Binormal3bvEXT_names[] = "p\0" /* Parameter signature */ "glBinormal3bvEXT\0" ""; #endif #if defined(need_GL_EXT_texture_object) -static const char IsTexture_names[] = +static const char IsTexture_names[] = "i\0" /* Parameter signature */ "glIsTexture\0" "glIsTextureEXT\0" @@ -2849,14 +2919,14 @@ static const char IsTexture_names[] = #endif #if defined(need_GL_EXT_vertex_weighting) -static const char VertexWeightfvEXT_names[] = +static const char VertexWeightfvEXT_names[] = "p\0" /* Parameter signature */ "glVertexWeightfvEXT\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib1dARB_names[] = +static const char VertexAttrib1dARB_names[] = "id\0" /* Parameter signature */ "glVertexAttrib1d\0" "glVertexAttrib1dARB\0" @@ -2864,14 +2934,14 @@ static const char VertexAttrib1dARB_names[] = #endif #if defined(need_GL_HP_image_transform) -static const char ImageTransformParameterivHP_names[] = +static const char ImageTransformParameterivHP_names[] = "iip\0" /* Parameter signature */ "glImageTransformParameterivHP\0" ""; #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_occlusion_query) -static const char DeleteQueriesARB_names[] = +static const char DeleteQueriesARB_names[] = "ip\0" /* Parameter signature */ "glDeleteQueries\0" "glDeleteQueriesARB\0" @@ 
-2879,28 +2949,28 @@ static const char DeleteQueriesARB_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char Color4ubVertex2fSUN_names[] = +static const char Color4ubVertex2fSUN_names[] = "iiiiff\0" /* Parameter signature */ "glColor4ubVertex2fSUN\0" ""; #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char FragmentColorMaterialSGIX_names[] = +static const char FragmentColorMaterialSGIX_names[] = "ii\0" /* Parameter signature */ "glFragmentColorMaterialSGIX\0" ""; #endif #if defined(need_GL_ARB_matrix_palette) -static const char CurrentPaletteMatrixARB_names[] = +static const char CurrentPaletteMatrixARB_names[] = "i\0" /* Parameter signature */ "glCurrentPaletteMatrixARB\0" ""; #endif #if defined(need_GL_SGIS_multisample) || defined(need_GL_EXT_multisample) -static const char SamplePatternSGIS_names[] = +static const char SamplePatternSGIS_names[] = "i\0" /* Parameter signature */ "glSamplePatternSGIS\0" "glSamplePatternEXT\0" @@ -2908,7 +2978,7 @@ static const char SamplePatternSGIS_names[] = #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_occlusion_query) -static const char IsQueryARB_names[] = +static const char IsQueryARB_names[] = "i\0" /* Parameter signature */ "glIsQuery\0" "glIsQueryARB\0" @@ -2916,14 +2986,14 @@ static const char IsQueryARB_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiColor4ubVertex3fSUN_names[] = +static const char ReplacementCodeuiColor4ubVertex3fSUN_names[] = "iiiiifff\0" /* Parameter signature */ "glReplacementCodeuiColor4ubVertex3fSUN\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4usvARB_names[] = +static const char VertexAttrib4usvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4usv\0" "glVertexAttrib4usvARB\0" @@ -2931,7 +3001,7 @@ static const char VertexAttrib4usvARB_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static 
const char LinkProgramARB_names[] = +static const char LinkProgramARB_names[] = "i\0" /* Parameter signature */ "glLinkProgram\0" "glLinkProgramARB\0" @@ -2939,14 +3009,14 @@ static const char LinkProgramARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib2fNV_names[] = +static const char VertexAttrib2fNV_names[] = "iff\0" /* Parameter signature */ "glVertexAttrib2fNV\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char ShaderSourceARB_names[] = +static const char ShaderSourceARB_names[] = "iipp\0" /* Parameter signature */ "glShaderSource\0" "glShaderSourceARB\0" @@ -2954,14 +3024,14 @@ static const char ShaderSourceARB_names[] = #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char FragmentMaterialiSGIX_names[] = +static const char FragmentMaterialiSGIX_names[] = "iii\0" /* Parameter signature */ "glFragmentMaterialiSGIX\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib3svARB_names[] = +static const char VertexAttrib3svARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib3sv\0" "glVertexAttrib3svARB\0" @@ -2969,7 +3039,7 @@ static const char VertexAttrib3svARB_names[] = #endif #if defined(need_GL_VERSION_1_3) || defined(need_GL_ARB_texture_compression) -static const char CompressedTexSubImage3DARB_names[] = +static const char CompressedTexSubImage3DARB_names[] = "iiiiiiiiiip\0" /* Parameter signature */ "glCompressedTexSubImage3D\0" "glCompressedTexSubImage3DARB\0" @@ -2977,7 +3047,7 @@ static const char CompressedTexSubImage3DARB_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos2ivMESA_names[] = +static const char WindowPos2ivMESA_names[] = "p\0" /* Parameter signature */ "glWindowPos2iv\0" "glWindowPos2ivARB\0" @@ -2986,7 +3056,7 @@ static const char WindowPos2ivMESA_names[] = 
#endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char IsFramebufferEXT_names[] = +static const char IsFramebufferEXT_names[] = "i\0" /* Parameter signature */ "glIsFramebuffer\0" "glIsFramebufferEXT\0" @@ -2994,7 +3064,7 @@ static const char IsFramebufferEXT_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform4ivARB_names[] = +static const char Uniform4ivARB_names[] = "iip\0" /* Parameter signature */ "glUniform4iv\0" "glUniform4ivARB\0" @@ -3002,7 +3072,7 @@ static const char Uniform4ivARB_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char GetVertexAttribdvARB_names[] = +static const char GetVertexAttribdvARB_names[] = "iip\0" /* Parameter signature */ "glGetVertexAttribdv\0" "glGetVertexAttribdvARB\0" @@ -3010,14 +3080,14 @@ static const char GetVertexAttribdvARB_names[] = #endif #if defined(need_GL_ATI_envmap_bumpmap) -static const char TexBumpParameterivATI_names[] = +static const char TexBumpParameterivATI_names[] = "ip\0" /* Parameter signature */ "glTexBumpParameterivATI\0" ""; #endif #if defined(need_GL_EXT_convolution) -static const char GetSeparableFilter_names[] = +static const char GetSeparableFilter_names[] = "iiippp\0" /* Parameter signature */ "glGetSeparableFilter\0" "glGetSeparableFilterEXT\0" @@ -3025,49 +3095,49 @@ static const char GetSeparableFilter_names[] = #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Binormal3dEXT_names[] = +static const char Binormal3dEXT_names[] = "ddd\0" /* Parameter signature */ "glBinormal3dEXT\0" ""; #endif #if defined(need_GL_SGIX_sprite) -static const char SpriteParameteriSGIX_names[] = +static const char SpriteParameteriSGIX_names[] = "ii\0" /* Parameter signature */ "glSpriteParameteriSGIX\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char RequestResidentProgramsNV_names[] = +static const char 
RequestResidentProgramsNV_names[] = "ip\0" /* Parameter signature */ "glRequestResidentProgramsNV\0" ""; #endif #if defined(need_GL_SGIX_tag_sample_buffer) -static const char TagSampleBufferSGIX_names[] = +static const char TagSampleBufferSGIX_names[] = "\0" /* Parameter signature */ "glTagSampleBufferSGIX\0" ""; #endif #if defined(need_GL_SUN_triangle_list) -static const char ReplacementCodeusSUN_names[] = +static const char ReplacementCodeusSUN_names[] = "i\0" /* Parameter signature */ "glReplacementCodeusSUN\0" ""; #endif #if defined(need_GL_SGIX_list_priority) -static const char ListParameterivSGIX_names[] = +static const char ListParameterivSGIX_names[] = "iip\0" /* Parameter signature */ "glListParameterivSGIX\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_multi_draw_arrays) -static const char MultiDrawElementsEXT_names[] = +static const char MultiDrawElementsEXT_names[] = "ipipi\0" /* Parameter signature */ "glMultiDrawElements\0" "glMultiDrawElementsEXT\0" @@ -3075,7 +3145,7 @@ static const char MultiDrawElementsEXT_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform1ivARB_names[] = +static const char Uniform1ivARB_names[] = "iip\0" /* Parameter signature */ "glUniform1iv\0" "glUniform1ivARB\0" @@ -3083,7 +3153,7 @@ static const char Uniform1ivARB_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos2sMESA_names[] = +static const char WindowPos2sMESA_names[] = "ii\0" /* Parameter signature */ "glWindowPos2s\0" "glWindowPos2sARB\0" @@ -3092,14 +3162,14 @@ static const char WindowPos2sMESA_names[] = #endif #if defined(need_GL_ARB_vertex_blend) -static const char WeightusvARB_names[] = +static const char WeightusvARB_names[] = "ip\0" /* Parameter signature */ "glWeightusvARB\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_fog_coord) -static const char 
FogCoordPointerEXT_names[] = +static const char FogCoordPointerEXT_names[] = "iip\0" /* Parameter signature */ "glFogCoordPointer\0" "glFogCoordPointerEXT\0" @@ -3107,14 +3177,14 @@ static const char FogCoordPointerEXT_names[] = #endif #if defined(need_GL_EXT_index_material) -static const char IndexMaterialEXT_names[] = +static const char IndexMaterialEXT_names[] = "ii\0" /* Parameter signature */ "glIndexMaterialEXT\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3ubvEXT_names[] = +static const char SecondaryColor3ubvEXT_names[] = "p\0" /* Parameter signature */ "glSecondaryColor3ubv\0" "glSecondaryColor3ubvEXT\0" @@ -3122,7 +3192,7 @@ static const char SecondaryColor3ubvEXT_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4dvARB_names[] = +static const char VertexAttrib4dvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4dv\0" "glVertexAttrib4dvARB\0" @@ -3130,7 +3200,7 @@ static const char VertexAttrib4dvARB_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_shader) -static const char BindAttribLocationARB_names[] = +static const char BindAttribLocationARB_names[] = "iip\0" /* Parameter signature */ "glBindAttribLocation\0" "glBindAttribLocationARB\0" @@ -3138,7 +3208,7 @@ static const char BindAttribLocationARB_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord2dARB_names[] = +static const char MultiTexCoord2dARB_names[] = "idd\0" /* Parameter signature */ "glMultiTexCoord2d\0" "glMultiTexCoord2dARB\0" @@ -3146,35 +3216,35 @@ static const char MultiTexCoord2dARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char ExecuteProgramNV_names[] = +static const char ExecuteProgramNV_names[] = "iip\0" /* Parameter signature */ "glExecuteProgramNV\0" ""; #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char 
LightEnviSGIX_names[] = +static const char LightEnviSGIX_names[] = "ii\0" /* Parameter signature */ "glLightEnviSGIX\0" ""; #endif #if defined(need_GL_SUN_triangle_list) -static const char ReplacementCodeuiSUN_names[] = +static const char ReplacementCodeuiSUN_names[] = "i\0" /* Parameter signature */ "glReplacementCodeuiSUN\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribPointerNV_names[] = +static const char VertexAttribPointerNV_names[] = "iiiip\0" /* Parameter signature */ "glVertexAttribPointerNV\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char GetFramebufferAttachmentParameterivEXT_names[] = +static const char GetFramebufferAttachmentParameterivEXT_names[] = "iiip\0" /* Parameter signature */ "glGetFramebufferAttachmentParameteriv\0" "glGetFramebufferAttachmentParameterivEXT\0" @@ -3182,14 +3252,14 @@ static const char GetFramebufferAttachmentParameterivEXT_names[] = #endif #if defined(need_GL_EXT_pixel_transform) -static const char PixelTransformParameterfEXT_names[] = +static const char PixelTransformParameterfEXT_names[] = "iif\0" /* Parameter signature */ "glPixelTransformParameterfEXT\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord4dvARB_names[] = +static const char MultiTexCoord4dvARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord4dv\0" "glMultiTexCoord4dvARB\0" @@ -3197,21 +3267,21 @@ static const char MultiTexCoord4dvARB_names[] = #endif #if defined(need_GL_EXT_pixel_transform) -static const char PixelTransformParameteriEXT_names[] = +static const char PixelTransformParameteriEXT_names[] = "iii\0" /* Parameter signature */ "glPixelTransformParameteriEXT\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord2fColor4ubVertex3fSUN_names[] = +static const char TexCoord2fColor4ubVertex3fSUN_names[] = "ffiiiifff\0" /* Parameter signature */ "glTexCoord2fColor4ubVertex3fSUN\0" ""; #endif #if 
defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform1iARB_names[] = +static const char Uniform1iARB_names[] = "ii\0" /* Parameter signature */ "glUniform1i\0" "glUniform1iARB\0" @@ -3219,7 +3289,7 @@ static const char Uniform1iARB_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttribPointerARB_names[] = +static const char VertexAttribPointerARB_names[] = "iiiiip\0" /* Parameter signature */ "glVertexAttribPointer\0" "glVertexAttribPointerARB\0" @@ -3227,14 +3297,14 @@ static const char VertexAttribPointerARB_names[] = #endif #if defined(need_GL_SGIS_sharpen_texture) -static const char SharpenTexFuncSGIS_names[] = +static const char SharpenTexFuncSGIS_names[] = "iip\0" /* Parameter signature */ "glSharpenTexFuncSGIS\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord4fvARB_names[] = +static const char MultiTexCoord4fvARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord4fv\0" "glMultiTexCoord4fvARB\0" @@ -3242,56 +3312,56 @@ static const char MultiTexCoord4fvARB_names[] = #endif #if defined(need_GL_VERSION_2_1) -static const char UniformMatrix2x3fv_names[] = +static const char UniformMatrix2x3fv_names[] = "iiip\0" /* Parameter signature */ "glUniformMatrix2x3fv\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char TrackMatrixNV_names[] = +static const char TrackMatrixNV_names[] = "iiii\0" /* Parameter signature */ "glTrackMatrixNV\0" ""; #endif #if defined(need_GL_NV_register_combiners) -static const char CombinerParameteriNV_names[] = +static const char CombinerParameteriNV_names[] = "ii\0" /* Parameter signature */ "glCombinerParameteriNV\0" ""; #endif #if defined(need_GL_SGIX_async) -static const char DeleteAsyncMarkersSGIX_names[] = +static const char DeleteAsyncMarkersSGIX_names[] = "ii\0" /* Parameter signature */ "glDeleteAsyncMarkersSGIX\0" ""; #endif #if defined(need_GL_SGIX_async) -static const 
char IsAsyncMarkerSGIX_names[] = +static const char IsAsyncMarkerSGIX_names[] = "i\0" /* Parameter signature */ "glIsAsyncMarkerSGIX\0" ""; #endif #if defined(need_GL_SGIX_framezoom) -static const char FrameZoomSGIX_names[] = +static const char FrameZoomSGIX_names[] = "i\0" /* Parameter signature */ "glFrameZoomSGIX\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char Normal3fVertex3fvSUN_names[] = +static const char Normal3fVertex3fvSUN_names[] = "pp\0" /* Parameter signature */ "glNormal3fVertex3fvSUN\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4NsvARB_names[] = +static const char VertexAttrib4NsvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4Nsv\0" "glVertexAttrib4NsvARB\0" @@ -3299,15 +3369,22 @@ static const char VertexAttrib4NsvARB_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib3fvARB_names[] = +static const char VertexAttrib3fvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib3fv\0" "glVertexAttrib3fvARB\0" ""; #endif +#if defined(need_GL_ARB_sync) +static const char GetSynciv_names[] = + "iiipp\0" /* Parameter signature */ + "glGetSynciv\0" + ""; +#endif + #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char DeleteFramebuffersEXT_names[] = +static const char DeleteFramebuffersEXT_names[] = "ip\0" /* Parameter signature */ "glDeleteFramebuffers\0" "glDeleteFramebuffersEXT\0" @@ -3315,14 +3392,14 @@ static const char DeleteFramebuffersEXT_names[] = #endif #if defined(need_GL_SUN_global_alpha) -static const char GlobalAlphaFactorsSUN_names[] = +static const char GlobalAlphaFactorsSUN_names[] = "i\0" /* Parameter signature */ "glGlobalAlphaFactorsSUN\0" ""; #endif #if defined(need_GL_EXT_texture3D) -static const char TexSubImage3D_names[] = +static const char TexSubImage3D_names[] = "iiiiiiiiiip\0" /* Parameter signature */ 
"glTexSubImage3D\0" "glTexSubImage3DEXT\0" @@ -3330,14 +3407,14 @@ static const char TexSubImage3D_names[] = #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Tangent3fEXT_names[] = +static const char Tangent3fEXT_names[] = "fff\0" /* Parameter signature */ "glTangent3fEXT\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3uivEXT_names[] = +static const char SecondaryColor3uivEXT_names[] = "p\0" /* Parameter signature */ "glSecondaryColor3uiv\0" "glSecondaryColor3uivEXT\0" @@ -3345,35 +3422,35 @@ static const char SecondaryColor3uivEXT_names[] = #endif #if defined(need_GL_ARB_matrix_palette) -static const char MatrixIndexubvARB_names[] = +static const char MatrixIndexubvARB_names[] = "ip\0" /* Parameter signature */ "glMatrixIndexubvARB\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char Color4fNormal3fVertex3fSUN_names[] = +static const char Color4fNormal3fVertex3fSUN_names[] = "ffffffffff\0" /* Parameter signature */ "glColor4fNormal3fVertex3fSUN\0" ""; #endif #if defined(need_GL_SGIS_pixel_texture) -static const char PixelTexGenParameterfSGIS_names[] = +static const char PixelTexGenParameterfSGIS_names[] = "if\0" /* Parameter signature */ "glPixelTexGenParameterfSGIS\0" ""; #endif #if defined(need_GL_VERSION_2_0) -static const char CreateShader_names[] = +static const char CreateShader_names[] = "i\0" /* Parameter signature */ "glCreateShader\0" ""; #endif #if defined(need_GL_SGI_color_table) || defined(need_GL_EXT_paletted_texture) -static const char GetColorTableParameterfv_names[] = +static const char GetColorTableParameterfv_names[] = "iip\0" /* Parameter signature */ "glGetColorTableParameterfv\0" "glGetColorTableParameterfvSGI\0" @@ -3382,14 +3459,14 @@ static const char GetColorTableParameterfv_names[] = #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char FragmentLightModelfvSGIX_names[] = +static const char 
FragmentLightModelfvSGIX_names[] = "ip\0" /* Parameter signature */ "glFragmentLightModelfvSGIX\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord3fARB_names[] = +static const char MultiTexCoord3fARB_names[] = "ifff\0" /* Parameter signature */ "glMultiTexCoord3f\0" "glMultiTexCoord3fARB\0" @@ -3397,14 +3474,14 @@ static const char MultiTexCoord3fARB_names[] = #endif #if defined(need_GL_SGIS_pixel_texture) -static const char GetPixelTexGenParameterfvSGIS_names[] = +static const char GetPixelTexGenParameterfvSGIS_names[] = "ip\0" /* Parameter signature */ "glGetPixelTexGenParameterfvSGIS\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char GenFramebuffersEXT_names[] = +static const char GenFramebuffersEXT_names[] = "ip\0" /* Parameter signature */ "glGenFramebuffers\0" "glGenFramebuffersEXT\0" @@ -3412,14 +3489,14 @@ static const char GenFramebuffersEXT_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char GetProgramParameterdvNV_names[] = +static const char GetProgramParameterdvNV_names[] = "iiip\0" /* Parameter signature */ "glGetProgramParameterdvNV\0" ""; #endif #if defined(need_GL_ARB_vertex_array_object) || defined(need_GL_APPLE_vertex_array_object) -static const char IsVertexArrayAPPLE_names[] = +static const char IsVertexArrayAPPLE_names[] = "i\0" /* Parameter signature */ "glIsVertexArray\0" "glIsVertexArrayAPPLE\0" @@ -3427,21 +3504,21 @@ static const char IsVertexArrayAPPLE_names[] = #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char FragmentLightfvSGIX_names[] = +static const char FragmentLightfvSGIX_names[] = "iip\0" /* Parameter signature */ "glFragmentLightfvSGIX\0" ""; #endif #if defined(need_GL_VERSION_2_0) -static const char DetachShader_names[] = +static const char DetachShader_names[] = "ii\0" /* Parameter signature */ "glDetachShader\0" ""; #endif #if defined(need_GL_VERSION_2_0) || 
defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4NubARB_names[] = +static const char VertexAttrib4NubARB_names[] = "iiiii\0" /* Parameter signature */ "glVertexAttrib4Nub\0" "glVertexAttrib4NubARB\0" @@ -3449,28 +3526,28 @@ static const char VertexAttrib4NubARB_names[] = #endif #if defined(need_GL_ARB_vertex_program) -static const char GetProgramEnvParameterfvARB_names[] = +static const char GetProgramEnvParameterfvARB_names[] = "iip\0" /* Parameter signature */ "glGetProgramEnvParameterfvARB\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char GetTrackMatrixivNV_names[] = +static const char GetTrackMatrixivNV_names[] = "iiip\0" /* Parameter signature */ "glGetTrackMatrixivNV\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib3svNV_names[] = +static const char VertexAttrib3svNV_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib3svNV\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform4fvARB_names[] = +static const char Uniform4fvARB_names[] = "iip\0" /* Parameter signature */ "glUniform4fv\0" "glUniform4fvARB\0" @@ -3478,7 +3555,7 @@ static const char Uniform4fvARB_names[] = #endif #if defined(need_GL_VERSION_1_3) || defined(need_GL_ARB_transpose_matrix) -static const char MultTransposeMatrixfARB_names[] = +static const char MultTransposeMatrixfARB_names[] = "p\0" /* Parameter signature */ "glMultTransposeMatrixf\0" "glMultTransposeMatrixfARB\0" @@ -3486,14 +3563,14 @@ static const char MultTransposeMatrixfARB_names[] = #endif #if defined(need_GL_ATI_fragment_shader) -static const char ColorFragmentOp1ATI_names[] = +static const char ColorFragmentOp1ATI_names[] = "iiiiiii\0" /* Parameter signature */ "glColorFragmentOp1ATI\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char GetUniformfvARB_names[] = +static const char GetUniformfvARB_names[] = "iip\0" /* Parameter signature */ 
"glGetUniformfv\0" "glGetUniformfvARB\0" @@ -3501,28 +3578,28 @@ static const char GetUniformfvARB_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN_names[] = +static const char ReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN_names[] = "iffffffffffff\0" /* Parameter signature */ "glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN\0" ""; #endif #if defined(need_GL_ARB_shader_objects) -static const char DetachObjectARB_names[] = +static const char DetachObjectARB_names[] = "ii\0" /* Parameter signature */ "glDetachObjectARB\0" ""; #endif #if defined(need_GL_ARB_vertex_blend) -static const char VertexBlendARB_names[] = +static const char VertexBlendARB_names[] = "i\0" /* Parameter signature */ "glVertexBlendARB\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos3iMESA_names[] = +static const char WindowPos3iMESA_names[] = "iii\0" /* Parameter signature */ "glWindowPos3i\0" "glWindowPos3iARB\0" @@ -3531,7 +3608,7 @@ static const char WindowPos3iMESA_names[] = #endif #if defined(need_GL_EXT_convolution) -static const char SeparableFilter2D_names[] = +static const char SeparableFilter2D_names[] = "iiiiiipp\0" /* Parameter signature */ "glSeparableFilter2D\0" "glSeparableFilter2DEXT\0" @@ -3539,14 +3616,14 @@ static const char SeparableFilter2D_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiColor4ubVertex3fvSUN_names[] = +static const char ReplacementCodeuiColor4ubVertex3fvSUN_names[] = "ppp\0" /* Parameter signature */ "glReplacementCodeuiColor4ubVertex3fvSUN\0" ""; #endif #if defined(need_GL_VERSION_1_3) || defined(need_GL_ARB_texture_compression) -static const char CompressedTexImage2DARB_names[] = +static const char CompressedTexImage2DARB_names[] = "iiiiiiip\0" /* Parameter signature */ "glCompressedTexImage2D\0" "glCompressedTexImage2DARB\0" @@ -3554,7 
+3631,7 @@ static const char CompressedTexImage2DARB_names[] = #endif #if defined(need_GL_EXT_vertex_array) -static const char ArrayElement_names[] = +static const char ArrayElement_names[] = "i\0" /* Parameter signature */ "glArrayElement\0" "glArrayElementEXT\0" @@ -3562,35 +3639,35 @@ static const char ArrayElement_names[] = #endif #if defined(need_GL_EXT_depth_bounds_test) -static const char DepthBoundsEXT_names[] = +static const char DepthBoundsEXT_names[] = "dd\0" /* Parameter signature */ "glDepthBoundsEXT\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char ProgramParameters4fvNV_names[] = +static const char ProgramParameters4fvNV_names[] = "iiip\0" /* Parameter signature */ "glProgramParameters4fvNV\0" ""; #endif #if defined(need_GL_SGIX_polynomial_ffd) -static const char DeformationMap3fSGIX_names[] = +static const char DeformationMap3fSGIX_names[] = "iffiiffiiffiip\0" /* Parameter signature */ "glDeformationMap3fSGIX\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char GetProgramivNV_names[] = +static const char GetProgramivNV_names[] = "iip\0" /* Parameter signature */ "glGetProgramivNV\0" ""; #endif #if defined(need_GL_EXT_histogram) -static const char GetMinmaxParameteriv_names[] = +static const char GetMinmaxParameteriv_names[] = "iip\0" /* Parameter signature */ "glGetMinmaxParameteriv\0" "glGetMinmaxParameterivEXT\0" @@ -3598,7 +3675,7 @@ static const char GetMinmaxParameteriv_names[] = #endif #if defined(need_GL_EXT_copy_texture) -static const char CopyTexImage1D_names[] = +static const char CopyTexImage1D_names[] = "iiiiiii\0" /* Parameter signature */ "glCopyTexImage1D\0" "glCopyTexImage1DEXT\0" @@ -3606,42 +3683,42 @@ static const char CopyTexImage1D_names[] = #endif #if defined(need_GL_ATI_fragment_shader) -static const char AlphaFragmentOp3ATI_names[] = +static const char AlphaFragmentOp3ATI_names[] = "iiiiiiiiiiii\0" /* Parameter signature */ "glAlphaFragmentOp3ATI\0" ""; #endif #if 
defined(need_GL_NV_vertex_program) -static const char GetVertexAttribdvNV_names[] = +static const char GetVertexAttribdvNV_names[] = "iip\0" /* Parameter signature */ "glGetVertexAttribdvNV\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib3fvNV_names[] = +static const char VertexAttrib3fvNV_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib3fvNV\0" ""; #endif #if defined(need_GL_NV_register_combiners) -static const char GetFinalCombinerInputParameterivNV_names[] = +static const char GetFinalCombinerInputParameterivNV_names[] = "iip\0" /* Parameter signature */ "glGetFinalCombinerInputParameterivNV\0" ""; #endif #if defined(need_GL_NV_evaluators) -static const char GetMapParameterivNV_names[] = +static const char GetMapParameterivNV_names[] = "iip\0" /* Parameter signature */ "glGetMapParameterivNV\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform4iARB_names[] = +static const char Uniform4iARB_names[] = "iiiii\0" /* Parameter signature */ "glUniform4i\0" "glUniform4iARB\0" @@ -3649,7 +3726,7 @@ static const char Uniform4iARB_names[] = #endif #if defined(need_GL_EXT_convolution) -static const char ConvolutionParameteri_names[] = +static const char ConvolutionParameteri_names[] = "iii\0" /* Parameter signature */ "glConvolutionParameteri\0" "glConvolutionParameteriEXT\0" @@ -3657,14 +3734,14 @@ static const char ConvolutionParameteri_names[] = #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Binormal3sEXT_names[] = +static const char Binormal3sEXT_names[] = "iii\0" /* Parameter signature */ "glBinormal3sEXT\0" ""; #endif #if defined(need_GL_EXT_convolution) -static const char ConvolutionParameterf_names[] = +static const char ConvolutionParameterf_names[] = "iif\0" /* Parameter signature */ "glConvolutionParameterf\0" "glConvolutionParameterfEXT\0" @@ -3672,7 +3749,7 @@ static const char ConvolutionParameterf_names[] = #endif #if 
defined(need_GL_SGI_color_table) || defined(need_GL_EXT_paletted_texture) -static const char GetColorTableParameteriv_names[] = +static const char GetColorTableParameteriv_names[] = "iip\0" /* Parameter signature */ "glGetColorTableParameteriv\0" "glGetColorTableParameterivSGI\0" @@ -3681,7 +3758,7 @@ static const char GetColorTableParameteriv_names[] = #endif #if defined(need_GL_ARB_vertex_program) || defined(need_GL_NV_vertex_program) -static const char ProgramEnvParameter4dvARB_names[] = +static const char ProgramEnvParameter4dvARB_names[] = "iip\0" /* Parameter signature */ "glProgramEnvParameter4dvARB\0" "glProgramParameter4dvNV\0" @@ -3689,14 +3766,14 @@ static const char ProgramEnvParameter4dvARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribs2fvNV_names[] = +static const char VertexAttribs2fvNV_names[] = "iip\0" /* Parameter signature */ "glVertexAttribs2fvNV\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char UseProgramObjectARB_names[] = +static const char UseProgramObjectARB_names[] = "i\0" /* Parameter signature */ "glUseProgram\0" "glUseProgramObjectARB\0" @@ -3704,42 +3781,42 @@ static const char UseProgramObjectARB_names[] = #endif #if defined(need_GL_NV_evaluators) -static const char GetMapParameterfvNV_names[] = +static const char GetMapParameterfvNV_names[] = "iip\0" /* Parameter signature */ "glGetMapParameterfvNV\0" ""; #endif #if defined(need_GL_ATI_fragment_shader) -static const char PassTexCoordATI_names[] = +static const char PassTexCoordATI_names[] = "iii\0" /* Parameter signature */ "glPassTexCoordATI\0" ""; #endif #if defined(need_GL_VERSION_2_0) -static const char DeleteProgram_names[] = +static const char DeleteProgram_names[] = "i\0" /* Parameter signature */ "glDeleteProgram\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Tangent3ivEXT_names[] = +static const char Tangent3ivEXT_names[] = "p\0" /* Parameter signature 
*/ "glTangent3ivEXT\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Tangent3dEXT_names[] = +static const char Tangent3dEXT_names[] = "ddd\0" /* Parameter signature */ "glTangent3dEXT\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3dvEXT_names[] = +static const char SecondaryColor3dvEXT_names[] = "p\0" /* Parameter signature */ "glSecondaryColor3dv\0" "glSecondaryColor3dvEXT\0" @@ -3747,7 +3824,7 @@ static const char SecondaryColor3dvEXT_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_multi_draw_arrays) -static const char MultiDrawArraysEXT_names[] = +static const char MultiDrawArraysEXT_names[] = "ippi\0" /* Parameter signature */ "glMultiDrawArrays\0" "glMultiDrawArraysEXT\0" @@ -3755,7 +3832,7 @@ static const char MultiDrawArraysEXT_names[] = #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char BindRenderbufferEXT_names[] = +static const char BindRenderbufferEXT_names[] = "ii\0" /* Parameter signature */ "glBindRenderbuffer\0" "glBindRenderbufferEXT\0" @@ -3763,7 +3840,7 @@ static const char BindRenderbufferEXT_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord4dARB_names[] = +static const char MultiTexCoord4dARB_names[] = "idddd\0" /* Parameter signature */ "glMultiTexCoord4d\0" "glMultiTexCoord4dARB\0" @@ -3771,7 +3848,7 @@ static const char MultiTexCoord4dARB_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3usEXT_names[] = +static const char SecondaryColor3usEXT_names[] = "iii\0" /* Parameter signature */ "glSecondaryColor3us\0" "glSecondaryColor3usEXT\0" @@ -3779,14 +3856,14 @@ static const char SecondaryColor3usEXT_names[] = #endif #if defined(need_GL_ARB_vertex_program) -static const char ProgramLocalParameter4fvARB_names[] = +static const char 
ProgramLocalParameter4fvARB_names[] = "iip\0" /* Parameter signature */ "glProgramLocalParameter4fvARB\0" ""; #endif #if defined(need_GL_ARB_vertex_program) || defined(need_GL_NV_vertex_program) -static const char DeleteProgramsNV_names[] = +static const char DeleteProgramsNV_names[] = "ip\0" /* Parameter signature */ "glDeleteProgramsARB\0" "glDeleteProgramsNV\0" @@ -3794,7 +3871,7 @@ static const char DeleteProgramsNV_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord1sARB_names[] = +static const char MultiTexCoord1sARB_names[] = "ii\0" /* Parameter signature */ "glMultiTexCoord1s\0" "glMultiTexCoord1sARB\0" @@ -3802,14 +3879,14 @@ static const char MultiTexCoord1sARB_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiColor3fVertex3fSUN_names[] = +static const char ReplacementCodeuiColor3fVertex3fSUN_names[] = "iffffff\0" /* Parameter signature */ "glReplacementCodeuiColor3fVertex3fSUN\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) || defined(need_GL_NV_vertex_program) -static const char GetVertexAttribPointervNV_names[] = +static const char GetVertexAttribPointervNV_names[] = "iip\0" /* Parameter signature */ "glGetVertexAttribPointerv\0" "glGetVertexAttribPointervARB\0" @@ -3818,7 +3895,7 @@ static const char GetVertexAttribPointervNV_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord1dvARB_names[] = +static const char MultiTexCoord1dvARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord1dv\0" "glMultiTexCoord1dvARB\0" @@ -3826,7 +3903,7 @@ static const char MultiTexCoord1dvARB_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform2iARB_names[] = +static const char Uniform2iARB_names[] = "iii\0" /* Parameter signature */ "glUniform2i\0" "glUniform2iARB\0" @@ -3834,57 +3911,64 @@ static const char Uniform2iARB_names[] = #endif #if 
defined(need_GL_NV_vertex_program) -static const char GetProgramStringNV_names[] = +static const char GetProgramStringNV_names[] = "iip\0" /* Parameter signature */ "glGetProgramStringNV\0" ""; #endif #if defined(need_GL_EXT_vertex_array) -static const char ColorPointerEXT_names[] = +static const char ColorPointerEXT_names[] = "iiiip\0" /* Parameter signature */ "glColorPointerEXT\0" ""; #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) -static const char MapBufferARB_names[] = +static const char MapBufferARB_names[] = "ii\0" /* Parameter signature */ "glMapBuffer\0" "glMapBufferARB\0" ""; #endif +#if defined(need_GL_ARB_draw_elements_base_vertex) +static const char MultiDrawElementsBaseVertex_names[] = + "ipipip\0" /* Parameter signature */ + "glMultiDrawElementsBaseVertex\0" + ""; +#endif + #if defined(need_GL_EXT_coordinate_frame) -static const char Binormal3svEXT_names[] = +static const char Binormal3svEXT_names[] = "p\0" /* Parameter signature */ "glBinormal3svEXT\0" ""; #endif #if defined(need_GL_EXT_light_texture) -static const char ApplyTextureEXT_names[] = +static const char ApplyTextureEXT_names[] = "i\0" /* Parameter signature */ "glApplyTextureEXT\0" ""; #endif #if defined(need_GL_EXT_light_texture) -static const char TextureMaterialEXT_names[] = +static const char TextureMaterialEXT_names[] = "ii\0" /* Parameter signature */ "glTextureMaterialEXT\0" ""; #endif #if defined(need_GL_EXT_light_texture) -static const char TextureLightEXT_names[] = +static const char TextureLightEXT_names[] = "i\0" /* Parameter signature */ "glTextureLightEXT\0" ""; #endif #if defined(need_GL_EXT_histogram) -static const char ResetMinmax_names[] = +static const char ResetMinmax_names[] = "i\0" /* Parameter signature */ "glResetMinmax\0" "glResetMinmaxEXT\0" @@ -3892,21 +3976,21 @@ static const char ResetMinmax_names[] = #endif #if defined(need_GL_SGIX_sprite) -static const char SpriteParameterfSGIX_names[] = +static const char 
SpriteParameterfSGIX_names[] = "if\0" /* Parameter signature */ "glSpriteParameterfSGIX\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib4sNV_names[] = +static const char VertexAttrib4sNV_names[] = "iiiii\0" /* Parameter signature */ "glVertexAttrib4sNV\0" ""; #endif #if defined(need_GL_EXT_convolution) -static const char GetConvolutionParameterfv_names[] = +static const char GetConvolutionParameterfv_names[] = "iip\0" /* Parameter signature */ "glGetConvolutionParameterfv\0" "glGetConvolutionParameterfvEXT\0" @@ -3914,21 +3998,21 @@ static const char GetConvolutionParameterfv_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribs4dvNV_names[] = +static const char VertexAttribs4dvNV_names[] = "iip\0" /* Parameter signature */ "glVertexAttribs4dvNV\0" ""; #endif #if defined(need_GL_IBM_multimode_draw_arrays) -static const char MultiModeDrawArraysIBM_names[] = +static const char MultiModeDrawArraysIBM_names[] = "pppii\0" /* Parameter signature */ "glMultiModeDrawArraysIBM\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4dARB_names[] = +static const char VertexAttrib4dARB_names[] = "idddd\0" /* Parameter signature */ "glVertexAttrib4d\0" "glVertexAttrib4dARB\0" @@ -3936,35 +4020,35 @@ static const char VertexAttrib4dARB_names[] = #endif #if defined(need_GL_ATI_envmap_bumpmap) -static const char GetTexBumpParameterfvATI_names[] = +static const char GetTexBumpParameterfvATI_names[] = "ip\0" /* Parameter signature */ "glGetTexBumpParameterfvATI\0" ""; #endif #if defined(need_GL_NV_fragment_program) -static const char ProgramNamedParameter4dNV_names[] = +static const char ProgramNamedParameter4dNV_names[] = "iipdddd\0" /* Parameter signature */ "glProgramNamedParameter4dNV\0" ""; #endif #if defined(need_GL_EXT_vertex_weighting) -static const char VertexWeightfEXT_names[] = +static const char VertexWeightfEXT_names[] = "f\0" /* Parameter 
signature */ "glVertexWeightfEXT\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Binormal3fEXT_names[] = +static const char Binormal3fEXT_names[] = "fff\0" /* Parameter signature */ "glBinormal3fEXT\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_fog_coord) -static const char FogCoordfvEXT_names[] = +static const char FogCoordfvEXT_names[] = "p\0" /* Parameter signature */ "glFogCoordfv\0" "glFogCoordfvEXT\0" @@ -3972,7 +4056,7 @@ static const char FogCoordfvEXT_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord1ivARB_names[] = +static const char MultiTexCoord1ivARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord1iv\0" "glMultiTexCoord1ivARB\0" @@ -3980,7 +4064,7 @@ static const char MultiTexCoord1ivARB_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3ubEXT_names[] = +static const char SecondaryColor3ubEXT_names[] = "iii\0" /* Parameter signature */ "glSecondaryColor3ub\0" "glSecondaryColor3ubEXT\0" @@ -3988,7 +4072,7 @@ static const char SecondaryColor3ubEXT_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord2ivARB_names[] = +static const char MultiTexCoord2ivARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord2iv\0" "glMultiTexCoord2ivARB\0" @@ -3996,14 +4080,14 @@ static const char MultiTexCoord2ivARB_names[] = #endif #if defined(need_GL_SGIS_fog_function) -static const char FogFuncSGIS_names[] = +static const char FogFuncSGIS_names[] = "ip\0" /* Parameter signature */ "glFogFuncSGIS\0" ""; #endif #if defined(need_GL_EXT_copy_texture) -static const char CopyTexSubImage2D_names[] = +static const char CopyTexSubImage2D_names[] = "iiiiiiii\0" /* Parameter signature */ "glCopyTexSubImage2D\0" "glCopyTexSubImage2DEXT\0" @@ -4011,35 +4095,35 @@ static const char CopyTexSubImage2D_names[] = #endif #if defined(need_GL_ARB_shader_objects) -static const char 
GetObjectParameterivARB_names[] = +static const char GetObjectParameterivARB_names[] = "iip\0" /* Parameter signature */ "glGetObjectParameterivARB\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord4fVertex4fSUN_names[] = +static const char TexCoord4fVertex4fSUN_names[] = "ffffffff\0" /* Parameter signature */ "glTexCoord4fVertex4fSUN\0" ""; #endif #if defined(need_GL_APPLE_vertex_array_object) -static const char BindVertexArrayAPPLE_names[] = +static const char BindVertexArrayAPPLE_names[] = "i\0" /* Parameter signature */ "glBindVertexArrayAPPLE\0" ""; #endif #if defined(need_GL_ARB_vertex_program) -static const char GetProgramLocalParameterdvARB_names[] = +static const char GetProgramLocalParameterdvARB_names[] = "iip\0" /* Parameter signature */ "glGetProgramLocalParameterdvARB\0" ""; #endif #if defined(need_GL_EXT_histogram) -static const char GetHistogramParameteriv_names[] = +static const char GetHistogramParameteriv_names[] = "iip\0" /* Parameter signature */ "glGetHistogramParameteriv\0" "glGetHistogramParameterivEXT\0" @@ -4047,7 +4131,7 @@ static const char GetHistogramParameteriv_names[] = #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord1iARB_names[] = +static const char MultiTexCoord1iARB_names[] = "ii\0" /* Parameter signature */ "glMultiTexCoord1i\0" "glMultiTexCoord1iARB\0" @@ -4055,7 +4139,7 @@ static const char MultiTexCoord1iARB_names[] = #endif #if defined(need_GL_EXT_convolution) -static const char GetConvolutionFilter_names[] = +static const char GetConvolutionFilter_names[] = "iiip\0" /* Parameter signature */ "glGetConvolutionFilter\0" "glGetConvolutionFilterEXT\0" @@ -4063,14 +4147,14 @@ static const char GetConvolutionFilter_names[] = #endif #if defined(need_GL_ARB_vertex_program) -static const char GetProgramivARB_names[] = +static const char GetProgramivARB_names[] = "iip\0" /* Parameter signature */ "glGetProgramivARB\0" ""; #endif #if defined(need_GL_VERSION_1_4) || 
defined(need_GL_EXT_blend_func_separate) || defined(need_GL_INGR_blend_func_separate) -static const char BlendFuncSeparateEXT_names[] = +static const char BlendFuncSeparateEXT_names[] = "iiii\0" /* Parameter signature */ "glBlendFuncSeparate\0" "glBlendFuncSeparateEXT\0" @@ -4079,49 +4163,49 @@ static const char BlendFuncSeparateEXT_names[] = #endif #if defined(need_GL_ARB_map_buffer_range) -static const char MapBufferRange_names[] = +static const char MapBufferRange_names[] = "iiii\0" /* Parameter signature */ "glMapBufferRange\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char ProgramParameters4dvNV_names[] = +static const char ProgramParameters4dvNV_names[] = "iiip\0" /* Parameter signature */ "glProgramParameters4dvNV\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord2fColor3fVertex3fvSUN_names[] = +static const char TexCoord2fColor3fVertex3fvSUN_names[] = "ppp\0" /* Parameter signature */ "glTexCoord2fColor3fVertex3fvSUN\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Binormal3dvEXT_names[] = +static const char Binormal3dvEXT_names[] = "p\0" /* Parameter signature */ "glBinormal3dvEXT\0" ""; #endif #if defined(need_GL_NV_fence) -static const char FinishFenceNV_names[] = +static const char FinishFenceNV_names[] = "i\0" /* Parameter signature */ "glFinishFenceNV\0" ""; #endif #if defined(need_GL_SGIS_fog_function) -static const char GetFogFuncSGIS_names[] = +static const char GetFogFuncSGIS_names[] = "p\0" /* Parameter signature */ "glGetFogFuncSGIS\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char GetUniformLocationARB_names[] = +static const char GetUniformLocationARB_names[] = "ip\0" /* Parameter signature */ "glGetUniformLocation\0" "glGetUniformLocationARB\0" @@ -4129,7 +4213,7 @@ static const char GetUniformLocationARB_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char 
SecondaryColor3fEXT_names[] = +static const char SecondaryColor3fEXT_names[] = "fff\0" /* Parameter signature */ "glSecondaryColor3f\0" "glSecondaryColor3fEXT\0" @@ -4137,14 +4221,14 @@ static const char SecondaryColor3fEXT_names[] = #endif #if defined(need_GL_NV_register_combiners) -static const char CombinerInputNV_names[] = +static const char CombinerInputNV_names[] = "iiiiii\0" /* Parameter signature */ "glCombinerInputNV\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib3sARB_names[] = +static const char VertexAttrib3sARB_names[] = "iiii\0" /* Parameter signature */ "glVertexAttrib3s\0" "glVertexAttrib3sARB\0" @@ -4152,49 +4236,49 @@ static const char VertexAttrib3sARB_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiNormal3fVertex3fvSUN_names[] = +static const char ReplacementCodeuiNormal3fVertex3fvSUN_names[] = "ppp\0" /* Parameter signature */ "glReplacementCodeuiNormal3fVertex3fvSUN\0" ""; #endif #if defined(need_GL_ARB_vertex_program) -static const char ProgramStringARB_names[] = +static const char ProgramStringARB_names[] = "iiip\0" /* Parameter signature */ "glProgramStringARB\0" ""; #endif #if defined(need_GL_SUN_vertex) -static const char TexCoord4fVertex4fvSUN_names[] = +static const char TexCoord4fVertex4fvSUN_names[] = "pp\0" /* Parameter signature */ "glTexCoord4fVertex4fvSUN\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib3sNV_names[] = +static const char VertexAttrib3sNV_names[] = "iiii\0" /* Parameter signature */ "glVertexAttrib3sNV\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib1fNV_names[] = +static const char VertexAttrib1fNV_names[] = "if\0" /* Parameter signature */ "glVertexAttrib1fNV\0" ""; #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char FragmentLightfSGIX_names[] = +static const char FragmentLightfSGIX_names[] = "iif\0" /* Parameter 
signature */ "glFragmentLightfSGIX\0" ""; #endif #if defined(need_GL_VERSION_1_3) || defined(need_GL_ARB_texture_compression) -static const char GetCompressedTexImageARB_names[] = +static const char GetCompressedTexImageARB_names[] = "iip\0" /* Parameter signature */ "glGetCompressedTexImage\0" "glGetCompressedTexImageARB\0" @@ -4202,14 +4286,14 @@ static const char GetCompressedTexImageARB_names[] = #endif #if defined(need_GL_EXT_vertex_weighting) -static const char VertexWeightPointerEXT_names[] = +static const char VertexWeightPointerEXT_names[] = "iiip\0" /* Parameter signature */ "glVertexWeightPointerEXT\0" ""; #endif #if defined(need_GL_EXT_histogram) -static const char GetHistogram_names[] = +static const char GetHistogram_names[] = "iiiip\0" /* Parameter signature */ "glGetHistogram\0" "glGetHistogramEXT\0" @@ -4217,21 +4301,21 @@ static const char GetHistogram_names[] = #endif #if defined(need_GL_EXT_stencil_two_side) -static const char ActiveStencilFaceEXT_names[] = +static const char ActiveStencilFaceEXT_names[] = "i\0" /* Parameter signature */ "glActiveStencilFaceEXT\0" ""; #endif #if defined(need_GL_ATI_separate_stencil) -static const char StencilFuncSeparateATI_names[] = +static const char StencilFuncSeparateATI_names[] = "iiii\0" /* Parameter signature */ "glStencilFuncSeparateATI\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char GetShaderSourceARB_names[] = +static const char GetShaderSourceARB_names[] = "iipp\0" /* Parameter signature */ "glGetShaderSource\0" "glGetShaderSourceARB\0" @@ -4239,28 +4323,28 @@ static const char GetShaderSourceARB_names[] = #endif #if defined(need_GL_SGIX_igloo_interface) -static const char IglooInterfaceSGIX_names[] = +static const char IglooInterfaceSGIX_names[] = "ip\0" /* Parameter signature */ "glIglooInterfaceSGIX\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib4dNV_names[] = +static const char VertexAttrib4dNV_names[] = 
"idddd\0" /* Parameter signature */ "glVertexAttrib4dNV\0" ""; #endif #if defined(need_GL_IBM_multimode_draw_arrays) -static const char MultiModeDrawElementsIBM_names[] = +static const char MultiModeDrawElementsIBM_names[] = "ppipii\0" /* Parameter signature */ "glMultiModeDrawElementsIBM\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord4svARB_names[] = +static const char MultiTexCoord4svARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord4sv\0" "glMultiTexCoord4svARB\0" @@ -4268,7 +4352,7 @@ static const char MultiTexCoord4svARB_names[] = #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_occlusion_query) -static const char GenQueriesARB_names[] = +static const char GenQueriesARB_names[] = "ip\0" /* Parameter signature */ "glGenQueries\0" "glGenQueriesARB\0" @@ -4276,35 +4360,42 @@ static const char GenQueriesARB_names[] = #endif #if defined(need_GL_SUN_vertex) -static const char ReplacementCodeuiVertex3fSUN_names[] = +static const char ReplacementCodeuiVertex3fSUN_names[] = "ifff\0" /* Parameter signature */ "glReplacementCodeuiVertex3fSUN\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Tangent3iEXT_names[] = +static const char Tangent3iEXT_names[] = "iii\0" /* Parameter signature */ "glTangent3iEXT\0" ""; #endif #if defined(need_GL_SUN_mesh_array) -static const char DrawMeshArraysSUN_names[] = +static const char DrawMeshArraysSUN_names[] = "iiii\0" /* Parameter signature */ "glDrawMeshArraysSUN\0" ""; #endif +#if defined(need_GL_ARB_sync) +static const char IsSync_names[] = + "i\0" /* Parameter signature */ + "glIsSync\0" + ""; +#endif + #if defined(need_GL_NV_evaluators) -static const char GetMapControlPointsNV_names[] = +static const char GetMapControlPointsNV_names[] = "iiiiiip\0" /* Parameter signature */ "glGetMapControlPointsNV\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_draw_buffers) || defined(need_GL_ATI_draw_buffers) -static const char 
DrawBuffersARB_names[] = +static const char DrawBuffersARB_names[] = "ip\0" /* Parameter signature */ "glDrawBuffers\0" "glDrawBuffersARB\0" @@ -4313,28 +4404,29 @@ static const char DrawBuffersARB_names[] = #endif #if defined(need_GL_ARB_vertex_program) -static const char ProgramLocalParameter4fARB_names[] = +static const char ProgramLocalParameter4fARB_names[] = "iiffff\0" /* Parameter signature */ "glProgramLocalParameter4fARB\0" ""; #endif #if defined(need_GL_SGIX_sprite) -static const char SpriteParameterivSGIX_names[] = +static const char SpriteParameterivSGIX_names[] = "ip\0" /* Parameter signature */ "glSpriteParameterivSGIX\0" ""; #endif -#if defined(need_GL_EXT_provoking_vertex) -static const char ProvokingVertexEXT_names[] = +#if defined(need_GL_EXT_provoking_vertex) || defined(need_GL_ARB_provoking_vertex) +static const char ProvokingVertexEXT_names[] = "i\0" /* Parameter signature */ "glProvokingVertexEXT\0" + "glProvokingVertex\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord1fARB_names[] = +static const char MultiTexCoord1fARB_names[] = "if\0" /* Parameter signature */ "glMultiTexCoord1f\0" "glMultiTexCoord1fARB\0" @@ -4342,21 +4434,21 @@ static const char MultiTexCoord1fARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribs4ubvNV_names[] = +static const char VertexAttribs4ubvNV_names[] = "iip\0" /* Parameter signature */ "glVertexAttribs4ubvNV\0" ""; #endif #if defined(need_GL_ARB_vertex_blend) -static const char WeightsvARB_names[] = +static const char WeightsvARB_names[] = "ip\0" /* Parameter signature */ "glWeightsvARB\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects) -static const char Uniform1fvARB_names[] = +static const char Uniform1fvARB_names[] = "iip\0" /* Parameter signature */ "glUniform1fv\0" "glUniform1fvARB\0" @@ -4364,7 +4456,7 @@ static const char Uniform1fvARB_names[] = #endif #if defined(need_GL_EXT_copy_texture) -static const 
char CopyTexSubImage1D_names[] = +static const char CopyTexSubImage1D_names[] = "iiiiii\0" /* Parameter signature */ "glCopyTexSubImage1D\0" "glCopyTexSubImage1DEXT\0" @@ -4372,7 +4464,7 @@ static const char CopyTexSubImage1D_names[] = #endif #if defined(need_GL_EXT_texture_object) -static const char BindTexture_names[] = +static const char BindTexture_names[] = "ii\0" /* Parameter signature */ "glBindTexture\0" "glBindTextureEXT\0" @@ -4380,14 +4472,14 @@ static const char BindTexture_names[] = #endif #if defined(need_GL_ATI_fragment_shader) -static const char BeginFragmentShaderATI_names[] = +static const char BeginFragmentShaderATI_names[] = "\0" /* Parameter signature */ "glBeginFragmentShaderATI\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord4fARB_names[] = +static const char MultiTexCoord4fARB_names[] = "iffff\0" /* Parameter signature */ "glMultiTexCoord4f\0" "glMultiTexCoord4fARB\0" @@ -4395,21 +4487,21 @@ static const char MultiTexCoord4fARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribs3svNV_names[] = +static const char VertexAttribs3svNV_names[] = "iip\0" /* Parameter signature */ "glVertexAttribs3svNV\0" ""; #endif #if defined(need_GL_SUN_triangle_list) -static const char ReplacementCodeuivSUN_names[] = +static const char ReplacementCodeuivSUN_names[] = "p\0" /* Parameter signature */ "glReplacementCodeuivSUN\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char EnableVertexAttribArrayARB_names[] = +static const char EnableVertexAttribArrayARB_names[] = "i\0" /* Parameter signature */ "glEnableVertexAttribArray\0" "glEnableVertexAttribArrayARB\0" @@ -4417,14 +4509,14 @@ static const char EnableVertexAttribArrayARB_names[] = #endif #if defined(need_GL_INTEL_parallel_arrays) -static const char NormalPointervINTEL_names[] = +static const char NormalPointervINTEL_names[] = "ip\0" /* Parameter signature */ "glNormalPointervINTEL\0" ""; 
#endif #if defined(need_GL_EXT_convolution) -static const char CopyConvolutionFilter2D_names[] = +static const char CopyConvolutionFilter2D_names[] = "iiiiii\0" /* Parameter signature */ "glCopyConvolutionFilter2D\0" "glCopyConvolutionFilter2DEXT\0" @@ -4432,7 +4524,7 @@ static const char CopyConvolutionFilter2D_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos3ivMESA_names[] = +static const char WindowPos3ivMESA_names[] = "p\0" /* Parameter signature */ "glWindowPos3iv\0" "glWindowPos3ivARB\0" @@ -4441,14 +4533,14 @@ static const char WindowPos3ivMESA_names[] = #endif #if defined(need_GL_ARB_copy_buffer) -static const char CopyBufferSubData_names[] = +static const char CopyBufferSubData_names[] = "iiiii\0" /* Parameter signature */ "glCopyBufferSubData\0" ""; #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) -static const char IsBufferARB_names[] = +static const char IsBufferARB_names[] = "i\0" /* Parameter signature */ "glIsBuffer\0" "glIsBufferARB\0" @@ -4456,14 +4548,14 @@ static const char IsBufferARB_names[] = #endif #if defined(need_GL_MESA_window_pos) -static const char WindowPos4iMESA_names[] = +static const char WindowPos4iMESA_names[] = "iiii\0" /* Parameter signature */ "glWindowPos4iMESA\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4uivARB_names[] = +static const char VertexAttrib4uivARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4uiv\0" "glVertexAttrib4uivARB\0" @@ -4471,35 +4563,35 @@ static const char VertexAttrib4uivARB_names[] = #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Tangent3bvEXT_names[] = +static const char Tangent3bvEXT_names[] = "p\0" /* Parameter signature */ "glTangent3bvEXT\0" ""; #endif #if defined(need_GL_VERSION_2_1) -static const char UniformMatrix3x4fv_names[] = +static const char 
UniformMatrix3x4fv_names[] = "iiip\0" /* Parameter signature */ "glUniformMatrix3x4fv\0" ""; #endif -#if defined(need_GL_EXT_coordinate_frame) -static const char Binormal3fvEXT_names[] = - "p\0" /* Parameter signature */ - "glBinormal3fvEXT\0" +#if defined(need_GL_ARB_draw_elements_base_vertex) +static const char DrawRangeElementsBaseVertex_names[] = + "iiiiipi\0" /* Parameter signature */ + "glDrawRangeElementsBaseVertex\0" ""; #endif #if defined(need_GL_INTEL_parallel_arrays) -static const char TexCoordPointervINTEL_names[] = +static const char TexCoordPointervINTEL_names[] = "iip\0" /* Parameter signature */ "glTexCoordPointervINTEL\0" ""; #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) -static const char DeleteBuffersARB_names[] = +static const char DeleteBuffersARB_names[] = "ip\0" /* Parameter signature */ "glDeleteBuffers\0" "glDeleteBuffersARB\0" @@ -4507,21 +4599,21 @@ static const char DeleteBuffersARB_names[] = #endif #if defined(need_GL_MESA_window_pos) -static const char WindowPos4fvMESA_names[] = +static const char WindowPos4fvMESA_names[] = "p\0" /* Parameter signature */ "glWindowPos4fvMESA\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib1sNV_names[] = +static const char VertexAttrib1sNV_names[] = "ii\0" /* Parameter signature */ "glVertexAttrib1sNV\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_secondary_color) -static const char SecondaryColor3svEXT_names[] = +static const char SecondaryColor3svEXT_names[] = "p\0" /* Parameter signature */ "glSecondaryColor3sv\0" "glSecondaryColor3svEXT\0" @@ -4529,7 +4621,7 @@ static const char SecondaryColor3svEXT_names[] = #endif #if defined(need_GL_VERSION_1_3) || defined(need_GL_ARB_transpose_matrix) -static const char LoadTransposeMatrixfARB_names[] = +static const char LoadTransposeMatrixfARB_names[] = "p\0" /* Parameter signature */ "glLoadTransposeMatrixf\0" "glLoadTransposeMatrixfARB\0" @@ -4537,7 +4629,7 @@ 
static const char LoadTransposeMatrixfARB_names[] = #endif #if defined(need_GL_EXT_vertex_array) -static const char GetPointerv_names[] = +static const char GetPointerv_names[] = "ip\0" /* Parameter signature */ "glGetPointerv\0" "glGetPointervEXT\0" @@ -4545,21 +4637,21 @@ static const char GetPointerv_names[] = #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Tangent3bEXT_names[] = +static const char Tangent3bEXT_names[] = "iii\0" /* Parameter signature */ "glTangent3bEXT\0" ""; #endif #if defined(need_GL_NV_register_combiners) -static const char CombinerParameterfNV_names[] = +static const char CombinerParameterfNV_names[] = "if\0" /* Parameter signature */ "glCombinerParameterfNV\0" ""; #endif #if defined(need_GL_ARB_vertex_program) || defined(need_GL_NV_vertex_program) -static const char BindProgramNV_names[] = +static const char BindProgramNV_names[] = "ii\0" /* Parameter signature */ "glBindProgramARB\0" "glBindProgramNV\0" @@ -4567,7 +4659,7 @@ static const char BindProgramNV_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4svARB_names[] = +static const char VertexAttrib4svARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4sv\0" "glVertexAttrib4svARB\0" @@ -4575,28 +4667,35 @@ static const char VertexAttrib4svARB_names[] = #endif #if defined(need_GL_MESA_shader_debug) -static const char CreateDebugObjectMESA_names[] = +static const char CreateDebugObjectMESA_names[] = "\0" /* Parameter signature */ "glCreateDebugObjectMESA\0" ""; #endif #if defined(need_GL_VERSION_2_0) -static const char GetShaderiv_names[] = +static const char GetShaderiv_names[] = "iip\0" /* Parameter signature */ "glGetShaderiv\0" ""; #endif +#if defined(need_GL_ARB_sync) +static const char ClientWaitSync_names[] = + "iii\0" /* Parameter signature */ + "glClientWaitSync\0" + ""; +#endif + #if defined(need_GL_ATI_fragment_shader) -static const char BindFragmentShaderATI_names[] = +static 
const char BindFragmentShaderATI_names[] = "i\0" /* Parameter signature */ "glBindFragmentShaderATI\0" ""; #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) -static const char UnmapBufferARB_names[] = +static const char UnmapBufferARB_names[] = "i\0" /* Parameter signature */ "glUnmapBuffer\0" "glUnmapBufferARB\0" @@ -4604,7 +4703,7 @@ static const char UnmapBufferARB_names[] = #endif #if defined(need_GL_EXT_histogram) -static const char Minmax_names[] = +static const char Minmax_names[] = "iii\0" /* Parameter signature */ "glMinmax\0" "glMinmaxEXT\0" @@ -4612,7 +4711,7 @@ static const char Minmax_names[] = #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_fog_coord) -static const char FogCoorddvEXT_names[] = +static const char FogCoorddvEXT_names[] = "p\0" /* Parameter signature */ "glFogCoorddv\0" "glFogCoorddvEXT\0" @@ -4620,35 +4719,35 @@ static const char FogCoorddvEXT_names[] = #endif #if defined(need_GL_SUNX_constant_data) -static const char FinishTextureSUNX_names[] = +static const char FinishTextureSUNX_names[] = "\0" /* Parameter signature */ "glFinishTextureSUNX\0" ""; #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char GetFragmentLightfvSGIX_names[] = +static const char GetFragmentLightfvSGIX_names[] = "iip\0" /* Parameter signature */ "glGetFragmentLightfvSGIX\0" ""; #endif -#if defined(need_GL_NV_register_combiners) -static const char GetFinalCombinerInputParameterfvNV_names[] = - "iip\0" /* Parameter signature */ - "glGetFinalCombinerInputParameterfvNV\0" +#if defined(need_GL_EXT_coordinate_frame) +static const char Binormal3fvEXT_names[] = + "p\0" /* Parameter signature */ + "glBinormal3fvEXT\0" ""; #endif #if defined(need_GL_ATI_fragment_shader) -static const char ColorFragmentOp3ATI_names[] = +static const char ColorFragmentOp3ATI_names[] = "iiiiiiiiiiiii\0" /* Parameter signature */ "glColorFragmentOp3ATI\0" ""; #endif #if defined(need_GL_VERSION_2_0) || 
defined(need_GL_ARB_vertex_program) -static const char VertexAttrib2svARB_names[] = +static const char VertexAttrib2svARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib2sv\0" "glVertexAttrib2svARB\0" @@ -4656,14 +4755,14 @@ static const char VertexAttrib2svARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char AreProgramsResidentNV_names[] = +static const char AreProgramsResidentNV_names[] = "ipp\0" /* Parameter signature */ "glAreProgramsResidentNV\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos3svMESA_names[] = +static const char WindowPos3svMESA_names[] = "p\0" /* Parameter signature */ "glWindowPos3sv\0" "glWindowPos3svARB\0" @@ -4672,7 +4771,7 @@ static const char WindowPos3svMESA_names[] = #endif #if defined(need_GL_EXT_color_subtable) -static const char CopyColorSubTable_names[] = +static const char CopyColorSubTable_names[] = "iiiii\0" /* Parameter signature */ "glCopyColorSubTable\0" "glCopyColorSubTableEXT\0" @@ -4680,14 +4779,14 @@ static const char CopyColorSubTable_names[] = #endif #if defined(need_GL_ARB_vertex_blend) -static const char WeightdvARB_names[] = +static const char WeightdvARB_names[] = "ip\0" /* Parameter signature */ "glWeightdvARB\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char DeleteRenderbuffersEXT_names[] = +static const char DeleteRenderbuffersEXT_names[] = "ip\0" /* Parameter signature */ "glDeleteRenderbuffers\0" "glDeleteRenderbuffersEXT\0" @@ -4695,7 +4794,7 @@ static const char DeleteRenderbuffersEXT_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib4NubvARB_names[] = +static const char VertexAttrib4NubvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib4Nubv\0" "glVertexAttrib4NubvARB\0" @@ -4703,63 +4802,63 @@ static const char 
VertexAttrib4NubvARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib3dvNV_names[] = +static const char VertexAttrib3dvNV_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib3dvNV\0" ""; #endif #if defined(need_GL_ARB_shader_objects) -static const char GetObjectParameterfvARB_names[] = +static const char GetObjectParameterfvARB_names[] = "iip\0" /* Parameter signature */ "glGetObjectParameterfvARB\0" ""; #endif #if defined(need_GL_ARB_vertex_program) -static const char GetProgramEnvParameterdvARB_names[] = +static const char GetProgramEnvParameterdvARB_names[] = "iip\0" /* Parameter signature */ "glGetProgramEnvParameterdvARB\0" ""; #endif #if defined(need_GL_EXT_compiled_vertex_array) -static const char LockArraysEXT_names[] = +static const char LockArraysEXT_names[] = "ii\0" /* Parameter signature */ "glLockArraysEXT\0" ""; #endif #if defined(need_GL_EXT_pixel_transform) -static const char PixelTransformParameterivEXT_names[] = +static const char PixelTransformParameterivEXT_names[] = "iip\0" /* Parameter signature */ "glPixelTransformParameterivEXT\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char BinormalPointerEXT_names[] = +static const char BinormalPointerEXT_names[] = "iip\0" /* Parameter signature */ "glBinormalPointerEXT\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib1dNV_names[] = +static const char VertexAttrib1dNV_names[] = "id\0" /* Parameter signature */ "glVertexAttrib1dNV\0" ""; #endif #if defined(need_GL_NV_register_combiners) -static const char GetCombinerInputParameterivNV_names[] = +static const char GetCombinerInputParameterivNV_names[] = "iiiip\0" /* Parameter signature */ "glGetCombinerInputParameterivNV\0" ""; #endif #if defined(need_GL_VERSION_1_3) -static const char MultiTexCoord2fvARB_names[] = +static const char MultiTexCoord2fvARB_names[] = "ip\0" /* Parameter signature */ "glMultiTexCoord2fv\0" "glMultiTexCoord2fvARB\0" @@ 
-4767,7 +4866,7 @@ static const char MultiTexCoord2fvARB_names[] = #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char GetRenderbufferParameterivEXT_names[] = +static const char GetRenderbufferParameterivEXT_names[] = "iip\0" /* Parameter signature */ "glGetRenderbufferParameteriv\0" "glGetRenderbufferParameterivEXT\0" @@ -4775,21 +4874,21 @@ static const char GetRenderbufferParameterivEXT_names[] = #endif #if defined(need_GL_NV_register_combiners) -static const char CombinerParameterivNV_names[] = +static const char CombinerParameterivNV_names[] = "ip\0" /* Parameter signature */ "glCombinerParameterivNV\0" ""; #endif #if defined(need_GL_ATI_fragment_shader) -static const char GenFragmentShadersATI_names[] = +static const char GenFragmentShadersATI_names[] = "i\0" /* Parameter signature */ "glGenFragmentShadersATI\0" ""; #endif #if defined(need_GL_EXT_vertex_array) -static const char DrawArrays_names[] = +static const char DrawArrays_names[] = "iii\0" /* Parameter signature */ "glDrawArrays\0" "glDrawArraysEXT\0" @@ -4797,14 +4896,14 @@ static const char DrawArrays_names[] = #endif #if defined(need_GL_ARB_vertex_blend) -static const char WeightuivARB_names[] = +static const char WeightuivARB_names[] = "ip\0" /* Parameter signature */ "glWeightuivARB\0" ""; #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib2sARB_names[] = +static const char VertexAttrib2sARB_names[] = "iii\0" /* Parameter signature */ "glVertexAttrib2s\0" "glVertexAttrib2sARB\0" @@ -4812,28 +4911,28 @@ static const char VertexAttrib2sARB_names[] = #endif #if defined(need_GL_SGIX_async) -static const char GenAsyncMarkersSGIX_names[] = +static const char GenAsyncMarkersSGIX_names[] = "i\0" /* Parameter signature */ "glGenAsyncMarkersSGIX\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Tangent3svEXT_names[] = +static const char Tangent3svEXT_names[] = 
"p\0" /* Parameter signature */ "glTangent3svEXT\0" ""; #endif #if defined(need_GL_SGIX_list_priority) -static const char GetListParameterivSGIX_names[] = +static const char GetListParameterivSGIX_names[] = "iip\0" /* Parameter signature */ "glGetListParameterivSGIX\0" ""; #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) -static const char BindBufferARB_names[] = +static const char BindBufferARB_names[] = "ii\0" /* Parameter signature */ "glBindBuffer\0" "glBindBufferARB\0" @@ -4841,35 +4940,35 @@ static const char BindBufferARB_names[] = #endif #if defined(need_GL_ARB_shader_objects) -static const char GetInfoLogARB_names[] = +static const char GetInfoLogARB_names[] = "iipp\0" /* Parameter signature */ "glGetInfoLogARB\0" ""; #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribs4svNV_names[] = +static const char VertexAttribs4svNV_names[] = "iip\0" /* Parameter signature */ "glVertexAttribs4svNV\0" ""; #endif #if defined(need_GL_IBM_vertex_array_lists) -static const char EdgeFlagPointerListIBM_names[] = +static const char EdgeFlagPointerListIBM_names[] = "ipi\0" /* Parameter signature */ "glEdgeFlagPointerListIBM\0" ""; #endif #if defined(need_GL_VERSION_2_1) -static const char UniformMatrix3x2fv_names[] = +static const char UniformMatrix3x2fv_names[] = "iiip\0" /* Parameter signature */ "glUniformMatrix3x2fv\0" ""; #endif #if defined(need_GL_EXT_histogram) -static const char GetMinmaxParameterfv_names[] = +static const char GetMinmaxParameterfv_names[] = "iip\0" /* Parameter signature */ "glGetMinmaxParameterfv\0" "glGetMinmaxParameterfvEXT\0" @@ -4877,7 +4976,7 @@ static const char GetMinmaxParameterfv_names[] = #endif #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) -static const char VertexAttrib1fvARB_names[] = +static const char VertexAttrib1fvARB_names[] = "ip\0" /* Parameter signature */ "glVertexAttrib1fv\0" "glVertexAttrib1fvARB\0" @@ -4885,7 +4984,7 @@ static const char 
VertexAttrib1fvARB_names[] = #endif #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) -static const char GenBuffersARB_names[] = +static const char GenBuffersARB_names[] = "ip\0" /* Parameter signature */ "glGenBuffers\0" "glGenBuffersARB\0" @@ -4893,35 +4992,35 @@ static const char GenBuffersARB_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttribs1svNV_names[] = +static const char VertexAttribs1svNV_names[] = "iip\0" /* Parameter signature */ "glVertexAttribs1svNV\0" ""; #endif #if defined(need_GL_ATI_envmap_bumpmap) -static const char GetTexBumpParameterivATI_names[] = +static const char GetTexBumpParameterivATI_names[] = "ip\0" /* Parameter signature */ "glGetTexBumpParameterivATI\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Binormal3bEXT_names[] = +static const char Binormal3bEXT_names[] = "iii\0" /* Parameter signature */ "glBinormal3bEXT\0" ""; #endif #if defined(need_GL_SGIX_fragment_lighting) -static const char FragmentMaterialivSGIX_names[] = +static const char FragmentMaterialivSGIX_names[] = "iip\0" /* Parameter signature */ "glFragmentMaterialivSGIX\0" ""; #endif #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) -static const char IsRenderbufferEXT_names[] = +static const char IsRenderbufferEXT_names[] = "i\0" /* Parameter signature */ "glIsRenderbuffer\0" "glIsRenderbufferEXT\0" @@ -4929,7 +5028,7 @@ static const char IsRenderbufferEXT_names[] = #endif #if defined(need_GL_ARB_vertex_program) || defined(need_GL_NV_vertex_program) -static const char GenProgramsNV_names[] = +static const char GenProgramsNV_names[] = "ip\0" /* Parameter signature */ "glGenProgramsARB\0" "glGenProgramsNV\0" @@ -4937,28 +5036,28 @@ static const char GenProgramsNV_names[] = #endif #if defined(need_GL_NV_vertex_program) -static const char VertexAttrib4dvNV_names[] = +static const char VertexAttrib4dvNV_names[] = "ip\0" /* Parameter signature */ 
"glVertexAttrib4dvNV\0" ""; #endif #if defined(need_GL_ATI_fragment_shader) -static const char EndFragmentShaderATI_names[] = +static const char EndFragmentShaderATI_names[] = "\0" /* Parameter signature */ "glEndFragmentShaderATI\0" ""; #endif #if defined(need_GL_EXT_coordinate_frame) -static const char Binormal3iEXT_names[] = +static const char Binormal3iEXT_names[] = "iii\0" /* Parameter signature */ "glBinormal3iEXT\0" ""; #endif #if defined(need_GL_VERSION_1_4) || defined(need_GL_ARB_window_pos) || defined(need_GL_MESA_window_pos) -static const char WindowPos2fMESA_names[] = +static const char WindowPos2fMESA_names[] = "ff\0" /* Parameter signature */ "glWindowPos2f\0" "glWindowPos2fARB\0" @@ -4973,6 +5072,22 @@ static const struct dri_extension_function GL_3DFX_tbuffer_functions[] = { }; #endif +#if defined(need_GL_APPLE_flush_buffer_range) +static const struct dri_extension_function GL_APPLE_flush_buffer_range_functions[] = { + { BufferParameteriAPPLE_names, BufferParameteriAPPLE_remap_index, -1 }, + { FlushMappedBufferRangeAPPLE_names, FlushMappedBufferRangeAPPLE_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + +#if defined(need_GL_APPLE_texture_range) +static const struct dri_extension_function GL_APPLE_texture_range_functions[] = { + { TextureRangeAPPLE_names, TextureRangeAPPLE_remap_index, -1 }, + { GetTexParameterPointervAPPLE_names, GetTexParameterPointervAPPLE_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + #if defined(need_GL_APPLE_vertex_array_object) static const struct dri_extension_function GL_APPLE_vertex_array_object_functions[] = { { DeleteVertexArraysAPPLE_names, DeleteVertexArraysAPPLE_remap_index, -1 }, @@ -4997,6 +5112,15 @@ static const struct dri_extension_function GL_ARB_draw_buffers_functions[] = { }; #endif +#if defined(need_GL_ARB_draw_elements_base_vertex) +static const struct dri_extension_function GL_ARB_draw_elements_base_vertex_functions[] = { + { DrawElementsBaseVertex_names, DrawElementsBaseVertex_remap_index, -1 }, + { 
MultiDrawElementsBaseVertex_names, MultiDrawElementsBaseVertex_remap_index, -1 }, + { DrawRangeElementsBaseVertex_names, DrawRangeElementsBaseVertex_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + #if defined(need_GL_ARB_framebuffer_object) static const struct dri_extension_function GL_ARB_framebuffer_object_functions[] = { { BlitFramebufferEXT_names, BlitFramebufferEXT_remap_index, -1 }, @@ -5071,6 +5195,13 @@ static const struct dri_extension_function GL_ARB_point_parameters_functions[] = }; #endif +#if defined(need_GL_ARB_provoking_vertex) +static const struct dri_extension_function GL_ARB_provoking_vertex_functions[] = { + { ProvokingVertexEXT_names, ProvokingVertexEXT_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + #if defined(need_GL_ARB_shader_objects) static const struct dri_extension_function GL_ARB_shader_objects_functions[] = { { UniformMatrix3fvARB_names, UniformMatrix3fvARB_remap_index, -1 }, @@ -5116,6 +5247,19 @@ static const struct dri_extension_function GL_ARB_shader_objects_functions[] = { }; #endif +#if defined(need_GL_ARB_sync) +static const struct dri_extension_function GL_ARB_sync_functions[] = { + { DeleteSync_names, DeleteSync_remap_index, -1 }, + { FenceSync_names, FenceSync_remap_index, -1 }, + { WaitSync_names, WaitSync_remap_index, -1 }, + { GetInteger64v_names, GetInteger64v_remap_index, -1 }, + { GetSynciv_names, GetSynciv_remap_index, -1 }, + { IsSync_names, IsSync_remap_index, -1 }, + { ClientWaitSync_names, ClientWaitSync_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + #if defined(need_GL_ARB_texture_compression) static const struct dri_extension_function GL_ARB_texture_compression_functions[] = { { CompressedTexSubImage2DARB_names, CompressedTexSubImage2DARB_remap_index, -1 }, @@ -5414,8 +5558,8 @@ static const struct dri_extension_function GL_EXT_coordinate_frame_functions[] = { Binormal3dvEXT_names, Binormal3dvEXT_remap_index, -1 }, { Tangent3iEXT_names, Tangent3iEXT_remap_index, -1 }, { Tangent3bvEXT_names, 
Tangent3bvEXT_remap_index, -1 }, - { Binormal3fvEXT_names, Binormal3fvEXT_remap_index, -1 }, { Tangent3bEXT_names, Tangent3bEXT_remap_index, -1 }, + { Binormal3fvEXT_names, Binormal3fvEXT_remap_index, -1 }, { BinormalPointerEXT_names, BinormalPointerEXT_remap_index, -1 }, { Tangent3svEXT_names, Tangent3svEXT_remap_index, -1 }, { Binormal3bEXT_names, Binormal3bEXT_remap_index, -1 }, @@ -5857,6 +6001,7 @@ static const struct dri_extension_function GL_NV_point_sprite_functions[] = { static const struct dri_extension_function GL_NV_register_combiners_functions[] = { { CombinerOutputNV_names, CombinerOutputNV_remap_index, -1 }, { CombinerParameterfvNV_names, CombinerParameterfvNV_remap_index, -1 }, + { GetFinalCombinerInputParameterfvNV_names, GetFinalCombinerInputParameterfvNV_remap_index, -1 }, { GetCombinerOutputParameterfvNV_names, GetCombinerOutputParameterfvNV_remap_index, -1 }, { FinalCombinerInputNV_names, FinalCombinerInputNV_remap_index, -1 }, { GetCombinerInputParameterfvNV_names, GetCombinerInputParameterfvNV_remap_index, -1 }, @@ -5865,7 +6010,6 @@ static const struct dri_extension_function GL_NV_register_combiners_functions[] { GetFinalCombinerInputParameterivNV_names, GetFinalCombinerInputParameterivNV_remap_index, -1 }, { CombinerInputNV_names, CombinerInputNV_remap_index, -1 }, { CombinerParameterfNV_names, CombinerParameterfNV_remap_index, -1 }, - { GetFinalCombinerInputParameterfvNV_names, GetFinalCombinerInputParameterfvNV_remap_index, -1 }, { GetCombinerInputParameterivNV_names, GetCombinerInputParameterivNV_remap_index, -1 }, { CombinerParameterivNV_names, CombinerParameterivNV_remap_index, -1 }, { NULL, 0, 0 } @@ -5892,6 +6036,7 @@ static const struct dri_extension_function GL_NV_vertex_array_range_functions[] static const struct dri_extension_function GL_NV_vertex_program_functions[] = { { VertexAttrib4ubvNV_names, VertexAttrib4ubvNV_remap_index, -1 }, { VertexAttrib4svNV_names, VertexAttrib4svNV_remap_index, -1 }, + { VertexAttribs3fvNV_names, 
VertexAttribs3fvNV_remap_index, -1 }, { VertexAttribs1dvNV_names, VertexAttribs1dvNV_remap_index, -1 }, { VertexAttrib1fvNV_names, VertexAttrib1fvNV_remap_index, -1 }, { VertexAttrib4fNV_names, VertexAttrib4fNV_remap_index, -1 }, @@ -5900,7 +6045,6 @@ static const struct dri_extension_function GL_NV_vertex_program_functions[] = { { VertexAttribs3dvNV_names, VertexAttribs3dvNV_remap_index, -1 }, { VertexAttribs4fvNV_names, VertexAttribs4fvNV_remap_index, -1 }, { VertexAttrib2sNV_names, VertexAttrib2sNV_remap_index, -1 }, - { VertexAttribs3fvNV_names, VertexAttribs3fvNV_remap_index, -1 }, { ProgramEnvParameter4fvARB_names, ProgramEnvParameter4fvARB_remap_index, -1 }, { LoadProgramNV_names, LoadProgramNV_remap_index, -1 }, { VertexAttrib4fvNV_names, VertexAttrib4fvNV_remap_index, -1 }, diff --git a/src/mesa/drivers/dri/fb/fb_egl.c b/src/mesa/drivers/dri/fb/fb_egl.c index dee67feb5a..4e41860d8c 100644 --- a/src/mesa/drivers/dri/fb/fb_egl.c +++ b/src/mesa/drivers/dri/fb/fb_egl.c @@ -605,7 +605,7 @@ fbDestroySurface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface) { fbSurface *fs = Lookup_fbSurface(surface); _eglUnlinkSurface(&fs->Base); - if (!fs->Base.IsBound) + if (!_eglIsSurfaceBound(&fs->Base)) free(fs); return EGL_TRUE; } @@ -616,7 +616,7 @@ fbDestroyContext(_EGLDriver *drv, EGLDisplay dpy, EGLContext context) { fbContext *fc = Lookup_fbContext(context); _eglUnlinkContext(&fc->Base); - if (!fc->Base.IsBound) + if (!_eglIsContextBound(&fc->Base)) free(fc); return EGL_TRUE; } diff --git a/src/mesa/drivers/dri/ffb/ffb_tex.c b/src/mesa/drivers/dri/ffb/ffb_tex.c index 69d30aedba..95058e9069 100644 --- a/src/mesa/drivers/dri/ffb/ffb_tex.c +++ b/src/mesa/drivers/dri/ffb/ffb_tex.c @@ -30,24 +30,6 @@ #include "ffb_tex.h" /* No texture unit, all software. */ -/* XXX this function isn't needed since _mesa_init_driver_functions() - * will make all these assignments. 
- */ void ffbDDInitTexFuncs(GLcontext *ctx) { - /* - ctx->Driver.ChooseTextureFormat = _mesa_choose_tex_format; - ctx->Driver.TexImage1D = _mesa_store_teximage1d; - ctx->Driver.TexImage2D = _mesa_store_teximage2d; - ctx->Driver.TexImage3D = _mesa_store_teximage3d; - ctx->Driver.TexSubImage1D = _mesa_store_texsubimage1d; - ctx->Driver.TexSubImage2D = _mesa_store_texsubimage2d; - ctx->Driver.TexSubImage3D = _mesa_store_texsubimage3d; - ctx->Driver.CopyTexImage1D = _swrast_copy_teximage1d; - ctx->Driver.CopyTexImage2D = _swrast_copy_teximage2d; - ctx->Driver.CopyTexSubImage1D = _swrast_copy_texsubimage1d; - ctx->Driver.CopyTexSubImage2D = _swrast_copy_texsubimage2d; - ctx->Driver.CopyTexSubImage3D = _swrast_copy_texsubimage3d; - ctx->Driver.TestProxyTexImage = _mesa_test_proxy_teximage; - */ } diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index beaf9a4b12..9d049dea8f 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -51,6 +51,7 @@ DRIVER_SOURCES = \ intel_screen.c \ intel_span.c \ intel_state.c \ + intel_syncobj.c \ intel_tris.c \ intel_fbo.c diff --git a/src/mesa/drivers/dri/i915/i830_context.h b/src/mesa/drivers/dri/i915/i830_context.h index 1bdb32049d..f73cbbf88b 100644 --- a/src/mesa/drivers/dri/i915/i830_context.h +++ b/src/mesa/drivers/dri/i915/i830_context.h @@ -40,6 +40,7 @@ #define I830_UPLOAD_BUFFERS 0x2 #define I830_UPLOAD_STIPPLE 0x4 #define I830_UPLOAD_INVARIENT 0x8 +#define I830_UPLOAD_RASTER_RULES 0x10 #define I830_UPLOAD_TEX(i) (0x10<<(i)) #define I830_UPLOAD_TEXBLEND(i) (0x100<<(i)) #define I830_UPLOAD_TEX_ALL (0x0f0) @@ -99,6 +100,11 @@ #define I830_TEXBLEND_SIZE 12 /* (4 args + op) * 2 + COLOR_FACTOR */ +enum { + I830_RASTER_RULES, + I830_RASTER_RULES_SIZE +}; + struct i830_texture_object { struct intel_texture_object intel; @@ -112,6 +118,7 @@ struct i830_hw_state GLuint Ctx[I830_CTX_SETUP_SIZE]; GLuint Buffer[I830_DEST_SETUP_SIZE]; GLuint Stipple[I830_STP_SETUP_SIZE]; 
+ GLuint RasterRules[I830_RASTER_RULES_SIZE]; GLuint Tex[I830_TEX_UNITS][I830_TEX_SETUP_SIZE]; GLuint TexBlend[I830_TEX_UNITS][I830_TEXBLEND_SIZE]; GLuint TexBlendWordsUsed[I830_TEX_UNITS]; @@ -197,6 +204,7 @@ extern void i830InitStateFuncs(struct dd_function_table *functions); extern void i830EmitState(struct i830_context *i830); extern void i830InitState(struct i830_context *i830); +extern void i830_update_provoking_vertex(GLcontext *ctx); /* i830_metaops.c */ diff --git a/src/mesa/drivers/dri/i915/i830_reg.h b/src/mesa/drivers/dri/i915/i830_reg.h index db16871001..ae1317029a 100644 --- a/src/mesa/drivers/dri/i915/i830_reg.h +++ b/src/mesa/drivers/dri/i915/i830_reg.h @@ -420,8 +420,11 @@ #define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) #define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) #define ENABLE_TRI_STRIP_PROVOKE_VRTX (1<<2) +#define LINE_STRIP_PROVOKE_VRTX_MASK (3<<6) #define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX_MASK (3<<3) #define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) +#define TRI_STRIP_PROVOKE_VRTX_MASK (3<<0) #define TRI_STRIP_PROVOKE_VRTX(x) (x) /* _3DSTATE_SCISSOR_ENABLE, p200 */ diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c index 8ef6c9144f..645ebe3057 100644 --- a/src/mesa/drivers/dri/i915/i830_state.c +++ b/src/mesa/drivers/dri/i915/i830_state.c @@ -1047,6 +1047,16 @@ i830_init_packets(struct i830_context *i830) TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | TEXBIND_SET0(TEXCOORDSRC_VTXSET_0)); + i830->state.RasterRules[I830_RASTER_RULES] = (_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + ENABLE_TRI_STRIP_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + TRI_STRIP_PROVOKE_VRTX(2)); + i830->state.Stipple[I830_STPREG_ST0] = _3DSTATE_STIPPLE; @@ -1058,6 +1068,27 @@ i830_init_packets(struct i830_context *i830) i830->state.Buffer[I830_DESTREG_SR2] = 0; } +void 
+i830_update_provoking_vertex(GLcontext * ctx) +{ + struct i830_context *i830 = i830_context(ctx); + + I830_STATECHANGE(i830, I830_UPLOAD_RASTER_RULES); + i830->state.RasterRules[I830_RASTER_RULES] &= ~(LINE_STRIP_PROVOKE_VRTX_MASK | + TRI_FAN_PROVOKE_VRTX_MASK | + TRI_STRIP_PROVOKE_VRTX_MASK); + + /* _NEW_LIGHT */ + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) { + i830->state.RasterRules[I830_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + TRI_STRIP_PROVOKE_VRTX(2)); + } else { + i830->state.RasterRules[I830_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(0) | + TRI_FAN_PROVOKE_VRTX(1) | + TRI_STRIP_PROVOKE_VRTX(0)); + } +} void i830InitStateFuncs(struct dd_function_table *functions) @@ -1101,6 +1132,7 @@ i830InitState(struct i830_context *i830) i830->current = &i830->state; i830->state.emitted = 0; i830->state.active = (I830_UPLOAD_INVARIENT | + I830_UPLOAD_RASTER_RULES | I830_UPLOAD_TEXBLEND(0) | I830_UPLOAD_STIPPLE | I830_UPLOAD_CTX | I830_UPLOAD_BUFFERS); diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 9c6f891dd3..983f6724c9 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -299,7 +299,7 @@ i830_emit_invarient_state(struct intel_context *intel) { BATCH_LOCALS; - BEGIN_BATCH(30, IGNORE_CLIPRECTS); + BEGIN_BATCH(29, IGNORE_CLIPRECTS); OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); OUT_BATCH(0); @@ -351,15 +351,6 @@ i830_emit_invarient_state(struct intel_context *intel) OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3)); - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - ENABLE_TRI_STRIP_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2)); - OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM); OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | 
DISABLE_PERSPECTIVE_DIVIDE); @@ -394,6 +385,9 @@ get_state_size(struct i830_hw_state *state) if (dirty & I830_UPLOAD_INVARIENT) sz += 40 * sizeof(int); + if (dirty & I830_UPLOAD_RASTER_RULES) + sz += sizeof(state->RasterRules); + if (dirty & I830_UPLOAD_CTX) sz += sizeof(state->Ctx); @@ -486,6 +480,11 @@ i830_emit_state(struct intel_context *intel) i830_emit_invarient_state(intel); } + if (dirty & I830_UPLOAD_RASTER_RULES) { + DBG("I830_UPLOAD_RASTER_RULES:\n"); + emit(intel, state->RasterRules, sizeof(state->RasterRules)); + } + if (dirty & I830_UPLOAD_CTX) { DBG("I830_UPLOAD_CTX:\n"); emit(intel, state->Ctx, sizeof(state->Ctx)); @@ -737,6 +736,13 @@ i830_assert_not_dirty( struct intel_context *intel ) assert(!get_dirty(state)); } +static void +i830_invalidate_state(struct intel_context *intel, GLuint new_state) +{ + if (new_state & _NEW_LIGHT) + i830_update_provoking_vertex(&intel->ctx); +} + void i830InitVtbl(struct i830_context *i830) { @@ -752,4 +758,5 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.render_prevalidate = i830_render_prevalidate; i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty; i830->intel.vtbl.finish_batch = intel_finish_vb; + i830->intel.vtbl.invalidate_state = i830_invalidate_state; } diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 367d2a3b64..3ab7d682ee 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -27,6 +27,7 @@ #include "i915_context.h" #include "main/imports.h" +#include "main/macros.h" #include "intel_tex.h" #include "intel_tris.h" #include "tnl/t_context.h" @@ -75,6 +76,10 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state) if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) i915_update_fog(ctx); + if (new_state & (_NEW_STENCIL | _NEW_BUFFERS | _NEW_POLYGON)) + i915_update_stencil(ctx); + if (new_state & (_NEW_LIGHT)) + i915_update_provoking_vertex(ctx); } @@ -162,6 
+167,9 @@ i915CreateContext(const __GLcontextModes * mesaVis, ctx->Const.FragmentProgram.MaxNativeTexIndirections = I915_MAX_TEX_INDIRECT; ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* I don't think we have one */ + ctx->Const.FragmentProgram.MaxEnvParams = + MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, + ctx->Const.FragmentProgram.MaxEnvParams); ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h index 87bbf5f927..8de4a9d0d3 100644 --- a/src/mesa/drivers/dri/i915/i915_context.h +++ b/src/mesa/drivers/dri/i915/i915_context.h @@ -48,6 +48,7 @@ #define I915_UPLOAD_FOG 0x20 #define I915_UPLOAD_INVARIENT 0x40 #define I915_UPLOAD_DEFAULTS 0x80 +#define I915_UPLOAD_RASTER_RULES 0x100 #define I915_UPLOAD_TEX(i) (0x00010000<<(i)) #define I915_UPLOAD_TEX_ALL (0x00ff0000) #define I915_UPLOAD_TEX_0_SHIFT 16 @@ -82,7 +83,9 @@ #define I915_CTXREG_IAB 6 #define I915_CTXREG_BLENDCOLOR0 7 #define I915_CTXREG_BLENDCOLOR1 8 -#define I915_CTX_SETUP_SIZE 9 +#define I915_CTXREG_BF_STENCIL_OPS 9 +#define I915_CTXREG_BF_STENCIL_MASKS 10 +#define I915_CTX_SETUP_SIZE 11 #define I915_FOGREG_COLOR 0 #define I915_FOGREG_MODE0 1 @@ -110,6 +113,10 @@ #define I915_DEFREG_Z1 5 #define I915_DEF_SETUP_SIZE 6 +enum { + I915_RASTER_RULES, + I915_RASTER_RULES_SETUP_SIZE, +}; #define I915_MAX_CONSTANT 32 #define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT)) @@ -206,6 +213,7 @@ struct i915_hw_state GLuint Stipple[I915_STP_SETUP_SIZE]; GLuint Fog[I915_FOG_SETUP_SIZE]; GLuint Defaults[I915_DEF_SETUP_SIZE]; + GLuint RasterRules[I915_RASTER_RULES_SETUP_SIZE]; GLuint Tex[I915_TEX_UNITS][I915_TEX_SETUP_SIZE]; GLuint Constant[I915_CONSTANT_SIZE]; GLuint ConstantSize; @@ -321,6 +329,8 @@ extern void i915_print_ureg(const char *msg, GLuint ureg); extern void i915InitStateFunctions(struct dd_function_table *functions); extern void i915InitState(struct i915_context *i915); extern void 
i915_update_fog(GLcontext * ctx); +extern void i915_update_stencil(GLcontext * ctx); +extern void i915_update_provoking_vertex(GLcontext *ctx); /*====================================================================== diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h index 84db58ea95..b5fa7fddb9 100644 --- a/src/mesa/drivers/dri/i915/i915_reg.h +++ b/src/mesa/drivers/dri/i915/i915_reg.h @@ -86,8 +86,10 @@ #define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16) #define BFM_STENCIL_TEST_MASK_SHIFT 8 #define BFM_STENCIL_TEST_MASK_MASK (0xff<<8) +#define BFM_STENCIL_TEST_MASK(x) (((x)&0xff) << 8) #define BFM_STENCIL_WRITE_MASK_SHIFT 0 #define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0) +#define BFM_STENCIL_WRITE_MASK(x) ((x)&0xff) @@ -295,7 +297,9 @@ #define TEXKILL_4D (1<<9) #define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) #define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define LINE_STRIP_PROVOKE_VRTX_MASK (3 << 6) #define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX_MASK (3 << 3) #define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) /* _3DSTATE_SCISSOR_ENABLE, p256 */ diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 814fb59fd3..b60efea75b 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -48,73 +48,119 @@ #define FILE_DEBUG_FLAG DEBUG_STATE -static void -i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref, - GLuint mask) +void +i915_update_stencil(GLcontext * ctx) { struct i915_context *i915 = I915_CONTEXT(ctx); - int test = intel_translate_compare_func(func); + GLuint front_ref, front_writemask, front_mask; + GLenum front_func, front_fail, front_pass_z_fail, front_pass_z_pass; + GLuint back_ref, back_writemask, back_mask; + GLenum back_func, back_fail, back_pass_z_fail, back_pass_z_pass; - mask = mask & 0xff; - - DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(func), ref, mask); + 
I915_STATECHANGE(i915, I915_UPLOAD_CTX); + /* The 915 considers CW to be "front" for two-sided stencil, so choose + * appropriately. + */ + /* _NEW_POLYGON | _NEW_STENCIL */ + if (ctx->Polygon.FrontFace == GL_CW) { + front_ref = ctx->Stencil.Ref[0]; + front_mask = ctx->Stencil.ValueMask[0]; + front_writemask = ctx->Stencil.WriteMask[0]; + front_func = ctx->Stencil.Function[0]; + front_fail = ctx->Stencil.FailFunc[0]; + front_pass_z_fail = ctx->Stencil.ZFailFunc[0]; + front_pass_z_pass = ctx->Stencil.ZPassFunc[0]; + back_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace]; + back_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace]; + back_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace]; + back_func = ctx->Stencil.Function[ctx->Stencil._BackFace]; + back_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace]; + back_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace]; + back_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace]; + } else { + front_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace]; + front_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace]; + front_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace]; + front_func = ctx->Stencil.Function[ctx->Stencil._BackFace]; + front_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace]; + front_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace]; + front_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace]; + back_ref = ctx->Stencil.Ref[0]; + back_mask = ctx->Stencil.ValueMask[0]; + back_writemask = ctx->Stencil.WriteMask[0]; + back_func = ctx->Stencil.Function[0]; + back_fail = ctx->Stencil.FailFunc[0]; + back_pass_z_fail = ctx->Stencil.ZFailFunc[0]; + back_pass_z_pass = ctx->Stencil.ZPassFunc[0]; + } - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK; + /* Set front state. 
*/ + i915->state.Ctx[I915_CTXREG_STATE4] &= ~(MODE4_ENABLE_STENCIL_TEST_MASK | + MODE4_ENABLE_STENCIL_WRITE_MASK); i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK | - STENCIL_TEST_MASK(mask)); + ENABLE_STENCIL_WRITE_MASK | + STENCIL_TEST_MASK(front_mask) | + STENCIL_WRITE_MASK(front_writemask)); i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK | - S5_STENCIL_TEST_FUNC_MASK); + S5_STENCIL_TEST_FUNC_MASK | + S5_STENCIL_FAIL_MASK | + S5_STENCIL_PASS_Z_FAIL_MASK | + S5_STENCIL_PASS_Z_PASS_MASK); + + i915->state.Ctx[I915_CTXREG_LIS5] |= + (front_ref << S5_STENCIL_REF_SHIFT) | + (intel_translate_compare_func(front_func) << S5_STENCIL_TEST_FUNC_SHIFT) | + (intel_translate_stencil_op(front_fail) << S5_STENCIL_FAIL_SHIFT) | + (intel_translate_stencil_op(front_pass_z_fail) << + S5_STENCIL_PASS_Z_FAIL_SHIFT) | + (intel_translate_stencil_op(front_pass_z_pass) << + S5_STENCIL_PASS_Z_PASS_SHIFT); + + /* Set back state if different from front. */ + if (ctx->Stencil._TestTwoSide) { + i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &= + ~(BFO_STENCIL_REF_MASK | + BFO_STENCIL_TEST_MASK | + BFO_STENCIL_FAIL_MASK | + BFO_STENCIL_PASS_Z_FAIL_MASK | + BFO_STENCIL_PASS_Z_PASS_MASK); + i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] |= BFO_STENCIL_TWO_SIDE | + (back_ref << BFO_STENCIL_REF_SHIFT) | + (intel_translate_compare_func(back_func) << BFO_STENCIL_TEST_SHIFT) | + (intel_translate_stencil_op(back_fail) << BFO_STENCIL_FAIL_SHIFT) | + (intel_translate_stencil_op(back_pass_z_fail) << + BFO_STENCIL_PASS_Z_FAIL_SHIFT) | + (intel_translate_stencil_op(back_pass_z_pass) << + BFO_STENCIL_PASS_Z_PASS_SHIFT); + + i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] &= + ~(BFM_STENCIL_TEST_MASK_MASK | + BFM_STENCIL_WRITE_MASK_MASK); + i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] |= + BFM_STENCIL_TEST_MASK(back_mask) | + BFM_STENCIL_WRITE_MASK(back_writemask); + } else { + i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &= ~BFO_STENCIL_TWO_SIDE; + } +} - 
i915->state.Ctx[I915_CTXREG_LIS5] |= ((ref << S5_STENCIL_REF_SHIFT) | - (test << - S5_STENCIL_TEST_FUNC_SHIFT)); +static void +i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref, + GLuint mask) +{ } static void i915StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) { - struct i915_context *i915 = I915_CONTEXT(ctx); - - DBG("%s : mask 0x%x\n", __FUNCTION__, mask); - - mask = mask & 0xff; - - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK; - i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK | - STENCIL_WRITE_MASK(mask)); } - static void i915StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail, GLenum zfail, GLenum zpass) { - struct i915_context *i915 = I915_CONTEXT(ctx); - int fop = intel_translate_stencil_op(fail); - int dfop = intel_translate_stencil_op(zfail); - int dpop = intel_translate_stencil_op(zpass); - - - DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(fail), - _mesa_lookup_enum_by_nr(zfail), _mesa_lookup_enum_by_nr(zpass)); - - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - - i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_FAIL_MASK | - S5_STENCIL_PASS_Z_FAIL_MASK | - S5_STENCIL_PASS_Z_PASS_MASK); - - i915->state.Ctx[I915_CTXREG_LIS5] |= ((fop << S5_STENCIL_FAIL_SHIFT) | - (dfop << - S5_STENCIL_PASS_Z_FAIL_SHIFT) | - (dpop << - S5_STENCIL_PASS_Z_PASS_SHIFT)); } static void @@ -945,6 +991,17 @@ i915_init_packets(struct i915_context *i915) _3DSTATE_CONST_BLEND_COLOR_CMD; i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = 0; + i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] = + _3DSTATE_BACKFACE_STENCIL_MASKS | + BFM_ENABLE_STENCIL_TEST_MASK | + BFM_ENABLE_STENCIL_WRITE_MASK | + (0xff << BFM_STENCIL_WRITE_MASK_SHIFT) | + (0xff << BFM_STENCIL_TEST_MASK_SHIFT); + i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] = + _3DSTATE_BACKFACE_STENCIL_OPS | + BFO_ENABLE_STENCIL_REF | + BFO_ENABLE_STENCIL_FUNCS | + 
BFO_ENABLE_STENCIL_TWO_SIDE; } { @@ -976,6 +1033,13 @@ i915_init_packets(struct i915_context *i915) i915->state.Buffer[I915_DESTREG_SR2] = 0; } + i915->state.RasterRules[I915_RASTER_RULES] = _3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D; #if 0 { @@ -996,7 +1060,33 @@ i915_init_packets(struct i915_context *i915) i915->state.active = (I915_UPLOAD_PROGRAM | I915_UPLOAD_STIPPLE | I915_UPLOAD_CTX | - I915_UPLOAD_BUFFERS | I915_UPLOAD_INVARIENT); + I915_UPLOAD_BUFFERS | + I915_UPLOAD_INVARIENT | + I915_UPLOAD_RASTER_RULES); +} + +void +i915_update_provoking_vertex(GLcontext * ctx) +{ + struct i915_context *i915 = I915_CONTEXT(ctx); + + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + i915->state.Ctx[I915_CTXREG_LIS6] &= ~(S6_TRISTRIP_PV_MASK); + + I915_STATECHANGE(i915, I915_UPLOAD_RASTER_RULES); + i915->state.RasterRules[I915_RASTER_RULES] &= ~(LINE_STRIP_PROVOKE_VRTX_MASK | + TRI_FAN_PROVOKE_VRTX_MASK); + + /* _NEW_LIGHT */ + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) { + i915->state.RasterRules[I915_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2)); + i915->state.Ctx[I915_CTXREG_LIS6] |= (2 << S6_TRISTRIP_PV_SHIFT); + } else { + i915->state.RasterRules[I915_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(0) | + TRI_FAN_PROVOKE_VRTX(1)); + i915->state.Ctx[I915_CTXREG_LIS6] |= (0 << S6_TRISTRIP_PV_SHIFT); + } } void diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index fe1be93a6d..9a723d3cd7 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -176,7 +176,7 @@ i915_emit_invarient_state(struct intel_context *intel) { BATCH_LOCALS; - BEGIN_BATCH(20, IGNORE_CLIPRECTS); + BEGIN_BATCH(17, IGNORE_CLIPRECTS); OUT_BATCH(_3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | @@ 
-200,14 +200,6 @@ i915_emit_invarient_state(struct intel_context *intel) CSB_TCB(3, 3) | CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7)); - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D); - /* Need to initialize this to zero. */ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); @@ -225,11 +217,6 @@ i915_emit_invarient_state(struct intel_context *intel) OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */ OUT_BATCH(0); - - /* Don't support twosided stencil yet */ - OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0); - OUT_BATCH(0); - ADVANCE_BATCH(); } @@ -263,6 +250,9 @@ get_state_size(struct i915_hw_state *state) if (dirty & I915_UPLOAD_INVARIENT) sz += 30 * 4; + if (dirty & I915_UPLOAD_RASTER_RULES) + sz += sizeof(state->RasterRules); + if (dirty & I915_UPLOAD_CTX) sz += sizeof(state->Ctx); @@ -371,6 +361,12 @@ i915_emit_state(struct intel_context *intel) i915_emit_invarient_state(intel); } + if (dirty & I915_UPLOAD_RASTER_RULES) { + if (INTEL_DEBUG & DEBUG_STATE) + fprintf(stderr, "I915_UPLOAD_RASTER_RULES:\n"); + emit(intel, state->RasterRules, sizeof(state->RasterRules)); + } + if (dirty & I915_UPLOAD_CTX) { if (INTEL_DEBUG & DEBUG_STATE) fprintf(stderr, "I915_UPLOAD_CTX:\n"); diff --git a/src/mesa/drivers/dri/i915/intel_syncobj.c b/src/mesa/drivers/dri/i915/intel_syncobj.c new file mode 120000 index 0000000000..0b2e56ab24 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_syncobj.c @@ -0,0 +1 @@ +../intel/intel_syncobj.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 00a42111da..6e9a9a29a3 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -26,6 +26,7 @@ DRIVER_SOURCES = \ intel_pixel_read.c \ intel_state.c \ intel_swapbuffers.c \ + intel_syncobj.c \ intel_tex.c \ intel_tex_copy.c \ intel_tex_format.c \ @@ -43,6 +44,7 @@ DRIVER_SOURCES = \ brw_clip_util.c \ brw_context.c \ brw_curbe.c \ + brw_disasm.c \ brw_draw.c \ brw_draw_upload.c \ brw_eu.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index c724218cf5..1088a7a607 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -39,12 +39,14 @@ static void prepare_cc_vp( struct brw_context *brw ) { + GLcontext *ctx = &brw->intel.ctx; struct brw_cc_viewport ccv; memset(&ccv, 0, sizeof(ccv)); - ccv.min_depth = 0.0; - ccv.max_depth = 1.0; + /* _NEW_VIEWPORT */ + ccv.min_depth = ctx->Viewport.Near; + ccv.max_depth = ctx->Viewport.Far; dri_bo_unreference(brw->cc.vp_bo); brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); @@ -52,7 +54,7 @@ static void prepare_cc_vp( struct brw_context *brw ) const struct brw_tracked_state brw_cc_vp = { .dirty = { - .mesa = 0, + .mesa = _NEW_VIEWPORT, .brw = BRW_NEW_CONTEXT, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h index 12e8548df1..957df441ab 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.h +++ b/src/mesa/drivers/dri/i965/brw_clip.h @@ -100,6 +100,8 @@ struct brw_clip_compile { struct brw_reg fixed_planes; struct brw_reg plane_equation; + + struct brw_reg ff_sync; } reg; /* 3 different ways of expressing vertex size: @@ -173,4 +175,5 @@ struct brw_reg get_tmp( struct brw_clip_compile *c ); void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ); void brw_clip_ff_sync(struct brw_clip_compile *c); +void 
brw_clip_init_ff_sync(struct brw_clip_compile *c); #endif diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index 9abd0642aa..048ca620fa 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -85,6 +85,10 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) i++; } + if (c->need_ff_sync) { + c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); + i++; + } c->first_tmp = i; c->last_tmp = i; @@ -246,8 +250,6 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); - if (c->need_ff_sync) - brw_clip_ff_sync(c); not_culled = brw_IF(p, BRW_EXECUTE_1); { brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE); @@ -265,6 +267,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) void brw_emit_line_clip( struct brw_clip_compile *c ) { brw_clip_line_alloc_regs(c); + brw_clip_init_ff_sync(c); if (c->key.do_flat_shading) brw_clip_copy_colors(c, 0, 1); diff --git a/src/mesa/drivers/dri/i965/brw_clip_point.c b/src/mesa/drivers/dri/i965/brw_clip_point.c index 9738299168..8458f61c5a 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_point.c +++ b/src/mesa/drivers/dri/i965/brw_clip_point.c @@ -50,7 +50,7 @@ void brw_emit_point_clip( struct brw_clip_compile *c ) /* Send an empty message to kill the thread: */ brw_clip_tri_alloc_regs(c, 0); - if (c->need_ff_sync) - brw_clip_ff_sync(c); + brw_clip_init_ff_sync(c); + brw_clip_kill_thread(c); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 5762c9577c..234b3744bf 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -43,11 +43,14 @@ struct brw_clip_unit_key { unsigned int curbe_offset; unsigned int nr_urb_entries, urb_size; + + GLboolean depth_clamp; 
}; static void clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) { + GLcontext *ctx = &brw->intel.ctx; memset(key, 0, sizeof(*key)); /* CACHE_NEW_CLIP_PROG */ @@ -62,6 +65,9 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) /* BRW_NEW_URB_FENCE */ key->nr_urb_entries = brw->urb.nr_clip_entries; key->urb_size = brw->urb.vsize; + + /* _NEW_TRANSFORM */ + key->depth_clamp = ctx->Transform.DepthClamp; } static dri_bo * @@ -117,7 +123,8 @@ clip_unit_create_from_key(struct brw_context *brw, clip.clip5.userclip_enable_flags = 0x7f; clip.clip5.userclip_must_clip = 1; clip.clip5.guard_band_enable = 0; - clip.clip5.viewport_z_clip_enable = 1; + if (!key->depth_clamp) + clip.clip5.viewport_z_clip_enable = 1; clip.clip5.viewport_xy_clip_enable = 1; clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; clip.clip5.api_mode = BRW_CLIP_API_OGL; @@ -168,7 +175,7 @@ static void upload_clip_unit( struct brw_context *brw ) const struct brw_tracked_state brw_clip_unit = { .dirty = { - .mesa = 0, + .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index 4c2d655fb1..0efd77225e 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -119,6 +119,11 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, i++; } + if (c->need_ff_sync) { + c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); + i++; + } + c->first_tmp = i; c->last_tmp = i; @@ -563,6 +568,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); brw_clip_init_clipmask(c); + brw_clip_init_ff_sync(c); /* if -ve rhw workaround bit is set, do cliptest */ @@ -589,8 +595,6 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) else maybe_do_clip_tri(c); - if 
(c->need_ff_sync) - brw_clip_ff_sync(c); brw_clip_tri_emit_polygon(c); /* Send an empty message to kill the thread: diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c index 26950383c1..ad1bfa435f 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c +++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c @@ -453,6 +453,7 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c ) brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); + brw_clip_init_ff_sync(c); assert(c->offset[VERT_RESULT_EDGE]); @@ -496,8 +497,6 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c ) } brw_ENDIF(p, do_clip); - if (c->need_ff_sync) - brw_clip_ff_sync(c); emit_unfilled_primitives(c); brw_clip_kill_thread(c); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index e09efc07ed..5a73abdfee 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -213,6 +213,8 @@ void brw_clip_emit_vue(struct brw_clip_compile *c, struct brw_compile *p = &c->func; GLuint start = c->last_mrf; + brw_clip_ff_sync(c); + assert(!(allocate && eot)); /* Cycle through mrf regs - probably futile as we have to wait for @@ -263,6 +265,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c) { struct brw_compile *p = &c->func; + brw_clip_ff_sync(c); /* Send an empty message to kill the thread and release any * allocated urb entry: */ @@ -356,17 +359,38 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) void brw_clip_ff_sync(struct brw_clip_compile *c) { + if (c->need_ff_sync) { + struct brw_compile *p = &c->func; + struct brw_instruction *need_ff_sync; + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1)); + need_ff_sync = brw_IF(p, BRW_EXECUTE_1); + { + brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); + brw_ff_sync(p, + c->reg.R0, + 0, + 
c->reg.R0, + 1, + 1, /* used */ + 1, /* msg length */ + 1, /* response length */ + 0, /* eot */ + 1, /* write compelete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); + } + brw_ENDIF(p, need_ff_sync); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } +} + +void brw_clip_init_ff_sync(struct brw_clip_compile *c) +{ + if (c->need_ff_sync) { struct brw_compile *p = &c->func; - brw_ff_sync(p, - c->reg.R0, - 0, - c->reg.R0, - 1, - 1, /* used */ - 1, /* msg length */ - 1, /* response length */ - 0, /* eot */ - 1, /* write compelete */ - 0, /* urb offset */ - BRW_URB_SWIZZLE_NONE); + + brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); + } } diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 4dbe551d83..3c5b848319 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -126,7 +126,32 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /* We want the GLSL compiler to emit code that uses condition codes */ ctx->Shader.EmitCondCodes = GL_TRUE; -/* ctx->Const.MaxNativeVertexProgramTemps = 32; */ + ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); + ctx->Const.VertexProgram.MaxAluInstructions = 0; + ctx->Const.VertexProgram.MaxTexInstructions = 0; + ctx->Const.VertexProgram.MaxTexIndirections = 0; + ctx->Const.VertexProgram.MaxNativeAluInstructions = 0; + ctx->Const.VertexProgram.MaxNativeTexInstructions = 0; + ctx->Const.VertexProgram.MaxNativeTexIndirections = 0; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; + ctx->Const.VertexProgram.MaxNativeTemps = 256; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + ctx->Const.VertexProgram.MaxNativeParameters = 1024; + ctx->Const.VertexProgram.MaxEnvParams = + MIN2(ctx->Const.VertexProgram.MaxNativeParameters, + ctx->Const.VertexProgram.MaxEnvParams); + + ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024); + 
ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeAttribs = 12; + ctx->Const.FragmentProgram.MaxNativeTemps = 256; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; + ctx->Const.FragmentProgram.MaxNativeParameters = 1024; + ctx->Const.FragmentProgram.MaxEnvParams = + MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, + ctx->Const.FragmentProgram.MaxEnvParams); brw_init_state( brw ); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 57ddf75413..a5209ac41b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -143,6 +143,7 @@ struct brw_context; #define BRW_NEW_DEPTH_BUFFER 0x20000 #define BRW_NEW_NR_WM_SURFACES 0x40000 #define BRW_NEW_NR_VS_SURFACES 0x80000 +#define BRW_NEW_INDEX_BUFFER 0x100000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -173,6 +174,9 @@ struct brw_fragment_program { dri_bo *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; + + /** for debugging, which texture units are referenced */ + GLbitfield tex_units_used; }; @@ -386,6 +390,8 @@ struct brw_cached_batch_item { struct brw_vertex_element { const struct gl_client_array *glarray; + /** The corresponding Mesa vertex attribute */ + gl_vert_attrib attrib; /** Size of a complete element */ GLuint element_size; /** Number of uploaded elements for this input. */ @@ -438,9 +444,13 @@ struct brw_query_object { unsigned int count; }; + +/** + * brw_context is derived from intel_context. 
+ */ struct brw_context { - struct intel_context intel; + struct intel_context intel; /**< base class, must be first field */ GLuint primitive; GLboolean emit_state_always; @@ -475,6 +485,9 @@ struct brw_context struct { struct brw_vertex_element inputs[VERT_ATTRIB_MAX]; + struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; + GLuint nr_enabled; + #define BRW_NR_UPLOAD_BUFS 17 #define BRW_UPLOAD_INIT_SIZE (128*1024) @@ -498,8 +511,15 @@ struct brw_context */ const struct _mesa_index_buffer *ib; + /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */ dri_bo *bo; unsigned int offset; + unsigned int size; + /* Offset to index buffer index to use in CMD_3D_PRIM so that we can + * avoid re-uploading the IB packet over and over if we're actually + * referencing the same index buffer. + */ + unsigned int start_vertex_offset; } ib; /* Active vertex program: @@ -706,6 +726,8 @@ void brw_upload_urb_fence(struct brw_context *brw); */ void brw_upload_cs_urb_state(struct brw_context *brw); +/* brw_disasm.c */ +int brw_disasm (FILE *file, struct brw_instruction *inst); /*====================================================================== * Inline conversion functions. These are better-typed than the diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index a1a6c53d0e..0b0e6931a0 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -248,6 +248,9 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; + /* Updates the ParameterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. 
+ */ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); /* XXX just use a memcpy here */ diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index d166250b4f..78d457ad2b 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -471,8 +471,9 @@ #define BRW_CONDITIONAL_GE 4 #define BRW_CONDITIONAL_L 5 #define BRW_CONDITIONAL_LE 6 -#define BRW_CONDITIONAL_C 7 +#define BRW_CONDITIONAL_R 7 #define BRW_CONDITIONAL_O 8 +#define BRW_CONDITIONAL_U 9 #define BRW_DEBUG_NONE 0 #define BRW_DEBUG_BREAKPOINT 1 @@ -512,6 +513,7 @@ #define BRW_OPCODE_RSL 11 #define BRW_OPCODE_ASR 12 #define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_CMPN 17 #define BRW_OPCODE_JMPI 32 #define BRW_OPCODE_IF 34 #define BRW_OPCODE_IFF 35 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c new file mode 100644 index 0000000000..9fef230507 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -0,0 +1,903 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. 
+ * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <stdarg.h> + +#include "main/mtypes.h" + +#include "brw_context.h" +#include "brw_defines.h" + +struct { + char *name; + int nsrc; + int ndst; +} opcode[128] = { + [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + + [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ADD] = { 
.name = "add", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, +}; + +char *conditional_modifier[16] = { + [BRW_CONDITIONAL_NONE] = "", + [BRW_CONDITIONAL_Z] = ".e", + [BRW_CONDITIONAL_NZ] = ".ne", + [BRW_CONDITIONAL_G] = ".g", + [BRW_CONDITIONAL_GE] = ".ge", + [BRW_CONDITIONAL_L] = ".l", + [BRW_CONDITIONAL_LE] = ".le", + [BRW_CONDITIONAL_R] = ".r", + 
[BRW_CONDITIONAL_O] = ".o", + [BRW_CONDITIONAL_U] = ".u", +}; + +char *negate[2] = { + [0] = "", + [1] = "-", +}; + +char *_abs[2] = { + [0] = "", + [1] = "(abs)", +}; + +char *vert_stride[16] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4", + [4] = "8", + [5] = "16", + [6] = "32", + [15] = "VxH", +}; + +char *width[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", +}; + +char *horiz_stride[4] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4" +}; + +char *chan_sel[4] = { + [0] = "x", + [1] = "y", + [2] = "z", + [3] = "w", +}; + +char *dest_condmod[16] = { +}; + +char *debug_ctrl[2] = { + [0] = "", + [1] = ".breakpoint" +}; + +char *saturate[2] = { + [0] = "", + [1] = ".sat" +}; + +char *exec_size[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", + [5] = "32" +}; + +char *pred_inv[2] = { + [0] = "+", + [1] = "-" +}; + +char *pred_ctrl_align16[16] = { + [1] = "", + [2] = ".x", + [3] = ".y", + [4] = ".z", + [5] = ".w", + [6] = ".any4h", + [7] = ".all4h", +}; + +char *pred_ctrl_align1[16] = { + [1] = "", + [2] = ".anyv", + [3] = ".allv", + [4] = ".any2h", + [5] = ".all2h", + [6] = ".any4h", + [7] = ".all4h", + [8] = ".any8h", + [9] = ".all8h", + [10] = ".any16h", + [11] = ".all16h", +}; + +char *thread_ctrl[4] = { + [0] = "", + [2] = "switch" +}; + +char *compr_ctrl[4] = { + [0] = "", + [1] = "sechalf", + [2] = "compr", +}; + +char *dep_ctrl[4] = { + [0] = "", + [1] = "NoDDClr", + [2] = "NoDDChk", + [3] = "NoDDClr,NoDDChk", +}; + +char *mask_ctrl[4] = { + [0] = "", + [1] = "nomask", +}; + +char *access_mode[2] = { + [0] = "align1", + [1] = "align16", +}; + +char *reg_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [4] = "UB", + [5] = "B", + [7] = "F" +}; + +/* Names of immediate operand types. Each index may be designated only once: + * a repeated designator silently replaces the earlier entry and leaves the + * intended slot NULL. NOTE(review): slot 6 for "V" follows the + * BRW_REGISTER_TYPE_V encoding in brw_defines.h -- confirm. + */ +char *imm_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [5] = "VF", + [6] = "V", + [7] = "F" +}; + +char *reg_file[4] = { + [0] = "A", + [1] = "g", + [2] = "m", + [3] = "imm", +}; + +char *writemask[16] = { +
[0x0] = ".", + [0x1] = ".x", + [0x2] = ".y", + [0x3] = ".xy", + [0x4] = ".z", + [0x5] = ".xz", + [0x6] = ".yz", + [0x7] = ".xyz", + [0x8] = ".w", + [0x9] = ".xw", + [0xa] = ".yw", + [0xb] = ".xyw", + [0xc] = ".zw", + [0xd] = ".xzw", + [0xe] = ".yzw", + [0xf] = "", +}; + +char *end_of_thread[2] = { + [0] = "", + [1] = "EOT" +}; + +char *target_function[16] = { + [BRW_MESSAGE_TARGET_NULL] = "null", + [BRW_MESSAGE_TARGET_MATH] = "math", + [BRW_MESSAGE_TARGET_SAMPLER] = "sampler", + [BRW_MESSAGE_TARGET_GATEWAY] = "gateway", + [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read", + [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write", + [BRW_MESSAGE_TARGET_URB] = "urb", + [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" +}; + +/* Printable names for the math message function field. The quotient-only + * function is the division ("intdiv") and the remainder-only function is + * the modulus ("intmod"). + */ +char *math_function[16] = { + [BRW_MATH_FUNCTION_INV] = "inv", + [BRW_MATH_FUNCTION_LOG] = "log", + [BRW_MATH_FUNCTION_EXP] = "exp", + [BRW_MATH_FUNCTION_SQRT] = "sqrt", + [BRW_MATH_FUNCTION_RSQ] = "rsq", + [BRW_MATH_FUNCTION_SIN] = "sin", + [BRW_MATH_FUNCTION_COS] = "cos", + [BRW_MATH_FUNCTION_SINCOS] = "sincos", + [BRW_MATH_FUNCTION_TAN] = "tan", + [BRW_MATH_FUNCTION_POW] = "pow", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv", + [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod", +}; + +char *math_saturate[2] = { + [0] = "", + [1] = "sat" +}; + +char *math_signed[2] = { + [0] = "", + [1] = "signed" +}; + +char *math_scalar[2] = { + [0] = "", + [1] = "scalar" +}; + +char *math_precision[2] = { + [0] = "", + [1] = "partial_precision" +}; + +char *urb_swizzle[4] = { + [BRW_URB_SWIZZLE_NONE] = "", + [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", + [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose", +}; + +char *urb_allocate[2] = { + [0] = "", + [1] = "allocate" +}; + +char *urb_used[2] = { + [0] = "", + [1] = "used" +}; + +char *urb_complete[2] = { + [0] = "", + [1] = "complete" +}; + +char *sampler_target_format[4] = { + [0] = "F", + [2] = "UD", + [3] = "D" +}; + + +static int column; + +static int
string (FILE *file, char *string) +{ + fputs (string, file); + column += strlen (string); + return 0; +} + +/* printf-style front end for string(): formats into a fixed 1024-byte + * buffer (longer output is truncated) and emits it so that the running + * output column stays up to date. Always returns 0. + */ +static int format (FILE *f, char *format, ...) +{ + char buf[1024]; + va_list args; + va_start (args, format); + + vsnprintf (buf, sizeof (buf) - 1, format, args); + /* Every va_start must be paired with va_end before returning. */ + va_end (args); + string (f, buf); + return 0; +} + +static int newline (FILE *f) +{ + putc ('\n', f); + column = 0; + return 0; +} + +static int pad (FILE *f, int c) +{ + do + string (f, " "); + while (column < c); + return 0; +} + +static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space) +{ + if (!ctrl[id]) { + fprintf (file, "*** invalid %s value %d ", + name, id); + return 1; + } + if (ctrl[id][0]) + { + if (space && *space) + string (file, " "); + string (file, ctrl[id]); + if (space) + *space = 1; + } + return 0; +} + +static int print_opcode (FILE *file, int id) +{ + if (!opcode[id].name) { + format (file, "*** invalid opcode value %d ", id); + return 1; + } + string (file, opcode[id].name); + return 0; +} + +static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) +{ + int err = 0; + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (_reg_nr & 0xf0) { + case BRW_ARF_NULL: + string (file, "null"); + return -1; + case BRW_ARF_ADDRESS: + format (file, "a%d", _reg_nr & 0x0f); + break; + case BRW_ARF_ACCUMULATOR: + format (file, "acc%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK: + format (file, "mask%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK_STACK: + format (file, "msd%d", _reg_nr & 0x0f); + break; + case BRW_ARF_STATE: + format (file, "sr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_CONTROL: + format (file, "cr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_NOTIFICATION_COUNT: + format (file, "n%d", _reg_nr & 0x0f); + break; + case BRW_ARF_IP: + string (file, "ip"); + return -1; + break; + default: + format (file, "ARF%d", _reg_nr); + break; + } + } else { + err |= control (file, "src reg file", reg_file, _reg_file, NULL); + format (file, "%d", _reg_nr); + } +
return err; +} + +/* Print the destination operand of inst. Returns 0 on success, or when the + * register is one that reg() reports with -1 (nothing more is printed for + * it), and non-zero when a field could not be decoded or the addressing + * mode is unsupported. + */ +static int dest (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + + if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da1.dest_subreg_nr) + format (file, ".%d", inst->bits1.da1.dest_subreg_nr); + format (file, "<%d>", inst->bits1.da1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); + } + else + { + string (file, "g[a0"); + if (inst->bits1.ia1.dest_subreg_nr) + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr); + if (inst->bits1.ia1.dest_indirect_offset) + format (file, " %d", inst->bits1.ia1.dest_indirect_offset); + string (file, "]"); + format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL); + } + } + else + { + if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da16.dest_subreg_nr) + format (file, ".%d", inst->bits1.da16.dest_subreg_nr); + string (file, "<1>"); + err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); + } + else + { + err = 1; + string (file, "Indirect align16 address mode not supported"); + } + } + + /* Propagate the accumulated status instead of discarding it. */ + return err; +} + +static int src_align1_region (FILE *file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride) +{ + int err = 0; + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ","); + err |= control (file, "width", width, _width, NULL); + string (file, ","); + err |= control (file, "horiz_stride", horiz_stride,
_horiz_stride, NULL); + string (file, ">"); + return err; +} + +static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride, + GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, reg_num); + if (err == -1) + return 0; + if (sub_reg_num) + format (file, ".%d", sub_reg_num); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_ia1 (FILE *file, + GLuint type, + GLuint _reg_file, + GLint _addr_imm, + GLuint _addr_subreg_nr, + GLuint _negate, + GLuint __abs, + GLuint _addr_mode, + GLuint _horiz_stride, + GLuint _width, + GLuint _vert_stride) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + string (file, "g[a0"); + if (_addr_subreg_nr) + format (file, ".%d", _addr_subreg_nr); + if (_addr_imm) + format (file, " %d", _addr_imm); + string (file, "]"); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_da16 (FILE *file, + GLuint _reg_type, + GLuint _reg_file, + GLuint _vert_stride, + GLuint _reg_nr, + GLuint _subreg_nr, + GLuint __abs, + GLuint _negate, + GLuint swz_x, + GLuint swz_y, + GLuint swz_z, + GLuint swz_w) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, _reg_nr); + if (err == -1) + return 0; + if (_subreg_nr) + format (file, ".%d", _subreg_nr); + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ",1,1>"); + err |= control (file, "src da16 reg type", 
reg_encoding, _reg_type, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + + +/* Print the immediate operand stored in inst->bits3, formatted according to + * the register type code. Types not listed print nothing. Always returns 0. + */ +static int imm (FILE *file, GLuint type, struct brw_instruction *inst) { + switch (type) { + case BRW_REGISTER_TYPE_UD: + format (file, "0x%08xUD", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_D: + format (file, "%dD", inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UW: + format (file, "0x%04xUW", (uint16_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_W: + format (file, "%dW", (int16_t) inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UB: + /* Unsigned cast: a signed one would sign-extend values >= 0x80 to 0xffffffxx. */ + format (file, "0x%02xUB", (uint8_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_VF: + format (file, "Vector Float"); + break; + case BRW_REGISTER_TYPE_V: + format (file, "0x%08xV", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_F: + format (file, "%-gF", inst->bits3.f); + } + return 0; +} + +static int src0 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src0_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src0_reg_type, + inst->bits1.da1.src0_reg_file, + inst->bits2.da1.src0_vert_stride, +
inst->bits2.da1.src0_width, + inst->bits2.da1.src0_horiz_stride, + inst->bits2.da1.src0_reg_nr, + inst->bits2.da1.src0_subreg_nr, + inst->bits2.da1.src0_abs, + inst->bits2.da1.src0_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src0_reg_type, + inst->bits1.ia1.src0_reg_file, + inst->bits2.ia1.src0_indirect_offset, + inst->bits2.ia1.src0_subreg_nr, + inst->bits2.ia1.src0_negate, + inst->bits2.ia1.src0_abs, + inst->bits2.ia1.src0_address_mode, + inst->bits2.ia1.src0_horiz_stride, + inst->bits2.ia1.src0_width, + inst->bits2.ia1.src0_vert_stride); + } + } + else + { + if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src0_reg_type, + inst->bits1.da16.src0_reg_file, + inst->bits2.da16.src0_vert_stride, + inst->bits2.da16.src0_reg_nr, + inst->bits2.da16.src0_subreg_nr, + inst->bits2.da16.src0_abs, + inst->bits2.da16.src0_negate, + inst->bits2.da16.src0_swz_x, + inst->bits2.da16.src0_swz_y, + inst->bits2.da16.src0_swz_z, + inst->bits2.da16.src0_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +static int src1 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src1_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src1_reg_type, + inst->bits1.da1.src1_reg_file, + inst->bits3.da1.src1_vert_stride, + inst->bits3.da1.src1_width, + inst->bits3.da1.src1_horiz_stride, + inst->bits3.da1.src1_reg_nr, + inst->bits3.da1.src1_subreg_nr, + inst->bits3.da1.src1_abs, + inst->bits3.da1.src1_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src1_reg_type, + inst->bits1.ia1.src1_reg_file, + inst->bits3.ia1.src1_indirect_offset, + inst->bits3.ia1.src1_subreg_nr, + inst->bits3.ia1.src1_negate, + 
inst->bits3.ia1.src1_abs, + inst->bits3.ia1.src1_address_mode, + inst->bits3.ia1.src1_horiz_stride, + inst->bits3.ia1.src1_width, + inst->bits3.ia1.src1_vert_stride); + } + } + else + { + if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src1_reg_type, + inst->bits1.da16.src1_reg_file, + inst->bits3.da16.src1_vert_stride, + inst->bits3.da16.src1_reg_nr, + inst->bits3.da16.src1_subreg_nr, + inst->bits3.da16.src1_abs, + inst->bits3.da16.src1_negate, + inst->bits3.da16.src1_swz_x, + inst->bits3.da16.src1_swz_y, + inst->bits3.da16.src1_swz_z, + inst->bits3.da16.src1_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +int brw_disasm (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + int space = 0; + + if (inst->header.predicate_control) { + string (file, "("); + err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); + string (file, "f0"); + if (inst->bits2.da1.flag_reg_nr) + format (file, ".%d", inst->bits2.da1.flag_reg_nr); + if (inst->header.access_mode == BRW_ALIGN_1) + err |= control (file, "predicate control align1", pred_ctrl_align1, + inst->header.predicate_control, NULL); + else + err |= control (file, "predicate control align16", pred_ctrl_align16, + inst->header.predicate_control, NULL); + string (file, ") "); + } + + err |= print_opcode (file, inst->header.opcode); + err |= control (file, "saturate", saturate, inst->header.saturate, NULL); + err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL); + + if (inst->header.opcode != BRW_OPCODE_SEND) + err |= control (file, "conditional modifier", conditional_modifier, + inst->header.destreg__conditionalmod, NULL); + + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "("); + err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL); + string (file, ")"); + } + + if 
(inst->header.opcode == BRW_OPCODE_SEND) + format (file, " %d", inst->header.destreg__conditionalmod); + + if (opcode[inst->header.opcode].ndst > 0) { + pad (file, 16); + err |= dest (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 0) { + pad (file, 32); + err |= src0 (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 1) { + pad (file, 48); + err |= src1 (file, inst); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) { + newline (file); + pad (file, 16); + space = 0; + err |= control (file, "target function", target_function, + inst->bits3.generic.msg_target, &space); + switch (inst->bits3.generic.msg_target) { + case BRW_MESSAGE_TARGET_MATH: + err |= control (file, "math function", math_function, + inst->bits3.math.function, &space); + err |= control (file, "math saturate", math_saturate, + inst->bits3.math.saturate, &space); + err |= control (file, "math signed", math_signed, + inst->bits3.math.int_type, &space); + err |= control (file, "math scalar", math_scalar, + inst->bits3.math.data_type, &space); + err |= control (file, "math precision", math_precision, + inst->bits3.math.precision, &space); + break; + case BRW_MESSAGE_TARGET_SAMPLER: + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + break; + case BRW_MESSAGE_TARGET_DATAPORT_WRITE: + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + break; + case BRW_MESSAGE_TARGET_URB: + format (file, " %d", inst->bits3.urb.offset); + space = 1; + err |= control (file, "urb swizzle", urb_swizzle, + inst->bits3.urb.swizzle_control, &space); + err |= control (file, "urb allocate", urb_allocate, + inst->bits3.urb.allocate, 
&space); + err |= control (file, "urb used", urb_used, + inst->bits3.urb.used, &space); + err |= control (file, "urb complete", urb_complete, + inst->bits3.urb.complete, &space); + break; + case BRW_MESSAGE_TARGET_THREAD_SPAWNER: + break; + default: + format (file, "unsupported target %d", inst->bits3.generic.msg_target); + break; + } + if (space) + string (file, " "); + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } + pad (file, 64); + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "{"); + space = 1; + err |= control(file, "access mode", access_mode, inst->header.access_mode, &space); + err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space); + err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); + err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space); + err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); + if (inst->header.opcode == BRW_OPCODE_SEND) + err |= control (file, "end of thread", end_of_thread, + inst->bits3.generic.end_of_thread, &space); + if (space) + string (file, " "); + string (file, "}"); + } + string (file, ";"); + newline (file); + return err; +} diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 5152c3f3a5..44bb7bd588 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -25,13 +25,15 @@ * **************************************************************************/ -#include <stdlib.h> #include "main/glheader.h" #include "main/context.h" #include "main/state.h" -#include "main/api_validate.h" #include "main/enums.h" +#include "tnl/tnl.h" +#include "vbo/vbo_context.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" #include "brw_draw.h" #include "brw_defines.h" @@ -42,11 +44,6 @@ #include 
"intel_batchbuffer.h" #include "intel_buffer_objects.h" -#include "tnl/tnl.h" -#include "vbo/vbo_context.h" -#include "swrast/swrast.h" -#include "swrast_setup/swrast_setup.h" - #define FILE_DEBUG_FLAG DEBUG_BATCH static GLuint prim_to_hw_prim[GL_POLYGON+1] = { @@ -141,9 +138,11 @@ static void brw_emit_prim(struct brw_context *brw, prim_packet.verts_per_instance = trim(prim->mode, prim->count); prim_packet.start_vert_location = prim->start; + if (prim->indexed) + prim_packet.start_vert_location += brw->ib.start_vertex_offset; prim_packet.instance_count = 1; prim_packet.start_instance_location = 0; - prim_packet.base_vert_location = 0; + prim_packet.base_vert_location = prim->basevertex; /* Can't wrap here, since we rely on the validated state. */ brw->no_batch_wrap = GL_TRUE; @@ -185,6 +184,7 @@ static void brw_merge_inputs( struct brw_context *brw, for (i = 0; i < VERT_ATTRIB_MAX; i++) { brw->vb.inputs[i].glarray = arrays[i]; + brw->vb.inputs[i].attrib = (gl_vert_attrib) i; if (arrays[i]->StrideB != 0) brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) << @@ -422,54 +422,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, return retval; } -static GLboolean brw_need_rebase( GLcontext *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_index_buffer *ib, - GLuint min_index ) -{ - if (min_index == 0) - return GL_FALSE; - - if (ib) { - if (!vbo_all_varyings_in_vbos(arrays)) - return GL_TRUE; - else - return GL_FALSE; - } - else { - /* Hmm. This isn't quite what I wanted. BRW can actually - * handle the mixed case well enough that we shouldn't need to - * rebase. 
However, it's probably not very common, nor hugely - * expensive to do it this way: - */ - if (!vbo_all_varyings_in_vbos(arrays)) - return GL_TRUE; - else - return GL_FALSE; - } -} - - void brw_draw_prims( GLcontext *ctx, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index ) { GLboolean retval; - /* Decide if we want to rebase. If so we end up recursing once - * only into this function. - */ - if (brw_need_rebase( ctx, arrays, ib, min_index )) { - vbo_rebase_prims( ctx, arrays, - prim, nr_prims, - ib, min_index, max_index, - brw_draw_prims ); - - return; + if (!vbo_all_varyings_in_vbos(arrays)) { + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + + /* Decide if we want to rebase. If so we end up recursing once + * only into this function. + */ + if (min_index != 0) { + vbo_rebase_prims(ctx, arrays, + prim, nr_prims, + ib, min_index, max_index, + brw_draw_prims ); + return; + } } /* Make a first attempt at drawing: diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h index 9aebbdb1b8..2a14db217f 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.h +++ b/src/mesa/drivers/dri/i965/brw_draw.h @@ -39,6 +39,7 @@ void brw_draw_prims( GLcontext *ctx, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index ); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index c29f1dd5c0..765ae5a2fe 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -25,9 +25,9 @@ * **************************************************************************/ -#include <stdlib.h> #include "main/glheader.h" +#include "main/bufferobj.h" #include "main/context.h" #include "main/state.h" 
#include "main/api_validate.h" @@ -350,9 +350,6 @@ static void brw_prepare_vertices(struct brw_context *brw) unsigned int min_index = brw->vb.min_index; unsigned int max_index = brw->vb.max_index; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; - GLuint nr_enabled = 0; - struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; @@ -362,12 +359,13 @@ static void brw_prepare_vertices(struct brw_context *brw) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ + brw->vb.nr_enabled = 0; while (vs_inputs) { GLuint i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; vs_inputs &= ~(1 << i); - enabled[nr_enabled++] = input; + brw->vb.enabled[brw->vb.nr_enabled++] = input; } /* XXX: In the rare cases where this happens we fallback all @@ -376,18 +374,17 @@ static void brw_prepare_vertices(struct brw_context *brw) * cases with > 17 vertex attributes enabled, so it probably * isn't an issue at this point. */ - if (nr_enabled >= BRW_VEP_MAX) { + if (brw->vb.nr_enabled >= BRW_VEP_MAX) { intel->Fallback = 1; return; } - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; input->element_size = get_size(input->glarray->Type) * input->glarray->Size; - input->count = input->glarray->StrideB ? 
max_index + 1 - min_index : 1; - if (input->glarray->BufferObj->Name != 0) { + if (_mesa_is_bufferobj(input->glarray->BufferObj)) { struct intel_buffer_object *intel_buffer = intel_buffer_object(input->glarray->BufferObj); @@ -398,7 +395,23 @@ static void brw_prepare_vertices(struct brw_context *brw) dri_bo_reference(input->bo); input->offset = (unsigned long)input->glarray->Ptr; input->stride = input->glarray->StrideB; + input->count = input->glarray->_MaxElement; + + /* This is a common place to reach if the user mistakenly supplies + * a pointer in place of a VBO offset. If we just let it go through, + * we may end up dereferencing a pointer beyond the bounds of the + * GTT. We would hope that the VBO's max_index would save us, but + * Mesa appears to hand us min/max values not clipped to the + * array object's _MaxElement, and _MaxElement frequently appears + * to be wrong anyway. + * + * The VBO spec allows application termination in this case, and it's + * probably a service to the poor programmer to do so rather than + * trying to just not render. + */ + assert(input->offset < input->bo->size); } else { + input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; if (input->bo != NULL) { /* Already-uploaded vertex data is present from a previous * prepare_vertices, but we had to re-validate state due to @@ -410,7 +423,7 @@ static void brw_prepare_vertices(struct brw_context *brw) /* Queue the buffer object up to be uploaded in the next pass, * when we've decided if we're doing interleaved or not. 
*/ - if (i == 0) { + if (input->attrib == VERT_ATTRIB_POS) { /* Position array not properly enabled: */ if (input->glarray->StrideB == 0) { @@ -466,8 +479,8 @@ static void brw_prepare_vertices(struct brw_context *brw) brw_prepare_query_begin(brw); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; brw_add_validated_bo(brw, input->bo); } @@ -477,34 +490,44 @@ static void brw_emit_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint i; - GLuint nr_enabled = 0; - /* Accumulate the list of enabled arrays. */ - while (vs_inputs) { - i = _mesa_ffsll(vs_inputs) - 1; - struct brw_vertex_element *input = &brw->vb.inputs[i]; + brw_emit_query_begin(brw); - vs_inputs &= ~(1 << i); - enabled[nr_enabled++] = input; + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), emit a single pad + * VERTEX_ELEMENT struct and bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (brw->vb.nr_enabled == 0) { + BEGIN_BATCH(3, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | + (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); + ADVANCE_BATCH(); + return; } - brw_emit_query_begin(brw); - /* Now emit VB and VEP state packets. 
* * This still defines a hardware VB for each input, even if they * are interleaved or from the same VBO. TBD if this makes a * performance difference. */ - BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS); + BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS); OUT_BATCH((CMD_VERTEX_BUFFER << 16) | - ((1 + nr_enabled * 4) - 2)); + ((1 + brw->vb.nr_enabled * 4) - 2)); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | BRW_VB0_ACCESS_VERTEXDATA | @@ -524,15 +547,15 @@ static void brw_emit_vertices(struct brw_context *brw) input->offset + input->element_size); } } else - OUT_BATCH(brw->vb.max_index); + OUT_BATCH(input->stride ? input->count : 0); OUT_BATCH(0); /* Instance data step rate */ } ADVANCE_BATCH(); - BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2)); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2)); + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, input->glarray->Size, input->glarray->Format, @@ -589,17 +612,20 @@ static void brw_prepare_indices(struct brw_context *brw) dri_bo *bo = NULL; struct gl_buffer_object *bufferobj; GLuint offset; + GLuint ib_type_size; if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count; + ib_type_size = get_size(index_buffer->type); + ib_size = ib_type_size * index_buffer->count; bufferobj = index_buffer->obj;; /* Turn into a proper VBO: */ - if (!bufferobj->Name) { - + if (!_mesa_is_bufferobj(bufferobj)) { + brw->ib.start_vertex_offset = 0; + /* Get new 
bufferobj, offset: */ get_space(brw, ib_size, &bo, &offset); @@ -615,6 +641,7 @@ static void brw_prepare_indices(struct brw_context *brw) } } else { offset = (GLuint) (unsigned long) index_buffer->ptr; + brw->ib.start_vertex_offset = 0; /* If the index buffer isn't aligned to its element size, we have to * rebase it into a temporary. @@ -635,39 +662,62 @@ static void brw_prepare_indices(struct brw_context *brw) bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), INTEL_READ); dri_bo_reference(bo); + + /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading + * the index buffer state when we're just moving the start index + * of our drawing. + */ + brw->ib.start_vertex_offset = offset / ib_type_size; + offset = 0; + ib_size = bo->size; } } - dri_bo_unreference(brw->ib.bo); - brw->ib.bo = bo; - brw->ib.offset = offset; + if (brw->ib.bo != bo || + brw->ib.offset != offset || + brw->ib.size != ib_size) + { + drm_intel_bo_unreference(brw->ib.bo); + brw->ib.bo = bo; + brw->ib.offset = offset; + brw->ib.size = ib_size; + + brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; + } else { + drm_intel_bo_unreference(bo); + } brw_add_validated_bo(brw, brw->ib.bo); } -static void brw_emit_indices(struct brw_context *brw) +const struct brw_tracked_state brw_indices = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_INDICES, + .cache = 0, + }, + .prepare = brw_prepare_indices, +}; + +static void brw_emit_index_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; const struct _mesa_index_buffer *index_buffer = brw->ib.ib; - GLuint ib_size; if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count - 1; - /* Emit the indexbuffer packet: */ { struct brw_indexbuffer ib; memset(&ib, 0, sizeof(ib)); - + ib.header.bits.opcode = CMD_INDEX_BUFFER; ib.header.bits.length = sizeof(ib)/4 - 2; ib.header.bits.index_format = get_index_type(index_buffer->type); ib.header.bits.cut_index_enable = 0; - BEGIN_BATCH(4, 
IGNORE_CLIPRECTS); OUT_BATCH( ib.header.dword ); @@ -676,18 +726,17 @@ static void brw_emit_indices(struct brw_context *brw) brw->ib.offset); OUT_RELOC(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, - brw->ib.offset + ib_size); + brw->ib.offset + brw->ib.size); OUT_BATCH( 0 ); ADVANCE_BATCH(); } } -const struct brw_tracked_state brw_indices = { +const struct brw_tracked_state brw_index_buffer = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH | BRW_NEW_INDICES, + .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER, .cache = 0, }, - .prepare = brw_prepare_indices, - .emit = brw_emit_indices, + .emit = brw_emit_index_buffer, }; diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index c53efba599..1df561386e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -62,7 +62,7 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc ) void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ) { - p->current->header.destreg__conditonalmod = conditional; + p->current->header.destreg__conditionalmod = conditional; } void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 2412014248..241cdc33f8 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -481,8 +481,8 @@ static struct brw_instruction *next_insn( struct brw_compile *p, /* Reset this one-shot flag: */ - if (p->current->header.destreg__conditonalmod) { - p->current->header.destreg__conditonalmod = 0; + if (p->current->header.destreg__conditionalmod) { + p->current->header.destreg__conditionalmod = 0; p->current->header.predicate_control = BRW_PREDICATE_NORMAL; } @@ -679,7 +679,7 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, assert(if_insn->header.opcode == BRW_OPCODE_IF); if_insn->bits3.if_else.jump_count = br * (insn - if_insn); - if_insn->bits3.if_else.pop_count = 1; 
+ if_insn->bits3.if_else.pop_count = 0; if_insn->bits3.if_else.pad0 = 0; } @@ -871,7 +871,7 @@ void brw_CMP(struct brw_compile *p, { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); - insn->header.destreg__conditonalmod = conditional; + insn->header.destreg__conditionalmod = conditional; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); @@ -915,7 +915,7 @@ void brw_math( struct brw_compile *p, * instructions. */ insn->header.predicate_control = 0; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -952,7 +952,7 @@ void brw_math_16( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -969,7 +969,7 @@ void brw_math_16( struct brw_compile *p, */ insn = next_insn(p, BRW_OPCODE_SEND); insn->header.compression_control = BRW_COMPRESSION_2NDHALF; - insn->header.destreg__conditonalmod = msg_reg_nr+1; + insn->header.destreg__conditionalmod = msg_reg_nr+1; brw_set_dest(insn, offset(dest,1)); brw_set_src0(insn, src); @@ -1016,7 +1016,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -1062,7 +1062,7 @@ void brw_dp_READ_16( struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); /* UW? 
*/ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); @@ -1116,7 +1116,7 @@ void brw_dp_READ_4( struct brw_compile *p, insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /* cast dest to a uword[8] vector */ @@ -1190,7 +1190,7 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /*insn->header.access_mode = BRW_ALIGN_16;*/ @@ -1224,7 +1224,7 @@ void brw_fb_WRITE(struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1322,7 +1322,7 @@ void brw_SAMPLE(struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1375,7 +1375,7 @@ void brw_urb_WRITE(struct brw_compile *p, brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_urb_message(p->brw, insn, @@ -1410,7 +1410,7 @@ void brw_ff_sync(struct brw_compile *p, brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_ff_sync_message(p->brw, insn, diff --git 
a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index 980eac7646..a9b2aa2eac 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -101,7 +101,7 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, BRW_URB_SWIZZLE_NONE); } -void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) +static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) { struct brw_compile *p = &c->func; brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index a761c03153..ed9d2ffe60 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -93,7 +93,10 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) gs.thread4.nr_urb_entries = key->nr_urb_entries; gs.thread4.urb_entry_allocation_size = key->urb_size - 1; - gs.thread4.max_threads = 0; /* Hardware requirement */ + if (key->nr_urb_entries >= 8) + gs.thread4.max_threads = 1; + else + gs.thread4.max_threads = 0; if (BRW_IS_IGDNG(brw)) gs.thread4.rendering_enable = 1; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 85a7706404..ea71857548 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -172,6 +172,7 @@ static void prepare_psp_urb_cbs(struct brw_context *brw) brw_add_validated_bo(brw, brw->vs.state_bo); brw_add_validated_bo(brw, brw->gs.state_bo); brw_add_validated_bo(brw, brw->clip.state_bo); + brw_add_validated_bo(brw, brw->sf.state_bo); brw_add_validated_bo(brw, brw->wm.state_bo); brw_add_validated_bo(brw, brw->cc.state_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index cb9169e2ee..a195bc32b0 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c 
@@ -146,17 +146,12 @@ static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q) static void brw_check_query(GLcontext *ctx, struct gl_query_object *q) { - /* XXX: Need to expose dri_bo_is_idle from bufmgr. */ -#if 0 struct brw_query_object *query = (struct brw_query_object *)q; - if (dri_bo_is_idle(query->bo)) { + if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) { brw_queryobj_get_results(query); query->Base.Ready = GL_TRUE; } -#else - brw_wait_query(ctx, q); -#endif } /** Called to set up the query BO and account for its aperture space */ diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index b5f6371c82..bc0f076073 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -113,7 +113,7 @@ struct brw_sf_unit_key { unsigned int nr_urb_entries, urb_size, sfsize; - GLenum front_face, cull_face; + GLenum front_face, cull_face, provoking_vertex; unsigned scissor:1; unsigned line_smooth:1; unsigned point_sprite:1; @@ -153,6 +153,9 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); key->point_attenuated = ctx->Point._Attenuated; + /* _NEW_LIGHT */ + key->provoking_vertex = ctx->Light.ProvokingVertex; + key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; } @@ -284,9 +287,15 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ - sf.sf7.trifan_pv = 2; - sf.sf7.linestrip_pv = 1; - sf.sf7.tristrip_pv = 2; + if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) { + sf.sf7.trifan_pv = 2; + sf.sf7.linestrip_pv = 1; + sf.sf7.tristrip_pv = 2; + } else { + sf.sf7.trifan_pv = 1; + sf.sf7.linestrip_pv = 0; + sf.sf7.tristrip_pv = 0; + } sf.sf7.line_last_pixel_enable = 0; /* Set bias for OpenGL rasterization rules: @@ -300,6 +309,9 @@ 
sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, &sf, sizeof(sf), NULL, NULL); + /* STATE_PREFETCH command description describes this state as being + * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. + */ /* Emit SF program relocation */ dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, @@ -340,6 +352,7 @@ static void upload_sf_unit( struct brw_context *brw ) const struct brw_tracked_state brw_sf_unit = { .dirty = { .mesa = (_NEW_POLYGON | + _NEW_LIGHT | _NEW_LINE | _NEW_POINT | _NEW_SCISSOR | diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index bf9f6cae55..78572356a3 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -92,6 +92,7 @@ const struct brw_tracked_state brw_clear_batch_cache; const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; +const struct brw_tracked_state brw_index_buffer; /** * Use same key for WM and VS surfaces. diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 38d9dd8991..414620d0b3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -94,6 +94,7 @@ const struct brw_tracked_state *atoms[] = &brw_drawing_rect, &brw_indices, + &brw_index_buffer, &brw_vertices, &brw_constant_buffer @@ -208,6 +209,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_PSP), DEFINE_BIT(BRW_NEW_FENCE), DEFINE_BIT(BRW_NEW_INDICES), + DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), DEFINE_BIT(BRW_NEW_BATCH), DEFINE_BIT(BRW_NEW_DEPTH_BUFFER), @@ -320,6 +322,19 @@ void brw_validate_state( struct brw_context *brw ) } } } + + /* Make sure that the textures which are referenced by the current + * brw fragment program are actually present/valid. + * If this fails, we can experience GPU lock-ups. 
+ */ + { + const struct brw_fragment_program *fp; + fp = brw_fragment_program_const(brw->fragment_program); + if (fp) { + assert((fp->tex_units_used & ctx->Texture._EnabledUnits) + == fp->tex_units_used); + } + } } diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 8ba7eb27b3..66d4127271 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -442,8 +442,8 @@ struct brw_urb_fence { GLuint sf_fence:10; GLuint vf_fence:10; - GLuint cs_fence:10; - GLuint pad:2; + GLuint cs_fence:11; + GLuint pad:1; } bits1; }; @@ -1200,7 +1200,7 @@ struct brw_instruction GLuint predicate_control:4; GLuint predicate_inverse:1; GLuint execution_size:3; - GLuint destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */ GLuint pad0:2; GLuint debug_control:1; GLuint saturate:1; @@ -1228,7 +1228,9 @@ struct brw_instruction GLuint dest_reg_type:3; GLuint src0_reg_file:2; GLuint src0_reg_type:3; - GLuint pad:6; + GLuint src1_reg_file:2; /* 0x00000c00 */ + GLuint src1_reg_type:3; /* 0x00007000 */ + GLuint pad:1; GLint dest_indirect_offset:10; /* offset against the deref'd address reg */ GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */ GLuint dest_horiz_stride:2; @@ -1243,7 +1245,7 @@ struct brw_instruction GLuint src0_reg_type:3; GLuint src1_reg_file:2; GLuint src1_reg_type:3; - GLuint pad0:1; + GLuint pad:1; GLuint dest_writemask:4; GLuint dest_subreg_nr:1; GLuint dest_reg_nr:8; @@ -1348,7 +1350,7 @@ struct brw_instruction GLuint src1_reg_nr:8; GLuint src1_abs:1; GLuint src1_negate:1; - GLuint pad:1; + GLuint src1_address_mode:1; GLuint src1_horiz_stride:2; GLuint src1_width:3; GLuint src1_vert_stride:4; @@ -1363,7 +1365,7 @@ struct brw_instruction GLuint src1_reg_nr:8; GLuint src1_abs:1; GLuint src1_negate:1; - GLuint pad0:1; + GLuint src1_address_mode:1; GLuint src1_swz_z:2; GLuint 
src1_swz_w:2; GLuint pad1:1; @@ -1377,7 +1379,7 @@ struct brw_instruction GLuint src1_subreg_nr:3; GLuint src1_abs:1; GLuint src1_negate:1; - GLuint pad0:1; + GLuint src1_address_mode:1; GLuint src1_horiz_stride:2; GLuint src1_width:3; GLuint src1_vert_stride:4; @@ -1565,6 +1567,7 @@ struct brw_instruction GLint d; GLuint ud; + float f; } bits3; }; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 7f9b253534..5986cbffad 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -65,11 +65,6 @@ GLboolean brw_miptree_layout(struct intel_context *intel, if (mt->compressed) { mt->pitch = ALIGN(mt->width0, align_w); - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; - mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; - } else { - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; - mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; } if (mt->first_level != mt->last_level) { @@ -90,6 +85,14 @@ GLboolean brw_miptree_layout(struct intel_context *intel, mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); + if (mt->compressed) { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; + } else { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; + } + for (level = mt->first_level; level <= mt->last_level; level++) { GLuint img_height; GLuint nr_images = 6; @@ -191,6 +194,16 @@ GLboolean brw_miptree_layout(struct intel_context *intel, } } + /* The 965's sampler lays cachelines out according to how accesses + * in the texture surfaces run, so they may 
be "vertical" through + * memory. As a result, the docs say in Surface Padding Requirements: + * Sampling Engine Surfaces that two extra rows of padding are required. + * We don't know of similar requirements for pre-965, but given that + * those docs are silent on padding requirements in general, let's play + * it safe. + */ + if (mt->target == GL_TEXTURE_CUBE_MAP) + mt->total_height += 2; break; } diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 514f15d5e3..1638ef8111 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -68,6 +68,7 @@ static void release_tmps( struct brw_vs_compile *c ) static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; + int attributes_in_vue; /* Determine whether to use a real constant buffer or use a block * of GRF registers for constants. The later is faster but only @@ -128,6 +129,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } } + /* If there are no inputs, we'll still be reading one attribute's worth + * because it's required -- see urb_read_length setting. + */ + if (c->nr_inputs == 0) + reg++; /* Allocate outputs. The non-position outputs go straight into message regs. */ @@ -220,11 +226,22 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * vertex urb, so is half the amount: */ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; + /* Setting this field to 0 leads to undefined behavior according to the + * the VS_STATE docs. Our VUEs will always have at least one attribute + * sitting in them, even if it's padding. + */ + if (c->prog_data.urb_read_length == 0) + c->prog_data.urb_read_length = 1; + + /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size + * them to fit the biggest thing they need to. 
+ */ + attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); if (BRW_IS_IGDNG(c->func.brw)) - c->prog_data.urb_entry_size = (c->nr_outputs + 6 + 3) / 4; + c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else - c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4; + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; c->prog_data.total_grf = reg; @@ -894,6 +911,7 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: + case PROGRAM_ENV_PARAM: if (c->vp->use_const_buffer) { return get_constant(c, inst, argIndex); } @@ -913,7 +931,6 @@ get_src_reg( struct brw_vs_compile *c, return brw_null_reg(); case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: case PROGRAM_WRITE_ONLY: default: assert(0); @@ -1191,7 +1208,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */ 0, /* response len */ eot, /* eot */ - 1, /* writes complete */ + eot, /* writes complete */ 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); @@ -1245,9 +1262,49 @@ post_vs_emit( struct brw_vs_compile *c, /* patch up the END code to jump past subroutines, etc */ offset = last_inst - end_inst; - brw_set_src1(end_inst, brw_imm_d(offset * 16)); + if (offset > 1) { + brw_set_src1(end_inst, brw_imm_d(offset * 16)); + } else { + end_inst->header.opcode = BRW_OPCODE_NOP; + } } +static uint32_t +get_predicate(const struct prog_instruction *inst) +{ + if (inst->DstReg.CondMask == COND_TR) + return BRW_PREDICATE_NONE; + + /* All of GLSL only produces predicates for COND_NE and one channel per + * vector. Fail badly if someone starts doing something else, as it might + * mean infinite looping or something. + * + * We'd like to support all the condition codes, but our hardware doesn't + * quite match the Mesa IR, which is modeled after the NV extensions. 
For + * those, the instruction may update the condition codes or not, then any + * later instruction may use one of those condition codes. For gen4, the + * instruction may update the flags register based on one of the condition + * codes output by the instruction, and then further instructions may + * predicate on that. We can probably support this, but it won't + * necessarily be easy. + */ + assert(inst->DstReg.CondMask == COND_NE); + + switch (inst->DstReg.CondSwizzle) { + case SWIZZLE_XXXX: + return BRW_PREDICATE_ALIGN16_REPLICATE_X; + case SWIZZLE_YYYY: + return BRW_PREDICATE_ALIGN16_REPLICATE_Y; + case SWIZZLE_ZZZZ: + return BRW_PREDICATE_ALIGN16_REPLICATE_Z; + case SWIZZLE_WWWW: + return BRW_PREDICATE_ALIGN16_REPLICATE_W; + default: + _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", + inst->DstReg.CondMask); + return BRW_PREDICATE_NORMAL; + } +} /* Emit the vertex program instructions here. */ @@ -1256,6 +1313,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; const GLuint nr_insns = c->vp->program.Base.NumInstructions; GLuint insn, if_depth = 0, loop_depth = 0; GLuint end_offset = 0; @@ -1266,7 +1324,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) GLuint file; if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("vs-emit:\n"); + _mesa_printf("vs-mesa:\n"); _mesa_print_program(&c->vp->program.Base); _mesa_printf("\n"); } @@ -1453,7 +1511,10 @@ void brw_vs_emit(struct brw_vs_compile *c ) break; case OPCODE_IF: assert(if_depth < MAX_IF_DEPTH); - if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); + if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); + /* Note that brw_IF smashes the predicate_control field. 
*/ + if_inst[if_depth]->header.predicate_control = get_predicate(inst); + if_depth++; break; case OPCODE_ELSE: if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); @@ -1462,45 +1523,48 @@ void brw_vs_emit(struct brw_vs_compile *c ) assert(if_depth > 0); brw_ENDIF(p, if_inst[--if_depth]); break; -#if 0 case OPCODE_BGNLOOP: loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: + brw_set_predicate_control(p, get_predicate(inst)); brw_BREAK(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_CONT: + brw_set_predicate_control(p, get_predicate(inst)); brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_ENDLOOP: { struct brw_instruction *inst0, *inst1; + GLuint br = 1; + loop_depth--; + + if (BRW_IS_IGDNG(brw)) + br = 2; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); /* patch all the BREAK/CONT instructions from last BEGINLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; if (inst0->header.opcode == BRW_OPCODE_BREAK) { - inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { - inst0->bits3.if_else.jump_count = inst1 - inst0; + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } } } break; -#else - (void) loop_inst; - (void) loop_depth; -#endif case OPCODE_BRA: - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_set_predicate_control(p, get_predicate(inst)); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_set_predicate_control_flag_value(p, 0xff); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_CAL: brw_set_access_mode(p, BRW_ALIGN_1); @@ -1541,6 +1605,19 @@ void brw_vs_emit(struct brw_vs_compile *c ) "unknown"); } + /* Set the predication update on the last instruction of the native + * 
instruction sequence. + * + * This would be problematic if it was set on a math instruction, + * but that shouldn't be the case with the current GLSL compiler. + */ + if (inst->CondUpdate) { + struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1]; + + assert(hw_insn->header.destreg__conditionalmod == 0); + hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; + } + if ((inst->DstReg.File == PROGRAM_OUTPUT) && (inst->DstReg.Index != VERT_RESULT_HPOS) && c->output_regs[inst->DstReg.Index].used_in_src) { @@ -1578,4 +1655,13 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_vertex_write(c); post_vs_emit(c, end_inst, last_inst); + + if (INTEL_DEBUG & DEBUG_VS) { + int i; + + _mesa_printf("vs-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 14e05be4f6..2292de94c4 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -41,13 +41,13 @@ GLuint brw_wm_nr_args( GLuint opcode ) { switch (opcode) { case WM_FRONTFACING: - return 0; case WM_PIXELXY: + return 0; case WM_CINTERP: case WM_WPOSXY: + case WM_DELTAXY: return 1; case WM_LINTERP: - case WM_DELTAXY: case WM_PIXELW: return 2; case WM_FB_WRITE: @@ -171,9 +171,11 @@ static void do_wm_prog( struct brw_context *brw, * differently from "simple" shaders. 
*/ if (fp->isGLSL) { + c->dispatch_width = 8; brw_wm_glsl_emit(brw, c); } else { + c->dispatch_width = 16; brw_wm_non_glsl_emit(brw, c); } @@ -202,6 +204,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; + GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; GLuint lookup = 0; GLuint line_aa; GLuint i; @@ -263,6 +266,7 @@ static void brw_wm_populate_key( struct brw_context *brw, brw_wm_lookup_iz(line_aa, lookup, + uses_depth, key); diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index ba497432c6..872b1f3ecf 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -38,6 +38,8 @@ #include "brw_context.h" #include "brw_eu.h" +#define SATURATE (1<<5) + /* A big lookup table is used to figure out which and how many * additional regs will inserted before the main payload in the WM * program execution. 
These mainly relate to depth and stencil @@ -203,7 +205,6 @@ struct brw_wm_compile { GLuint fp_temp; GLuint fp_interp_emitted; GLuint fp_fragcolor_emitted; - GLuint fp_deriv_emitted; struct prog_src_register pixel_xy; struct prog_src_register delta_xy; @@ -260,6 +261,7 @@ struct brw_wm_compile { GLuint tmp_index; GLuint tmp_max; GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; + GLuint dispatch_width; /** we may need up to 3 constants per instruction (if use_const_buffer) */ struct { @@ -292,10 +294,16 @@ void brw_wm_print_program( struct brw_wm_compile *c, void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, + GLboolean ps_uses_depth, struct brw_wm_prog_key *key ); GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 9f82916c02..bf80a2942a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -34,8 +34,6 @@ #include "brw_context.h" #include "brw_wm.h" -#define SATURATE (1<<5) - /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. 
*/ @@ -65,8 +63,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) static void emit_pixel_xy(struct brw_compile *p, const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) + GLuint mask) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); @@ -98,8 +95,7 @@ static void emit_pixel_xy(struct brw_compile *p, static void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) + const struct brw_reg *arg0) { struct brw_reg r1 = brw_vec1_grf(1, 0); @@ -283,6 +279,79 @@ static void emit_frontfacing( struct brw_compile *p, brw_set_predicate_control_flag_value(p, 0xff); } +/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input + * looking like: + * + * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br + * + * and we're trying to produce: + * + * DDX DDY + * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) + * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) + * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) + * (ss0.br - ss0.bl) (ss0.tr - ss0.br) + * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) + * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) + * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) + * (ss1.br - ss1.bl) (ss1.tr - ss1.br) + * + * and add another set of two more subspans if in 16-pixel dispatch mode. + * + * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result + * for each pair, and vertstride = 2 jumps us 2 elements after processing a + * pair. But for DDY, it's harder, as we want to produce the pairs swizzled + * between each other. 
We could probably do it like ddx and swizzle the right + * order later, but bail for now and just produce + * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) + */ +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0) +{ + int i; + struct brw_reg src0, src1; + + if (mask & SATURATE) + brw_set_saturate(p, 1); + for (i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + if (is_ddx) { + src0 = brw_reg(arg0[i].file, arg0[i].nr, 1, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + src1 = brw_reg(arg0[i].file, arg0[i].nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + } else { + src0 = brw_reg(arg0[i].file, arg0[i].nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + src1 = brw_reg(arg0[i].file, arg0[i].nr, 2, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + } + brw_ADD(p, dst[i], src0, negate(src1)); + } + } + if (mask & SATURATE) + brw_set_saturate(p, 0); +} + static void emit_alu1( struct brw_compile *p, struct brw_instruction *(*func)(struct brw_compile *, struct brw_reg, @@ -545,16 +614,18 @@ static void emit_dp3( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); brw_set_saturate(p, (mask & SATURATE) ? 
1 : 0); - brw_MAC(p, dst[0], arg0[2], arg1[2]); + brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]); brw_set_saturate(p, 0); } @@ -565,17 +636,19 @@ static void emit_dp4( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MAC(p, dst[0], arg0[3], arg1[3]); + brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]); brw_set_saturate(p, 0); } @@ -586,17 +659,19 @@ static void emit_dph( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); - brw_MAC(p, dst[0], arg0[2], arg1[2]); + brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]); brw_set_saturate(p, (mask & SATURATE) ? 
1 : 0); - brw_ADD(p, dst[0], dst[0], arg1[3]); + brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]); brw_set_saturate(p, 0); } @@ -632,18 +707,19 @@ static void emit_math1( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X || - // function == BRW_MATH_FUNCTION_SINCOS); - + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + brw_MOV(p, brw_message_reg(2), arg0[0]); /* Send two messages to perform all 16 operations: */ brw_math_16(p, - dst[0], + dst[dst_chan], function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, @@ -659,10 +735,12 @@ static void emit_math2( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_push_insn_state(p); @@ -681,7 +759,7 @@ static void emit_math2( struct brw_compile *p, */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, - dst[0], + dst[dst_chan], function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, @@ -691,7 +769,7 @@ static void emit_math2( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math(p, - offset(dst[0],1), + offset(dst[dst_chan],1), function, (mask & SATURATE) ? 
BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 4, @@ -728,10 +806,14 @@ static void emit_tex( struct brw_wm_compile *c, emit = WRITEMASK_XY; nr = 2; break; - default: + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: emit = WRITEMASK_XYZ; nr = 3; break; + default: + /* unexpected target */ + abort(); } if (inst->tex_shadow) { @@ -803,11 +885,15 @@ static void emit_txb( struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(4), arg[1]); brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); break; - default: + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: brw_MOV(p, brw_message_reg(2), arg[0]); brw_MOV(p, brw_message_reg(4), arg[1]); brw_MOV(p, brw_message_reg(6), arg[2]); break; + default: + /* unexpected target */ + abort(); } brw_MOV(p, brw_message_reg(8), arg[3]); @@ -893,6 +979,20 @@ static void emit_kil( struct brw_wm_compile *c, } } +/* KIL_NV kills the pixels that are currently executing, not based on a test + * of the arguments. + */ +static void emit_kil_nv( struct brw_wm_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, r0uw, c->emit_mask_reg, r0uw); + brw_pop_insn_state(p); +} static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, @@ -1194,11 +1294,11 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Generated instructions for calculating triangle interpolants: */ case WM_PIXELXY: - emit_pixel_xy(p, dst, dst_flags, args[0]); + emit_pixel_xy(p, dst, dst_flags); break; case WM_DELTAXY: - emit_delta_xy(p, dst, dst_flags, args[0], args[1]); + emit_delta_xy(p, dst, dst_flags, args[0]); break; case WM_WPOSXY: @@ -1243,6 +1343,14 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; + case OPCODE_DDX: + emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]); + break; + + case OPCODE_DDY: 
+ emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]); + break; + case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; @@ -1372,6 +1480,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_kil(c, args[0]); break; + case OPCODE_KIL_NV: + emit_kil_nv(c); + break; + default: _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? @@ -1385,4 +1497,13 @@ void brw_wm_emit( struct brw_wm_compile *c ) inst->dst[i]->hw_reg, inst->dst[i]->spill_slot); } + + if (INTEL_DEBUG & DEBUG_WM) { + int i; + + _mesa_printf("wm-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index b9e8dd2e96..4e3edfbbff 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -42,6 +42,12 @@ #include "shader/prog_statevars.h" +/** An invalid texture target */ +#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS + +/** An invalid texture unit */ +#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT + #define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS #define X 0 @@ -199,6 +205,15 @@ static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, { struct prog_instruction *inst = get_fp_inst(c); + assert(tex_src_unit < BRW_MAX_TEX_UNIT || + tex_src_unit == TEX_UNIT_NONE); + assert(tex_src_target < NUM_TEXTURE_TARGETS || + tex_src_target == TEX_TARGET_NONE); + + /* update mask of which texture units are referenced by this program */ + if (tex_src_unit != TEX_UNIT_NONE) + c->fp->tex_units_used |= (1 << tex_src_unit); + memset(inst, 0, sizeof(*inst)); inst->Opcode = op; @@ -223,12 +238,45 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, struct prog_src_register src2 ) { return emit_tex_op(c, op, dest, saturate, - 0, 0, 0, /* tex unit, target, shadow */ + TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */ src0, src1, src2); } - 
+/* Many Mesa opcodes produce the same value across all the result channels. + * We'd rather not have to support that splatting in the opcode implementations, + * and brw_wm_pass*.c wants to optimize them out by shuffling references around + * anyway. We can easily get both by emitting the opcode to one channel, and + * then MOVing it to the others, which brw_wm_pass*.c already understands. + */ +static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst0) +{ + struct prog_instruction *inst; + unsigned int dst_chan; + unsigned int other_channel_mask; + + if (inst0->DstReg.WriteMask == 0) + return NULL; + + dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1; + inst = get_fp_inst(c); + *inst = *inst0; + inst->DstReg.WriteMask = 1 << dst_chan; + + other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); + if (other_channel_mask != 0) { + inst = emit_op(c, + OPCODE_MOV, + dst_mask(inst0->DstReg, other_channel_mask), + 0, + src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), + src_undef(), + src_undef()); + } + return inst; +} + /*********************************************************************** * Special instructions for interpolation and other tasks @@ -376,14 +424,6 @@ static void emit_interp( struct brw_wm_compile *c, } break; case FRAG_ATTRIB_FOGC: - /* The FOGC input is really special. When a program uses glFogFragCoord, - * the results returned are supposed to be (f,0,0,1). But for Mesa GLSL, - * the glFrontFacing and glPointCoord values are also stashed in FOGC. - * So, write the interpolated fog value to X, then either 0, 1, or the - * stashed values to Y, Z, W. Note that this means that - * glFogFragCoord.yzw can be wrong in those cases! - */ - /* Interpolate the fog coordinate */ emit_op(c, WM_PINTERP, @@ -393,26 +433,40 @@ static void emit_interp( struct brw_wm_compile *c, deltas, get_pixel_w(c)); - /* Move the front facing value into FOGC.y if it's needed. 
*/ - if (c->fp->program.UsesFrontFacing) { - emit_op(c, - WM_FRONTFACING, - dst_mask(dst, WRITEMASK_Y), - 0, - src_undef(), - src_undef(), - src_undef()); - } else { - emit_op(c, - OPCODE_MOV, - dst_mask(dst, WRITEMASK_Y), - 0, - src_swizzle1(interp, SWIZZLE_ZERO), - src_undef(), - src_undef()); - } + emit_op(c, + OPCODE_MOV, + dst_mask(dst, WRITEMASK_YZW), + 0, + src_swizzle(interp, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ONE), + src_undef(), + src_undef()); + break; + + case FRAG_ATTRIB_FACE: + /* XXX review/test this case */ + emit_op(c, + WM_FRONTFACING, + dst_mask(dst, WRITEMASK_X), + 0, + src_undef(), + src_undef(), + src_undef()); + break; + + case FRAG_ATTRIB_PNTC: + /* XXX review/test this case */ + emit_op(c, + WM_PINTERP, + dst_mask(dst, WRITEMASK_XY), + 0, + interp, + deltas, + get_pixel_w(c)); - /* Should do the PointCoord thing here. */ emit_op(c, OPCODE_MOV, dst_mask(dst, WRITEMASK_ZW), @@ -425,6 +479,7 @@ static void emit_interp( struct brw_wm_compile *c, src_undef(), src_undef()); break; + default: emit_op(c, WM_PINTERP, @@ -439,38 +494,6 @@ static void emit_interp( struct brw_wm_compile *c, c->fp_interp_emitted |= 1<<idx; } -static void emit_ddx( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - GLuint idx = inst->SrcReg[0].Index; - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - - c->fp_deriv_emitted |= 1<<idx; - emit_op(c, - OPCODE_DDX, - inst->DstReg, - 0, - interp, - get_pixel_w(c), - src_undef()); -} - -static void emit_ddy( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - GLuint idx = inst->SrcReg[0].Index; - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - - c->fp_deriv_emitted |= 1<<idx; - emit_op(c, - OPCODE_DDY, - inst->DstReg, - 0, - interp, - get_pixel_w(c), - src_undef()); -} - /*********************************************************************** * Hacks to extend the program parameter and constant lists. 
*/ @@ -643,6 +666,8 @@ static void precalc_tex( struct brw_wm_compile *c, struct prog_dst_register tmpcoord; const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + assert(unit < BRW_MAX_TEX_UNIT); + if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { struct prog_instruction *out; struct prog_dst_register tmp0 = get_temp(c); @@ -683,7 +708,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp0 = 1 / tmp1 */ emit_op(c, OPCODE_RCP, - tmp0, + dst_mask(tmp0, WRITEMASK_X), 0, tmp1src, src_undef(), @@ -694,7 +719,7 @@ static void precalc_tex( struct brw_wm_compile *c, tmpcoord, 0, src0, - tmp0src, + src_swizzle1(tmp0src, SWIZZLE_X), src_undef()); release_temp(c, tmp0); @@ -717,7 +742,11 @@ static void precalc_tex( struct brw_wm_compile *c, tmpcoord, 0, inst->SrcReg[0], - scale, + src_swizzle(scale, + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_ONE, + SWIZZLE_ONE), src_undef()); coord = src_reg_from_dst(tmpcoord); @@ -1041,6 +1070,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) c->delta_xy = src_undef(); c->pixel_w = src_undef(); c->nr_fp_insns = 0; + c->fp->tex_units_used = 0x0; /* Emit preamble instructions. 
This is where special instructions such as * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to @@ -1108,6 +1138,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_TXB: out = emit_insn(c, inst); out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); break; case OPCODE_XPD: @@ -1123,20 +1154,16 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) */ out->DstReg.WriteMask = 0; break; - case OPCODE_DDX: - emit_ddx(c, inst); - break; - case OPCODE_DDY: - emit_ddy(c, inst); - break; case OPCODE_END: emit_fb_write(c); break; case OPCODE_PRINT: break; - default: - emit_insn(c, inst); + if (brw_wm_is_scalar_result(inst->Opcode)) + emit_scalar_insn(c, inst); + else + emit_insn(c, inst); break; } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 19f777fe32..c9fe1dd8ad 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -10,6 +10,9 @@ enum _subroutine { SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 }; +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint component); /** * Determine if the given fragment program uses GLSL features such @@ -19,16 +22,16 @@ enum _subroutine { GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { int i; + for (i = 0; i < fp->Base.NumInstructions; i++) { const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { + case OPCODE_ARL: case OPCODE_IF: case OPCODE_ENDIF: case OPCODE_CAL: case OPCODE_BRK: case OPCODE_RET: - case OPCODE_DDX: - case OPCODE_DDY: case OPCODE_NOISE1: case OPCODE_NOISE2: case OPCODE_NOISE3: @@ -130,19 +133,6 @@ static void set_reg(struct brw_wm_compile *c, int file, int index, c->wm_regs[file][index][component].inited = GL_TRUE; } -/** - * Examine instruction's write mask to find index of first component - * enabled for writing. 
- */ -static int get_scalar_dst_index(const struct prog_instruction *inst) -{ - int i; - for (i = 0; i < 4; i++) - if (inst->DstReg.WriteMask & (1<<i)) - break; - return i; -} - static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; @@ -302,7 +292,7 @@ static void prealloc_reg(struct brw_wm_compile *c) int i, j; struct brw_reg reg; int urb_read_length = 0; - GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; GLuint reg_index = 0; memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); @@ -402,6 +392,27 @@ static void prealloc_reg(struct brw_wm_compile *c) prealloc_grf(c, 126); prealloc_grf(c, 127); + for (i = 0; i < c->nr_fp_insns; i++) { + const struct prog_instruction *inst = &c->prog_instructions[i]; + struct brw_reg dst[4]; + + switch (inst->Opcode) { + case OPCODE_TEX: + case OPCODE_TXB: + /* Allocate the channels of texture results contiguously, + * since they are written out that way by the sampler unit. + */ + for (j = 0; j < 4; j++) { + dst[j] = get_dst_reg(c, inst, j); + if (j != 0) + assert(dst[j].nr == dst[j - 1].nr + 1); + } + break; + default: + break; + } + } + /* An instruction may reference up to three constants. * They'll be found in these registers. * XXX alloc these on demand! 
@@ -639,23 +650,6 @@ static void invoke_subroutine( struct brw_wm_compile *c, } } -static void emit_abs( struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (inst->DstReg.WriteMask & (1<<i)) { - struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i); - src = get_src_reg(c, inst, 0, i); - brw_MOV(p, dst, brw_abs(src)); - } - } - brw_set_saturate(p, 0); -} - static void emit_trunc( struct brw_wm_compile *c, const struct prog_instruction *inst) { @@ -1031,12 +1025,20 @@ static void emit_dp3(struct brw_wm_compile *c, struct brw_reg src0[3], src1[3], dst; int i; struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + for (i = 0; i < 3; i++) { src0[i] = get_src_reg(c, inst, 0, i); src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 
1 : 0); @@ -1050,11 +1052,19 @@ static void emit_dp4(struct brw_wm_compile *c, struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + for (i = 0; i < 4; i++) { src0[i] = get_src_reg(c, inst, 0, i); src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, brw_null_reg(), src0[2], src1[2]); @@ -1069,11 +1079,19 @@ static void emit_dph(struct brw_wm_compile *c, struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + for (i = 0; i < 4; i++) { src0[i] = get_src_reg(c, inst, 0, i); src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, dst, src0[2], src1[2]); @@ -1091,37 +1109,28 @@ static void emit_math1(struct brw_wm_compile *c, const struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; - struct brw_reg src0, dst, tmp; - const int mark = mark_tmps( c ); - int i; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; - tmp = alloc_tmp(c); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); /* Get first component of source register */ + dst = get_dst_reg(c, inst, dst_chan); src0 = get_src_reg(c, inst, 0, 0); - /* tmp = func(src0) */ 
brw_MOV(p, brw_message_reg(2), src0); brw_math(p, - tmp, + dst, func, (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); - - /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/ - - /* replicate tmp value across enabled dest channels */ - for (i = 0; i < 4; i++) { - if (inst->DstReg.WriteMask & (1 << i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, tmp); - } - } - - release_tmps(c, mark); } static void emit_rcp(struct brw_wm_compile *c, @@ -1192,24 +1201,6 @@ static void emit_arl(struct brw_wm_compile *c, brw_set_saturate(p, 0); } -static void emit_sub(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg_imm(c, inst, 1, i); - brw_ADD(p, dst, src0, negate(src1)); - } - } - brw_set_saturate(p, 0); -} static void emit_mul(struct brw_wm_compile *c, const struct prog_instruction *inst) @@ -1321,7 +1312,15 @@ static void emit_pow(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + dst = get_dst_reg(c, inst, dst_chan); src0 = get_src_reg_imm(c, inst, 0, 0); src1 = get_src_reg_imm(c, inst, 1, 0); @@ -1474,61 +1473,6 @@ static void emit_sne(struct brw_wm_compile *c, emit_sop(c, inst, BRW_CONDITIONAL_NEQ); } -static void emit_ddx(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint 
mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - GLuint nr, i; - src0 = get_src_reg(c, inst, 0, 0); - w = get_src_reg(c, inst, 1, 3); - nr = src0.nr; - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, interp[i]); - brw_MUL(p, dst, dst, w); - } - } - brw_set_saturate(p, 0); -} - -static void emit_ddy(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - nr = src0.nr; - w = get_src_reg(c, inst, 1, 3); - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, suboffset(interp[i], 1)); - brw_MUL(p, dst, dst, w); - } - } - brw_set_saturate(p, 0); -} - static INLINE struct brw_reg high_words( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), @@ -2623,10 +2567,13 @@ static void emit_txb(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; - GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->TexSrcUnit; GLuint i; GLuint msg_type; + assert(unit < BRW_MAX_TEX_UNIT); + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) @@ -2646,11 +2593,15 @@ static void 
emit_txb(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(3), src[1]); brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); break; - default: + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: brw_MOV(p, brw_message_reg(2), src[0]); brw_MOV(p, brw_message_reg(3), src[1]); brw_MOV(p, brw_message_reg(4), src[2]); break; + default: + /* invalid target */ + abort(); } brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ @@ -2683,13 +2634,16 @@ static void emit_tex(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; - GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->TexSrcUnit; GLuint msg_len; GLuint i, nr; GLuint emit; GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0; GLuint msg_type; + assert(unit < BRW_MAX_TEX_UNIT); + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) @@ -2707,10 +2661,14 @@ static void emit_tex(struct brw_wm_compile *c, emit = WRITEMASK_XY; nr = 2; break; - default: + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: emit = WRITEMASK_XYZ; nr = 3; break; + default: + /* invalid target */ + abort(); } msg_len = 1; @@ -2766,6 +2724,21 @@ static void post_wm_emit( struct brw_wm_compile *c ) brw_resolve_cals(&c->func); } +static void +get_argument_regs(struct brw_wm_compile *c, + const struct prog_instruction *inst, + int index, + struct brw_reg *regs, + int mask) +{ + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1 << i)) + regs[i] = get_src_reg(c, inst, index, i); + } +} + static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) { #define MAX_IF_DEPTH 32 @@ -2783,6 +2756,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) for (i = 0; i < c->nr_fp_insns; i++) { const struct prog_instruction *inst = 
&c->prog_instructions[i]; + int dst_flags; + struct brw_reg args[3][4], dst[4]; + int j; c->cur_inst = i; @@ -2800,6 +2776,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) else brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + dst_flags = inst->DstReg.WriteMask; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + dst_flags |= SATURATE; + switch (inst->Opcode) { case WM_PIXELXY: emit_pixel_xy(c, inst); @@ -2828,18 +2808,12 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case WM_FRONTFACING: emit_frontfacing(c, inst); break; - case OPCODE_ABS: - emit_abs(c, inst); - break; case OPCODE_ADD: emit_add(c, inst); break; case OPCODE_ARL: emit_arl(c, inst); break; - case OPCODE_SUB: - emit_sub(c, inst); - break; case OPCODE_FRC: emit_frc(c, inst); break; @@ -2891,10 +2865,16 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_min_max(c, inst); break; case OPCODE_DDX: - emit_ddx(c, inst); - break; case OPCODE_DDY: - emit_ddy(c, inst); + for (j = 0; j < 4; j++) { + if (inst->DstReg.WriteMask & (1 << j)) + dst[j] = get_dst_reg(c, inst, j); + else + dst[j] = brw_null_reg(); + } + get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW); + emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), + args[0]); break; case OPCODE_SLT: emit_slt(c, inst); @@ -3007,7 +2987,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) loop_depth--; inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); - /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + /* patch all the BREAK/CONT instructions from last BGNLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; if (inst0->header.opcode == BRW_OPCODE_BREAK) { @@ -3032,8 +3012,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); -} + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("wm-native:\n"); + 
for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } +} /** * Do GPU code generation for shaders that use GLSL features such as diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index 8fd067abe7..5e399ac62a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -122,6 +122,7 @@ const struct { */ void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, + GLboolean ps_uses_depth, struct brw_wm_prog_key *key ) { GLuint reg = 2; @@ -131,7 +132,7 @@ void brw_wm_lookup_iz( GLuint line_aa, if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) key->computes_depth = 1; - if (wm_iz_table[lookup].sd_present) { + if (wm_iz_table[lookup].sd_present || ps_uses_depth) { key->source_depth_reg = reg; reg += 2; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 92142764f5..6279258339 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -257,34 +257,6 @@ static void pass0_set_dst( struct brw_wm_compile *c, } -static void pass0_set_dst_scalar( struct brw_wm_compile *c, - struct brw_wm_instruction *out, - const struct prog_instruction *inst, - GLuint writemask ) -{ - if (writemask) { - const struct prog_dst_register *dst = &inst->DstReg; - GLuint i; - - /* Compute only the first (X) value: - */ - out->writemask = WRITEMASK_X; - out->dst[0] = get_value(c); - - /* Update our tracking register file for all the components in - * writemask: - */ - for (i = 0; i < 4; i++) { - if (writemask & (1<<i)) { - pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[0]); - } - } - } - else - out->writemask = 0; -} - - static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, struct prog_src_register src, GLuint i ) @@ -363,10 +335,7 @@ translate_insn(struct brw_wm_compile *c, /* Dst: */ - if (brw_wm_is_scalar_result(out->opcode)) - pass0_set_dst_scalar(c, out, inst, 
writemask); - else - pass0_set_dst(c, out, inst, writemask); + pass0_set_dst(c, out, inst, writemask); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index 3436a24717..b449394029 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -178,6 +178,11 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read1 = writemask; break; + case OPCODE_DDX: + case OPCODE_DDY: + read0 = writemask; + break; + case OPCODE_MAD: case OPCODE_CMP: case OPCODE_LRP: @@ -270,6 +275,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_DST: case WM_FRONTFACING: + case OPCODE_KIL_NV: default: break; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 3fc18ff1f3..dff466587a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -103,6 +103,10 @@ struct wm_sampler_key { GLenum minfilter, magfilter; GLenum comparemode, comparefunc; dri_bo *sdc_bo; + + /** If target is cubemap, take context setting. + */ + GLboolean seamless_cube_map; } sampler[BRW_MAX_TEX_UNIT]; }; @@ -169,30 +173,33 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, } } - if (key->tex_target == GL_TEXTURE_CUBE_MAP && - (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) { - /* If we're using anything but nearest sampling for a cube map, we - * need to set this wrap mode to avoid GPU lock-ups. 
- */ - sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; - sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; - sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; - } - else if (key->tex_target == GL_TEXTURE_1D) { + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); + + /* Cube-maps on 965 and later must use the same wrap mode for all 3 + * coordinate dimensions. Futher, only CUBE and CLAMP are valid. + */ + if (key->tex_target == GL_TEXTURE_CUBE_MAP) { + if (key->seamless_cube_map && + (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; + } else { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + } + } else if (key->tex_target == GL_TEXTURE_1D) { /* There's a bug in 1D texture sampling - it actually pays * attention to the wrap_t value, though it should not. * Override the wrap_t value here to GL_REPEAT to keep * any nonexistent border pixels from floating in. */ - sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); - sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; } - else { - sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); - sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); - sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); - } + /* Set shadow function: */ @@ -249,6 +256,9 @@ brw_wm_sampler_populate_key(struct brw_context *brw, entry->tex_target = texObj->Target; + entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) + ? 
ctx->Texture.CubeMapSeamless : GL_FALSE; + entry->wrap_r = texObj->WrapR; entry->wrap_s = texObj->WrapS; entry->wrap_t = texObj->WrapT; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 39f8c6d522..361f91292b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -107,6 +107,12 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* as far as we can tell */ key->computes_depth = (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; + /* BRW_NEW_DEPTH_BUFFER + * Override for NULL depthbuffer case, required by the Pixel Shader Computed + * Depth field. + */ + if (brw->state.depth_region == NULL) + key->computes_depth = 0; /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; @@ -300,6 +306,7 @@ const struct brw_tracked_state brw_wm_unit = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | + BRW_NEW_DEPTH_BUFFER | BRW_NEW_NR_WM_SURFACES), .cache = (CACHE_NEW_WM_PROG | diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 096f74394e..51539ac1e7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -545,15 +545,20 @@ brw_update_renderbuffer_surface(struct brw_context *brw, irb->texformat->MesaFormat); } key.tiling = region->tiling; - key.width = region->width; - key.height = region->height; + if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) { + key.width = rb->Width; + key.height = rb->Height; + } else { + key.width = region->width; + key.height = region->height; + } key.pitch = region->pitch; key.cpp = region->cpp; key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ } else { key.surface_type = BRW_SURFACE_NULL; key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - key.tiling = 0; + key.tiling = I915_TILING_X; key.width = 1; key.height = 1; key.cpp = 4; 
@@ -629,7 +634,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], offsetof(struct brw_surface_state, ss1), region_bo, - surf.ss1.base_addr, + surf.ss1.base_addr - region_bo->offset, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c new file mode 120000 index 0000000000..0b2e56ab24 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_syncobj.c @@ -0,0 +1 @@ +../intel/intel_syncobj.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 0f87fc46a4..6aa36d10b1 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -196,6 +196,16 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, struct intel_context *intel = batch->intel; GLuint used = batch->ptr - batch->map; + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch->buf; + drm_intel_bo_reference(intel->first_post_swapbuffers_batch); + } + + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch->buf; + drm_intel_bo_reference(intel->first_post_swapbuffers_batch); + } + if (used == 0) { batch->cliprect_mode = IGNORE_CLIPRECTS; return; diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 2e95bd1013..43141c509c 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -26,9 +26,6 @@ **************************************************************************/ -#include <stdio.h> -#include <errno.h> - #include "main/mtypes.h" #include "main/context.h" #include "main/enums.h" @@ -477,6 +474,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) BR13 |= BR13_565; } + assert(irb->region->tiling != I915_TILING_Y); + #ifndef I915 if (irb->region->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; @@ -571,6 +570,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, assert( logic_op - GL_CLEAR >= 0 ); assert( logic_op - GL_CLEAR < 0x10 ); + assert(dst_pitch > 0); if (w < 0 || h < 0) return GL_TRUE; @@ -633,3 +633,43 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, return GL_TRUE; } + +/* We don't have a memmove-type blit like some other hardware, so we'll do a + * rectangular blit covering a large space, then emit 
1-scanline blit at the + * end to cover the last if we need. + */ +void +intel_emit_linear_blit(struct intel_context *intel, + drm_intel_bo *dst_bo, + unsigned int dst_offset, + drm_intel_bo *src_bo, + unsigned int src_offset, + unsigned int size) +{ + GLuint pitch, height; + + /* The pitch is a signed value. */ + pitch = MIN2(size, (1 << 15) - 1); + height = size / pitch; + intelEmitCopyBlit(intel, 1, + pitch, src_bo, src_offset, I915_TILING_NONE, + pitch, dst_bo, dst_offset, I915_TILING_NONE, + 0, 0, /* src x/y */ + 0, 0, /* dst x/y */ + pitch, height, /* w, h */ + GL_COPY); + + src_offset += pitch * height; + dst_offset += pitch * height; + size -= pitch * height; + assert (size < (1 << 15)); + if (size != 0) { + intelEmitCopyBlit(intel, 1, + size, src_bo, src_offset, I915_TILING_NONE, + size, dst_bo, dst_offset, I915_TILING_NONE, + 0, 0, /* src x/y */ + 0, 0, /* dst x/y */ + size, 1, /* w, h */ + GL_COPY); + } +} diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h index 152fa3f17b..240cb7cd1b 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.h +++ b/src/mesa/drivers/dri/intel/intel_blit.h @@ -63,5 +63,11 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLshort x, GLshort y, GLshort w, GLshort h, GLenum logic_op); +void intel_emit_linear_blit(struct intel_context *intel, + drm_intel_bo *dst_bo, + unsigned int dst_offset, + drm_intel_bo *src_bo, + unsigned int src_offset, + unsigned int size); #endif diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 9600557f2c..c55c5c426e 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -28,9 +28,11 @@ #include "main/imports.h" #include "main/mtypes.h" +#include "main/macros.h" #include "main/bufferobj.h" #include "intel_context.h" +#include "intel_blit.h" #include "intel_buffer_objects.h" #include "intel_batchbuffer.h" #include 
"intel_regions.h" @@ -128,9 +130,10 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj) * Allocate space for and store data in a buffer object. Any data that was * previously stored in the buffer object is lost. If data is NULL, * memory will be allocated, but no copy will occur. - * Called via glBufferDataARB(). + * Called via ctx->Driver.BufferData(). + * \return GL_TRUE for success, GL_FALSE if out of memory */ -static void +static GLboolean intel_bufferobj_data(GLcontext * ctx, GLenum target, GLsizeiptrARB size, @@ -165,15 +168,19 @@ intel_bufferobj_data(GLcontext * ctx, if (intel_obj->sys_buffer != NULL) { if (data != NULL) memcpy(intel_obj->sys_buffer, data, size); - return; + return GL_TRUE; } } #endif intel_bufferobj_alloc_buffer(intel, intel_obj); + if (!intel_obj->buffer) + return GL_FALSE; if (data != NULL) dri_bo_subdata(intel_obj->buffer, 0, size, data); } + + return GL_TRUE; } @@ -243,8 +250,10 @@ intel_bufferobj_map(GLcontext * ctx, return obj->Pointer; } - if (!read_only) - intelFlush(ctx); + /* Flush any existing batchbuffer that might have written to this + * buffer. + */ + intelFlush(ctx); if (intel_obj->region) intel_bufferobj_cow(intel, intel_obj); @@ -263,32 +272,205 @@ intel_bufferobj_map(GLcontext * ctx, } obj->Pointer = intel_obj->buffer->virtual; + obj->Length = obj->Size; + obj->Offset = 0; + return obj->Pointer; } +/** + * Called via glMapBufferRange(). + * + * The goal of this extension is to allow apps to accumulate their rendering + * at the same time as they accumulate their buffer object. Without it, + * you'd end up blocking on execution of rendering every time you mapped + * the buffer to put new data in. + * + * We support it in 3 ways: If unsynchronized, then don't bother + * flushing the batchbuffer before mapping the buffer, which can save blocking + * in many cases. If we would still block, and they allow the whole buffer + * to be invalidated, then just allocate a new buffer to replace the old one. 
+ * If not, and we'd block, and they allow the subrange of the buffer to be + * invalidated, then we can make a new little BO, let them write into that, + * and blit it into the real BO at unmap time. + */ +static void * +intel_bufferobj_map_range(GLcontext * ctx, + GLenum target, GLintptr offset, GLsizeiptr length, + GLbitfield access, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + + /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also + * internally uses our functions directly. + */ + obj->Offset = offset; + obj->Length = length; + obj->AccessFlags = access; + + if (intel_obj->sys_buffer) { + obj->Pointer = intel_obj->sys_buffer + offset; + return obj->Pointer; + } + + if (intel_obj->region) + intel_bufferobj_cow(intel, intel_obj); + + /* If the mapping is synchronized with other GL operations, flush + * the batchbuffer so that GEM knows about the buffer access for later + * syncing. + */ + if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) + intelFlush(ctx); + + if (intel_obj->buffer == NULL) { + obj->Pointer = NULL; + return NULL; + } + + /* If the user doesn't care about existing buffer contents and mapping + * would cause us to block, then throw out the old buffer. + */ + if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) && + (access & GL_MAP_INVALIDATE_BUFFER_BIT) && + drm_intel_bo_busy(intel_obj->buffer)) { + drm_intel_bo_unreference(intel_obj->buffer); + intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj", + intel_obj->Base.Size, 64); + } + + /* If the user is mapping a range of an active buffer object but + * doesn't require the current contents of that range, make a new + * BO, and we'll copy what they put in there out at unmap or + * FlushRange time. 
+ */ + if ((access & GL_MAP_INVALIDATE_RANGE_BIT) && + drm_intel_bo_busy(intel_obj->buffer)) { + if (access & GL_MAP_FLUSH_EXPLICIT_BIT) { + intel_obj->range_map_buffer = _mesa_malloc(length); + obj->Pointer = intel_obj->range_map_buffer; + } else { + intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr, + "range map", + length, 64); + if (!(access & GL_MAP_READ_BIT) && + intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo); + intel_obj->mapped_gtt = GL_TRUE; + } else { + drm_intel_bo_map(intel_obj->range_map_bo, + (access & GL_MAP_WRITE_BIT) != 0); + intel_obj->mapped_gtt = GL_FALSE; + } + obj->Pointer = intel_obj->range_map_bo->virtual; + } + return obj->Pointer; + } + + if (!(access & GL_MAP_READ_BIT) && + intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(intel_obj->buffer); + intel_obj->mapped_gtt = GL_TRUE; + } else { + drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0); + intel_obj->mapped_gtt = GL_FALSE; + } + + obj->Pointer = intel_obj->buffer->virtual + offset; + return obj->Pointer; +} + +/* Ideally we'd use a BO to avoid taking up cache space for the temporary + * data, but FlushMappedBufferRange may be followed by further writes to + * the pointer, so we would have to re-map after emitting our blit, which + * would defeat the point. + */ +static void +intel_bufferobj_flush_mapped_range(GLcontext *ctx, GLenum target, + GLintptr offset, GLsizeiptr length, + struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + drm_intel_bo *temp_bo; + + /* Unless we're in the range map using a temporary system buffer, + * there's no work to do. 
+ */ + if (intel_obj->range_map_buffer == NULL) + return; + + temp_bo = drm_intel_bo_alloc(intel->bufmgr, "range map flush", length, 64); + + drm_intel_bo_subdata(temp_bo, 0, length, intel_obj->range_map_buffer); + + intel_emit_linear_blit(intel, + intel_obj->buffer, obj->Offset + offset, + temp_bo, 0, + length); + + drm_intel_bo_unreference(temp_bo); +} + /** - * Called via glMapBufferARB(). + * Called via glUnmapBuffer(). */ static GLboolean intel_bufferobj_unmap(GLcontext * ctx, GLenum target, struct gl_buffer_object *obj) { + struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); + assert(obj->Pointer); if (intel_obj->sys_buffer != NULL) { - assert(obj->Pointer); - obj->Pointer = NULL; + /* always keep the mapping around. */ + } else if (intel_obj->range_map_buffer != NULL) { + /* Since we've emitted some blits to buffers that will (likely) be used + * in rendering operations in other cache domains in this batch, emit a + * flush. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer. + */ + intel_batchbuffer_emit_mi_flush(intel->batch); + free(intel_obj->range_map_buffer); + intel_obj->range_map_buffer = NULL; + } else if (intel_obj->range_map_bo != NULL) { + if (intel_obj->mapped_gtt) { + drm_intel_gem_bo_unmap_gtt(intel_obj->range_map_bo); + } else { + drm_intel_bo_unmap(intel_obj->range_map_bo); + } + + intel_emit_linear_blit(intel, + intel_obj->buffer, obj->Offset, + intel_obj->range_map_bo, 0, + obj->Length); + + /* Since we've emitted some blits to buffers that will (likely) be used + * in rendering operations in other cache domains in this batch, emit a + * flush. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer. 
+ */ + intel_batchbuffer_emit_mi_flush(intel->batch); + + drm_intel_bo_unreference(intel_obj->range_map_bo); + intel_obj->range_map_bo = NULL; } else if (intel_obj->buffer != NULL) { - assert(obj->Pointer); if (intel_obj->mapped_gtt) { drm_intel_gem_bo_unmap_gtt(intel_obj->buffer); } else { drm_intel_bo_unmap(intel_obj->buffer); } - obj->Pointer = NULL; } + obj->Pointer = NULL; + obj->Offset = 0; + obj->Length = 0; + return GL_TRUE; } @@ -325,6 +507,64 @@ intel_bufferobj_buffer(struct intel_context *intel, return intel_obj->buffer; } +static void +intel_bufferobj_copy_subdata(GLcontext *ctx, + struct gl_buffer_object *src, + struct gl_buffer_object *dst, + GLintptr read_offset, GLintptr write_offset, + GLsizeiptr size) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_src = intel_buffer_object(src); + struct intel_buffer_object *intel_dst = intel_buffer_object(dst); + drm_intel_bo *src_bo, *dst_bo; + + if (size == 0) + return; + + /* If we're in system memory, just map and memcpy. */ + if (intel_src->sys_buffer || intel_dst->sys_buffer) { + /* The same buffer may be used, but note that regions copied may + * not overlap. + */ + if (src == dst) { + char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER, + GL_READ_WRITE, dst); + memcpy(ptr + write_offset, ptr + read_offset, size); + intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst); + } else { + const char *src_ptr; + char *dst_ptr; + + src_ptr = intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER, + GL_READ_ONLY, src); + dst_ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER, + GL_WRITE_ONLY, dst); + + memcpy(dst_ptr + write_offset, src_ptr + read_offset, size); + + intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src); + intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst); + } + } + + /* Otherwise, we have real BOs, so blit them. 
*/ + + dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART); + src_bo = intel_bufferobj_buffer(intel, intel_src, INTEL_READ); + + intel_emit_linear_blit(intel, + dst_bo, write_offset, + src_bo, read_offset, size); + + /* Since we've emitted some blits to buffers that will (likely) be used + * in rendering operations in other cache domains in this batch, emit a + * flush. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer. + */ + intel_batchbuffer_emit_mi_flush(intel->batch); +} + void intelInitBufferObjectFuncs(struct dd_function_table *functions) { @@ -334,5 +574,8 @@ intelInitBufferObjectFuncs(struct dd_function_table *functions) functions->BufferSubData = intel_bufferobj_subdata; functions->GetBufferSubData = intel_bufferobj_get_subdata; functions->MapBuffer = intel_bufferobj_map; + functions->MapBufferRange = intel_bufferobj_map_range; + functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range; functions->UnmapBuffer = intel_bufferobj_unmap; + functions->CopyBufferSubData = intel_bufferobj_copy_subdata; } diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h index 8164407f07..bf3e08a320 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h @@ -48,6 +48,12 @@ struct intel_buffer_object struct intel_region *region; /* Is there a zero-copy texture associated with this (pixel) buffer object? 
*/ + + drm_intel_bo *range_map_bo; + void *range_map_buffer; + unsigned int range_map_offset; + GLsizei range_map_size; + GLboolean mapped_gtt; }; diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h index 3c38f1676c..3dc8653a73 100644 --- a/src/mesa/drivers/dri/intel/intel_chipset.h +++ b/src/mesa/drivers/dri/intel/intel_chipset.h @@ -66,6 +66,7 @@ #define PCI_CHIP_Q45_G 0x2E12 #define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G41_G 0x2E32 +#define PCI_CHIP_B43_G 0x2E42 #define PCI_CHIP_ILD_G 0x0042 #define PCI_CHIP_ILM_G 0x0046 @@ -83,7 +84,8 @@ #define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_G45_G || \ - devid == PCI_CHIP_G41_G) + devid == PCI_CHIP_G41_G || \ + devid == PCI_CHIP_B43_G) #define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) #define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index cfddabd318..fef977f465 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -27,25 +27,9 @@ **************************************************************************/ #include "main/glheader.h" -#include "main/enums.h" -#include "main/image.h" #include "main/mtypes.h" -#include "main/arrayobj.h" -#include "main/attrib.h" -#include "main/blend.h" -#include "main/bufferobj.h" -#include "main/buffers.h" -#include "main/depth.h" -#include "main/enable.h" -#include "main/macros.h" -#include "main/matrix.h" -#include "main/polygon.h" -#include "main/texstate.h" -#include "main/shaders.h" -#include "main/stencil.h" -#include "main/varray.h" -#include "glapi/dispatch.h" #include "swrast/swrast.h" +#include "drivers/common/meta.h" #include "intel_context.h" #include "intel_blit.h" @@ -54,6 +38,7 @@ #include "intel_fbo.h" #include "intel_pixel.h" #include "intel_regions.h" +#include "intel_batchbuffer.h" #define FILE_DEBUG_FLAG 
DEBUG_BLIT @@ -140,7 +125,7 @@ intelClear(GLcontext *ctx, GLbitfield mask) * buffer with it. */ if (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) { - int color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS); + int color_bit = _mesa_ffs(mask & BUFFER_BITS_COLOR); if (color_bit != 0) { tri_mask |= blit_mask & (1 << (color_bit - 1)); blit_mask &= ~(1 << (color_bit - 1)); @@ -150,14 +135,18 @@ intelClear(GLcontext *ctx, GLbitfield mask) /* SW fallback clearing */ swrast_mask = mask & ~tri_mask & ~blit_mask; - for (i = 0; i < BUFFER_COUNT; i++) { - GLuint bufBit = 1 << i; - if ((blit_mask | tri_mask) & bufBit) { + { + /* look for non-Intel renderbuffers (clear them with swrast) */ + GLbitfield blit_or_tri = blit_mask | tri_mask; + while (blit_or_tri) { + GLuint i = _mesa_ffs(blit_or_tri) - 1; + GLbitfield bufBit = 1 << i; if (!fb->Attachment[i].Renderbuffer->ClassID) { blit_mask &= ~bufBit; tri_mask &= ~bufBit; swrast_mask |= bufBit; } + blit_or_tri ^= bufBit; } } @@ -182,7 +171,9 @@ intelClear(GLcontext *ctx, GLbitfield mask) } DBG("\n"); } - meta_clear_tris(&intel->meta, tri_mask); + + _mesa_meta_Clear(&intel->ctx, tri_mask); + intel_batchbuffer_flush(intel->batch); } if (swrast_mask) { diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 4abb525f78..d49d95768d 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -38,6 +38,7 @@ #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" #include "drivers/common/driverfuncs.h" +#include "drivers/common/meta.h" #include "i830_dri.h" @@ -161,6 +162,9 @@ intelGetString(GLcontext * ctx, GLenum name) case PCI_CHIP_G41_G: chipset = "Intel(R) G41"; break; + case PCI_CHIP_B43_G: + chipset = "Intel(R) B43"; + break; case PCI_CHIP_ILD_G: chipset = "Intel(R) IGDNG_D"; break; @@ -503,7 +507,8 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2) - && 
(screen->dri2.loader->flushFrontBuffer != NULL)) { + && (screen->dri2.loader->flushFrontBuffer != NULL) && + intel->driDrawable && intel->driDrawable->loaderPrivate) { (*screen->dri2.loader->flushFrontBuffer)(intel->driDrawable, intel->driDrawable->loaderPrivate); @@ -513,7 +518,7 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) * each of N places that do rendering. This has worse performances, * but it is much easier to get correct. */ - if (intel->is_front_buffer_rendering) { + if (!intel->is_front_buffer_rendering) { intel->front_buffer_dirty = GL_FALSE; } } @@ -529,7 +534,27 @@ intelFlush(GLcontext * ctx) static void intel_glFlush(GLcontext *ctx) { + struct intel_context *intel = intel_context(ctx); + intel_flush(ctx, GL_TRUE); + + /* We're using glFlush as an indicator that a frame is done, which is + * what DRI2 does before calling SwapBuffers (and means we should catch + * people doing front-buffer rendering, as well).. + * + * Wait for the swapbuffers before the one we just emitted, so we don't + * get too many swaps outstanding for apps that are GPU-heavy but not + * CPU-heavy. + * + * Unfortunately, we don't have a handle to the batch containing the swap, + * and getting our hands on that doesn't seem worth it, so we just us the + * first batch we emitted after the last swap. 
+ */ + if (intel->first_post_swapbuffers_batch != NULL) { + drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; + } } void @@ -563,11 +588,6 @@ intelInitDriverFunctions(struct dd_function_table *functions) functions->GetString = intelGetString; functions->UpdateState = intelInvalidateState; - functions->CopyColorTable = _swrast_CopyColorTable; - functions->CopyColorSubTable = _swrast_CopyColorSubTable; - functions->CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D; - functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D; - intelInitTextureFuncs(functions); intelInitTextureImageFuncs(functions); intelInitTextureSubImageFuncs(functions); @@ -577,6 +597,7 @@ intelInitDriverFunctions(struct dd_function_table *functions) intelInitBufferFuncs(functions); intelInitPixelFuncs(functions); intelInitBufferObjectFuncs(functions); + intel_init_syncobj_functions(functions); } @@ -692,6 +713,8 @@ intelInitContext(struct intel_context *intel, _swrast_allow_pixel_fog(ctx, GL_FALSE); _swrast_allow_vertex_fog(ctx, GL_TRUE); + _mesa_meta_init(ctx); + intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24; intel->hw_stipple = 1; @@ -752,7 +775,7 @@ intelInitContext(struct intel_context *intel, if (intel->use_texture_tiling && !intel->intelScreen->kernel_exec_fencing) { fprintf(stderr, "No kernel support for execution fencing, " - "disabling texture tiling"); + "disabling texture tiling\n"); intel->use_texture_tiling = GL_FALSE; } intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z"); @@ -795,6 +818,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) INTEL_FIREVERTICES(intel); + _mesa_meta_free(&intel->ctx); + meta_destroy_metaops(&intel->meta); intel->vtbl.destroy(intel); @@ -814,6 +839,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel->prim.vb = NULL; 
dri_bo_unreference(intel->prim.vb_bo); intel->prim.vb_bo = NULL; + dri_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; if (release_texture_heaps) { /* Nothing is currently done here to free texture heaps; @@ -891,6 +918,14 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) GLboolean intelUnbindContext(__DRIcontextPrivate * driContextPriv) { + struct intel_context *intel = + (struct intel_context *) driContextPriv->driverPrivate; + + /* Deassociate the context with the drawables. + */ + intel->driDrawable = NULL; + intel->driReadDrawable = NULL; + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 08bea88c95..03e7cf39d6 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -80,9 +80,19 @@ extern void intelFallback(struct intel_context *intel, GLuint bit, #define INTEL_MAX_FIXUP 64 +struct intel_sync_object { + struct gl_sync_object Base; + + /** Batch associated with this sync object */ + drm_intel_bo *bo; +}; + +/** + * intel_context is derived from Mesa's context class: GLcontext. 
+ */ struct intel_context { - GLcontext ctx; /* the parent class */ + GLcontext ctx; /**< base class, must be first field */ struct { @@ -178,6 +188,7 @@ struct intel_context GLboolean ttm; struct intel_batchbuffer *batch; + drm_intel_bo *first_post_swapbuffers_batch; GLboolean no_batch_wrap; unsigned batch_id; @@ -307,7 +318,7 @@ struct intel_context __DRIdrawablePrivate *driReadDrawable; __DRIscreenPrivate *driScreen; intelScreenPrivate *intelScreen; - volatile struct drm_i915_sarea *sarea; + volatile drm_i915_sarea_t *sarea; GLuint lastStamp; @@ -465,6 +476,8 @@ extern void intelFlush(GLcontext * ctx); extern void intelInitDriverFunctions(struct dd_function_table *functions); +void intel_init_syncobj_functions(struct dd_function_table *functions); + /* ================================================================ * intel_state.c: diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 7742609d24..5431cf90a1 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -30,10 +30,14 @@ #include "intel_extensions.h" +#define need_GL_ARB_copy_buffer +#define need_GL_ARB_draw_elements_base_vertex #define need_GL_ARB_framebuffer_object +#define need_GL_ARB_map_buffer_range #define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters #define need_GL_ARB_shader_objects +#define need_GL_ARB_sync #define need_GL_ARB_vertex_array_object #define need_GL_ARB_vertex_program #define need_GL_ARB_vertex_shader @@ -48,6 +52,7 @@ #define need_GL_EXT_framebuffer_blit #define need_GL_EXT_gpu_program_parameters #define need_GL_EXT_point_parameters +#define need_GL_EXT_provoking_vertex #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side #define need_GL_APPLE_vertex_array_object @@ -68,9 +73,14 @@ * i965_dri. 
*/ static const struct dri_extension card_extensions[] = { + { "GL_ARB_copy_buffer", GL_ARB_copy_buffer_functions }, + { "GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions }, { "GL_ARB_half_float_pixel", NULL }, + { "GL_ARB_map_buffer_range", GL_ARB_map_buffer_range_functions }, { "GL_ARB_multitexture", NULL }, { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, + { "GL_ARB_point_sprite", NULL }, + { "GL_ARB_sync", GL_ARB_sync_functions }, { "GL_ARB_texture_border_clamp", NULL }, { "GL_ARB_texture_cube_map", NULL }, { "GL_ARB_texture_env_add", NULL }, @@ -92,6 +102,7 @@ static const struct dri_extension card_extensions[] = { { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, { "GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions }, { "GL_EXT_packed_depth_stencil", NULL }, + { "GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_edge_clamp", NULL }, @@ -119,8 +130,10 @@ static const struct dri_extension i915_extensions[] = { { "GL_ARB_fragment_program", NULL }, { "GL_ARB_shadow", NULL }, { "GL_ARB_texture_non_power_of_two", NULL }, + { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions }, { "GL_ATI_texture_env_combine3", NULL }, { "GL_EXT_shadow_funcs", NULL }, + { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions }, { "GL_NV_texture_env_combine4", NULL }, { NULL, NULL } }; @@ -128,6 +141,7 @@ static const struct dri_extension i915_extensions[] = { /** i965-only extensions */ static const struct dri_extension brw_extensions[] = { + { "GL_ARB_depth_clamp", NULL }, { "GL_ARB_depth_texture", NULL }, { "GL_ARB_fragment_program", NULL }, { "GL_ARB_fragment_program_shadow", NULL }, @@ -135,6 +149,7 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions}, { "GL_ARB_occlusion_query", 
GL_ARB_occlusion_query_functions }, { "GL_ARB_point_sprite", NULL }, + { "GL_ARB_seamless_cube_map", NULL }, { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 666893596e..8dfb24290d 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -35,6 +35,7 @@ #include "main/context.h" #include "main/texformat.h" #include "main/texrender.h" +#include "drivers/common/meta.h" #include "intel_context.h" #include "intel_buffers.h" @@ -700,74 +701,6 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) /** - * Called from glBlitFramebuffer(). - * For now, we're doing an approximation with glCopyPixels(). - * XXX we need to bypass all the per-fragment operations, except scissor. - */ -static void -intel_blit_framebuffer(GLcontext *ctx, - GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, - GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, - GLbitfield mask, GLenum filter) -{ - const GLfloat xZoomSave = ctx->Pixel.ZoomX; - const GLfloat yZoomSave = ctx->Pixel.ZoomY; - GLsizei width, height; - GLfloat xFlip = 1.0F, yFlip = 1.0F; - - if (srcX1 < srcX0) { - GLint tmp = srcX1; - srcX1 = srcX0; - srcX0 = tmp; - xFlip = -1.0F; - } - - if (srcY1 < srcY0) { - GLint tmp = srcY1; - srcY1 = srcY0; - srcY0 = tmp; - yFlip = -1.0F; - } - - width = srcX1 - srcX0; - height = srcY1 - srcY0; - - ctx->Pixel.ZoomX = xFlip * (dstX1 - dstX0) / (srcX1 - srcY0); - ctx->Pixel.ZoomY = yFlip * (dstY1 - dstY0) / (srcY1 - srcY0); - - if (ctx->Pixel.ZoomX < 0.0F) { - dstX0 = MAX2(dstX0, dstX1); - } - else { - dstX0 = MIN2(dstX0, dstX1); - } - - if (ctx->Pixel.ZoomY < 0.0F) { - dstY0 = MAX2(dstY0, dstY1); - } - else { - dstY0 = MIN2(dstY0, dstY1); - } - - if (mask & GL_COLOR_BUFFER_BIT) { - 
ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height, - dstX0, dstY0, GL_COLOR); - } - if (mask & GL_DEPTH_BUFFER_BIT) { - ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height, - dstX0, dstY0, GL_DEPTH); - } - if (mask & GL_STENCIL_BUFFER_BIT) { - ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height, - dstX0, dstY0, GL_STENCIL); - } - - ctx->Pixel.ZoomX = xZoomSave; - ctx->Pixel.ZoomY = yZoomSave; -} - - -/** * Do one-time context initializations related to GL_EXT_framebuffer_object. * Hook in device driver functions. */ @@ -782,5 +715,5 @@ intel_fbo_init(struct intel_context *intel) intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture; intel->ctx.Driver.ResizeBuffers = intel_resize_buffers; intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer; - intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer; + intel->ctx.Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer; } diff --git a/src/mesa/drivers/dri/intel/intel_generatemipmap.c b/src/mesa/drivers/dri/intel/intel_generatemipmap.c index fe986092db..237754d469 100644 --- a/src/mesa/drivers/dri/intel/intel_generatemipmap.c +++ b/src/mesa/drivers/dri/intel/intel_generatemipmap.c @@ -223,10 +223,6 @@ fail: * * The texture object's miptree must be mapped. * - * It would be really nice if this was just called by Mesa whenever mipmaps - * needed to be regenerated, rather than us having to remember to do so in - * each texture image modification path. - * * This function should also include an accelerated path. 
*/ void diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 540e7620a9..9a0bcc07a5 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -93,19 +93,12 @@ static const GLubyte *map_pbo( GLcontext *ctx, return ADD_POINTERS(buf, bitmap); } -static GLboolean test_bit( const GLubyte *src, - GLuint bit ) +static GLboolean test_bit( const GLubyte *src, GLuint bit ) { return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0; } -static GLboolean test_msb_bit(const GLubyte *src, GLuint bit) -{ - return (src[bit/8] & (1<<(7 - (bit % 8)))) ? 1 : 0; -} - -static void set_bit( GLubyte *dest, - GLuint bit ) +static void set_bit( GLubyte *dest, GLuint bit ) { dest[bit/8] |= 1 << (bit % 8); } @@ -216,7 +209,7 @@ do_blit_bitmap( GLcontext *ctx, if (!dst) return GL_FALSE; - if (unpack->BufferObj->Name) { + if (_mesa_is_bufferobj(unpack->BufferObj)) { bitmap = map_pbo(ctx, width, height, unpack, bitmap); if (bitmap == NULL) return GL_TRUE; /* even though this is an error, we're done */ @@ -336,7 +329,7 @@ out: if (INTEL_DEBUG & DEBUG_SYNC) intel_batchbuffer_flush(intel->batch); - if (unpack->BufferObj->Name) { + if (_mesa_is_bufferobj(unpack->BufferObj)) { /* done with PBO so unmap it now */ ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, unpack->BufferObj); @@ -365,9 +358,7 @@ intel_texture_bitmap(GLcontext * ctx, GLuint texname; GLfloat vertices[4][4]; GLint old_active_texture; - GLubyte *unpacked_bitmap; GLubyte *a8_bitmap; - int x, y; GLfloat dst_z; /* We need a fragment program for the KIL effect */ @@ -427,23 +418,17 @@ intel_texture_bitmap(GLcontext * ctx, return GL_FALSE; } - /* Convert the A1 bitmap to an A8 format suitable for glTexImage */ - if (unpack->BufferObj->Name) { + if (_mesa_is_bufferobj(unpack->BufferObj)) { bitmap = map_pbo(ctx, width, height, unpack, bitmap); if (bitmap == NULL) return GL_TRUE; /* even though this is an error, we're done 
*/ } - unpacked_bitmap = _mesa_unpack_bitmap(width, height, bitmap, - unpack); + + /* Convert the A1 bitmap to an A8 format suitable for glTexImage */ a8_bitmap = _mesa_calloc(width * height); - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - if (test_msb_bit(unpacked_bitmap, ALIGN(width, 8) * y + x)) - a8_bitmap[y * width + x] = 0xff; - } - } - _mesa_free(unpacked_bitmap); - if (unpack->BufferObj->Name) { + _mesa_expand_bitmap(width, height, unpack, bitmap, a8_bitmap, width, 0xff); + + if (_mesa_is_bufferobj(unpack->BufferObj)) { /* done with PBO so unmap it now */ ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, unpack->BufferObj); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index 5d52335dee..f058b3c8e4 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -26,18 +26,13 @@ **************************************************************************/ #include "main/glheader.h" -#include "main/enums.h" #include "main/image.h" #include "main/state.h" #include "main/mtypes.h" -#include "main/macros.h" -#include "swrast/swrast.h" +#include "drivers/common/meta.h" -#include "intel_screen.h" #include "intel_context.h" -#include "intel_batchbuffer.h" #include "intel_buffers.h" -#include "intel_blit.h" #include "intel_regions.h" #include "intel_pixel.h" @@ -97,162 +92,6 @@ intel_check_copypixel_blit_fragment_ops(GLcontext * ctx) ctx->Color.BlendEnabled); } -#ifdef I915 -/* Doesn't work for overlapping regions. Could do a double copy or - * just fallback. 
- */ -static GLboolean -do_texture_copypixels(GLcontext * ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint dstx, GLint dsty, GLenum type) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_region *dst = intel_drawbuf_region(intel); - struct intel_region *src = copypix_src_region(intel, type); - GLenum src_format; - GLenum src_type; - - DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__, - srcx, srcy, width, height, dstx, dsty); - - if (!src || !dst || type != GL_COLOR) - return GL_FALSE; - - if (ctx->_ImageTransferState) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: check_color failed\n", __FUNCTION__); - return GL_FALSE; - } - - /* Can't handle overlapping regions. Don't have sufficient control - * over rasterization to pull it off in-place. Punt on these for - * now. - * - * XXX: do a copy to a temporary. - */ - if (src->buffer == dst->buffer) { - drm_clip_rect_t srcbox; - drm_clip_rect_t dstbox; - drm_clip_rect_t tmp; - - srcbox.x1 = srcx; - srcbox.y1 = srcy; - srcbox.x2 = srcx + width; - srcbox.y2 = srcy + height; - - if (ctx->Pixel.ZoomX > 0) { - dstbox.x1 = dstx; - dstbox.x2 = dstx + width * ctx->Pixel.ZoomX; - } else { - dstbox.x1 = dstx + width * ctx->Pixel.ZoomX; - dstbox.x2 = dstx; - } - if (ctx->Pixel.ZoomY > 0) { - dstbox.y1 = dsty; - dstbox.y2 = dsty + height * ctx->Pixel.ZoomY; - } else { - dstbox.y1 = dsty + height * ctx->Pixel.ZoomY; - dstbox.y2 = dsty; - } - - DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2); - DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2, - width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY); - - if (intel_intersect_cliprects(&tmp, &srcbox, &dstbox)) { - DBG("%s: regions overlap\n", __FUNCTION__); - return GL_FALSE; - } - } - - intelFlush(&intel->ctx); - - intel->vtbl.install_meta_state(intel); - - /* Is this true? 
Also will need to turn depth testing on according - * to state: - */ - intel->vtbl.meta_no_stencil_write(intel); - intel->vtbl.meta_no_depth_write(intel); - - /* Set the 3d engine to draw into the destination region: - */ - intel->vtbl.meta_draw_region(intel, dst, intel->depth_region); - - intel->vtbl.meta_import_pixel_state(intel); - - if (src->cpp == 2) { - src_format = GL_RGB; - src_type = GL_UNSIGNED_SHORT_5_6_5; - } - else { - src_format = GL_BGRA; - src_type = GL_UNSIGNED_BYTE; - } - - /* Set the frontbuffer up as a large rectangular texture. - */ - if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, 0, - src->pitch, - src->height, src_format, src_type)) { - intel->vtbl.leave_meta_state(intel); - return GL_FALSE; - } - - - intel->vtbl.meta_texture_blend_replace(intel); - - LOCK_HARDWARE(intel); - - if (intel->driDrawable->numClipRects) { - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - - srcy = dPriv->h - srcy - height; /* convert from gl to hardware coords */ - - srcx += dPriv->x; - srcy += dPriv->y; - - /* Clip against the source region. This is the only source - * clipping we do. XXX: Just set the texcord wrap mode to clamp - * or similar. - * - */ - if (0) { - GLint orig_x = srcx; - GLint orig_y = srcy; - - if (!_mesa_clip_to_region(0, 0, src->pitch, src->height, - &srcx, &srcy, &width, &height)) - goto out; - - dstx += srcx - orig_x; - dsty += (srcy - orig_y) * ctx->Pixel.ZoomY; - } - - /* Just use the regular cliprect mechanism... Does this need to - * even hold the lock??? - */ - intel->vtbl.meta_draw_quad(intel, - dstx, - dstx + width * ctx->Pixel.ZoomX, - dPriv->h - (dsty + height * ctx->Pixel.ZoomY), - dPriv->h - (dsty), 0, /* XXX: what z value? 
*/ - 0x00ff00ff, - srcx, srcx + width, srcy, srcy + height); - - out: - intel->vtbl.leave_meta_state(intel); - intel_batchbuffer_emit_mi_flush(intel->batch); - } - UNLOCK_HARDWARE(intel); - - DBG("%s: success\n", __FUNCTION__); - return GL_TRUE; -} -#endif /* I915 */ - /** * CopyPixels with the blitter. Don't support zooming, pixel transfer, etc. @@ -400,12 +239,6 @@ intelCopyPixels(GLcontext * ctx, if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) return; -#ifdef I915 - if (do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) - return; -#endif - - DBG("fallback to _swrast_CopyPixels\n"); - - _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type); + /* this will use swrast if needed */ + _mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type); } diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index a6b6824164..5ffa847fd4 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -29,8 +29,6 @@ #include "main/enums.h" #include "main/image.h" #include "main/mtypes.h" -#include "main/macros.h" -#include "main/bufferobj.h" #include "main/teximage.h" #include "main/texenv.h" #include "main/texobj.h" @@ -41,163 +39,22 @@ #include "main/enable.h" #include "main/buffers.h" #include "main/fbobject.h" -#include "main/renderbuffer.h" #include "main/depth.h" #include "main/hash.h" #include "main/blend.h" -#include "main/viewport.h" -#include "glapi/dispatch.h" #include "swrast/swrast.h" +#include "drivers/common/meta.h" -#include "intel_screen.h" #include "intel_context.h" #include "intel_batchbuffer.h" #include "intel_blit.h" #include "intel_buffers.h" #include "intel_regions.h" #include "intel_pixel.h" -#include "intel_buffer_objects.h" #include "intel_fbo.h" -static GLboolean -intel_texture_drawpixels(GLcontext * ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - GLenum 
format, - GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels) -{ - struct intel_context *intel = intel_context(ctx); - GLuint texname; - GLfloat vertices[4][4]; - GLfloat z; - GLint old_active_texture; - GLenum internalFormat; - - /* We're going to mess with texturing with no regard to existing texture - * state, so if there is some set up we have to bail. - */ - if (ctx->Texture._EnabledUnits != 0) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "glDrawPixels() fallback: texturing enabled\n"); - return GL_FALSE; - } - - /* Can't do textured DrawPixels with a fragment program, unless we were - * to generate a new program that sampled our texture and put the results - * in the fragment color before the user's program started. - */ - if (ctx->FragmentProgram.Enabled) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "glDrawPixels() fallback: fragment program enabled\n"); - return GL_FALSE; - } - - /* We don't have a way to generate fragments with stencil values which - * will set the resulting stencil value. - */ - if (format == GL_STENCIL_INDEX || format == GL_DEPTH_STENCIL) - return GL_FALSE; - - /* Check that we can load in a texture this big. */ - if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) || - height > (1 << (ctx->Const.MaxTextureLevels - 1))) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "glDrawPixels() fallback: bitmap too large (%dx%d)\n", - width, height); - return GL_FALSE; - } - - /* To do DEPTH_COMPONENT, we would need to change our setup to not draw to - * the color buffer, and sample the texture values into the fragment depth - * in a program. 
- */ - if (format == GL_DEPTH_COMPONENT) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, - "glDrawPixels() fallback: format == GL_DEPTH_COMPONENT\n"); - return GL_FALSE; - } - - if (!ctx->Extensions.ARB_texture_non_power_of_two && - (!is_power_of_two(width) || !is_power_of_two(height))) { - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, - "glDrawPixels() fallback: NPOT texture\n"); - return GL_FALSE; - } - - _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | - GL_CURRENT_BIT | GL_VIEWPORT_BIT); - _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); - - /* XXX: pixel store stuff */ - _mesa_Disable(GL_POLYGON_STIPPLE); - - old_active_texture = ctx->Texture.CurrentUnit; - _mesa_ActiveTextureARB(GL_TEXTURE0_ARB); - _mesa_Enable(GL_TEXTURE_2D); - _mesa_GenTextures(1, &texname); - _mesa_BindTexture(GL_TEXTURE_2D, texname); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - _mesa_TexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); - if (type == GL_ALPHA) - internalFormat = GL_ALPHA; - else - internalFormat = GL_RGBA; - _mesa_TexImage2D(GL_TEXTURE_2D, 0, internalFormat, width, height, 0, format, - type, pixels); - - meta_set_passthrough_transform(&intel->meta); - - /* convert rasterpos Z from [0,1] to NDC coord in [-1,1] */ - z = -1.0 + 2.0 * ctx->Current.RasterPos[2]; - - /* RasterPos[2] already takes into account the DepthRange mapping. */ - _mesa_DepthRange(0.0, 1.0); - - /* Create the vertex buffer based on the current raster pos. The x and y - * we're handed are ctx->Current.RasterPos[0,1] rounded to integers. - * We also apply the depth. However, the W component is already multiplied - * into ctx->Current.RasterPos[0,1,2] and we can ignore it at this point. 
- */ - vertices[0][0] = x; - vertices[0][1] = y; - vertices[0][2] = z; - vertices[0][3] = 1.0; - vertices[1][0] = x + width * ctx->Pixel.ZoomX; - vertices[1][1] = y; - vertices[1][2] = z; - vertices[1][3] = 1.0; - vertices[2][0] = x + width * ctx->Pixel.ZoomX; - vertices[2][1] = y + height * ctx->Pixel.ZoomY; - vertices[2][2] = z; - vertices[2][3] = 1.0; - vertices[3][0] = x; - vertices[3][1] = y + height * ctx->Pixel.ZoomY; - vertices[3][2] = z; - vertices[3][3] = 1.0; - - _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices); - _mesa_Enable(GL_VERTEX_ARRAY); - meta_set_default_texrect(&intel->meta); - - _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); - - meta_restore_texcoords(&intel->meta); - meta_restore_transform(&intel->meta); - - _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); - _mesa_PopClientAttrib(); - _mesa_PopAttrib(); - - _mesa_DeleteTextures(1, &texname); - - return GL_TRUE; -} +/** XXX compare perf of this vs. _mesa_meta_DrawPixels(STENCIL) */ static GLboolean intel_stencil_drawpixels(GLcontext * ctx, GLint x, GLint y, @@ -404,17 +261,25 @@ intelDrawPixels(GLcontext * ctx, const struct gl_pixelstore_attrib *unpack, const GLvoid * pixels) { - if (intel_texture_drawpixels(ctx, x, y, width, height, format, type, - unpack, pixels)) - return; - +#if 0 + /* XXX this function doesn't seem to work reliably even when all + * the pre-requisite conditions are met. + * Note that this function is never hit with conform. + * Fall back to swrast because even the _mesa_meta_DrawPixels() approach + * isn't working because of an apparent stencil bug. 
+ */ if (intel_stencil_drawpixels(ctx, x, y, width, height, format, type, unpack, pixels)) return; +#else + (void) intel_stencil_drawpixels; /* silence warning */ + if (format == GL_STENCIL_INDEX) { + _swrast_DrawPixels(ctx, x, y, width, height, format, type, + unpack, pixels); + return; + } +#endif - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); - - _swrast_DrawPixels(ctx, x, y, width, height, format, type, - unpack, pixels); + _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type, + unpack, pixels); } diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c index 8713463ace..bc67f6242a 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_read.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c @@ -180,16 +180,7 @@ do_blit_readpixels(GLcontext * ctx, if (!src) return GL_FALSE; - if (pack->BufferObj->Name) { - /* XXX This validation should be done by core mesa: - */ - if (!_mesa_validate_pbo_access(2, pack, width, height, 1, - format, type, pixels)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels"); - return GL_TRUE; - } - } - else { + if (!_mesa_is_bufferobj(pack->BufferObj)) { /* PBO only for now: */ if (INTEL_DEBUG & DEBUG_PIXEL) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 7525cd9c4d..a86c66a844 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -181,6 +181,28 @@ intel_region_alloc(struct intel_context *intel, dri_bo *buffer; struct intel_region *region; + /* If we're tiled, our allocations are in 8 or 32-row blocks, so + * failure to align our height means that we won't allocate enough pages. + * + * If we're untiled, we still have to align to 2 rows high because the + * data port accesses 2x2 blocks even if the bottom row isn't to be + * rendered, so failure to align means we could walk off the end of the + * GTT and fault. 
+ */ + if (tiling == I915_TILING_X) + height = ALIGN(height, 8); + else if (tiling == I915_TILING_Y) + height = ALIGN(height, 32); + else + height = ALIGN(height, 2); + + /* If we're untiled, we have to align to 2 rows high because the + * data port accesses 2x2 blocks even if the bottom row isn't to be + * rendered, so failure to align means we could walk off the end of the + * GTT and fault. + */ + height = ALIGN(height, 2); + if (expect_accelerated_upload) { buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region", pitch * cpp * height, 64); @@ -452,6 +474,7 @@ void intel_region_cow(struct intel_context *intel, struct intel_region *region) { struct intel_buffer_object *pbo = region->pbo; + GLboolean ok; intel_region_release_pbo(intel, region); @@ -463,13 +486,14 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) */ LOCK_HARDWARE(intel); - assert(intelEmitCopyBlit(intel, - region->cpp, - region->pitch, pbo->buffer, 0, region->tiling, - region->pitch, region->buffer, 0, region->tiling, - 0, 0, 0, 0, - region->pitch, region->height, - GL_COPY)); + ok = intelEmitCopyBlit(intel, + region->cpp, + region->pitch, pbo->buffer, 0, region->tiling, + region->pitch, region->buffer, 0, region->tiling, + 0, 0, 0, 0, + region->pitch, region->height, + GL_COPY); + assert(ok); UNLOCK_HARDWARE(intel); } diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 6bbc995c1e..1b8c56e68d 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -69,7 +69,11 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_DESC_END DRI_CONF_OPT_END - DRI_CONF_TEXTURE_TILING(false) +#ifdef I915 + DRI_CONF_TEXTURE_TILING(false) +#else + DRI_CONF_TEXTURE_TILING(true) +#endif DRI_CONF_OPT_BEGIN(early_z, bool, false) DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).") @@ -628,10 +632,10 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen) /* Otherwise, use 
the classic buffer manager. */ if (intelScreen->bufmgr == NULL) { if (gem_disable) { - fprintf(stderr, "GEM disabled. Using classic.\n"); + _mesa_warning(NULL, "GEM disabled. Using classic."); } else { - fprintf(stderr, "Failed to initialize GEM. " - "Falling back to classic.\n"); + _mesa_warning(NULL, + "Failed to initialize GEM. Falling back to classic."); } if (intelScreen->tex.size == 0) { diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 34b78ebc1a..28eabbc005 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -444,23 +444,30 @@ intel_renderbuffer_unmap(struct intel_context *intel, * _ColorReadBuffer, _DepthBuffer or _StencilBuffer fields. */ static void -intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) +intel_map_unmap_framebuffer(struct intel_context *intel, + struct gl_framebuffer *fb, + GLboolean map) { - GLcontext *ctx = &intel->ctx; - GLuint i, j; + GLuint i; /* color draw buffers */ - for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) { + for (i = 0; i < fb->_NumColorDrawBuffers; i++) { if (map) - intel_renderbuffer_map(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]); + intel_renderbuffer_map(intel, fb->_ColorDrawBuffers[i]); else - intel_renderbuffer_unmap(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]); + intel_renderbuffer_unmap(intel, fb->_ColorDrawBuffers[i]); } + /* color read buffer */ + if (map) + intel_renderbuffer_map(intel, fb->_ColorReadBuffer); + else + intel_renderbuffer_unmap(intel, fb->_ColorReadBuffer); + /* check for render to textures */ for (i = 0; i < BUFFER_COUNT; i++) { struct gl_renderbuffer_attachment *att = - ctx->DrawBuffer->Attachment + i; + fb->Attachment + i; struct gl_texture_object *tex = att->Texture; if (tex) { /* render to texture */ @@ -472,36 +479,25 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) } } - /* color read buffers */ - if (map) - intel_renderbuffer_map(intel, 
ctx->ReadBuffer->_ColorReadBuffer); - else - intel_renderbuffer_unmap(intel, ctx->ReadBuffer->_ColorReadBuffer); - /* depth buffer (Note wrapper!) */ - if (ctx->DrawBuffer->_DepthBuffer) { + if (fb->_DepthBuffer) { if (map) - intel_renderbuffer_map(intel, ctx->DrawBuffer->_DepthBuffer->Wrapped); + intel_renderbuffer_map(intel, fb->_DepthBuffer->Wrapped); else - intel_renderbuffer_unmap(intel, - ctx->DrawBuffer->_DepthBuffer->Wrapped); + intel_renderbuffer_unmap(intel, fb->_DepthBuffer->Wrapped); } /* stencil buffer (Note wrapper!) */ - if (ctx->DrawBuffer->_StencilBuffer) { + if (fb->_StencilBuffer) { if (map) - intel_renderbuffer_map(intel, - ctx->DrawBuffer->_StencilBuffer->Wrapped); + intel_renderbuffer_map(intel, fb->_StencilBuffer->Wrapped); else - intel_renderbuffer_unmap(intel, - ctx->DrawBuffer->_StencilBuffer->Wrapped); + intel_renderbuffer_unmap(intel, fb->_StencilBuffer->Wrapped); } } - - /** - * Prepare for softare rendering. Map current read/draw framebuffers' + * Prepare for software rendering. Map current read/draw framebuffers' * renderbuffes and all currently bound texture objects. * * Old note: Moved locking out to get reasonable span performance. @@ -522,11 +518,13 @@ intelSpanRenderStart(GLcontext * ctx) } } - intel_map_unmap_buffers(intel, GL_TRUE); + intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_TRUE); + if (ctx->ReadBuffer != ctx->DrawBuffer) + intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_TRUE); } /** - * Called when done softare rendering. Unmap the buffers we mapped in + * Called when done software rendering. Unmap the buffers we mapped in * the above function. 
*/ void @@ -544,7 +542,9 @@ intelSpanRenderFinish(GLcontext * ctx) } } - intel_map_unmap_buffers(intel, GL_FALSE); + intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_FALSE); + if (ctx->ReadBuffer != ctx->DrawBuffer) + intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_FALSE); UNLOCK_HARDWARE(intel); } @@ -711,6 +711,9 @@ intel_set_span_functions(struct intel_context *intel, intel_YTile_InitStencilPointers_z24_s8(rb); break; } + } else { + _mesa_problem(NULL, + "Unexpected ActualFormat in intelSetSpanFunctions"); } break; default: diff --git a/src/mesa/drivers/dri/intel/intel_syncobj.c b/src/mesa/drivers/dri/intel/intel_syncobj.c new file mode 100644 index 0000000000..1286fe929b --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_syncobj.c @@ -0,0 +1,132 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +/** @file intel_syncobj.c + * + * Support for ARB_sync + * + * ARB_sync is implemented by flushing the current batchbuffer and keeping a + * reference on it. We can then check for completion or wait for compeltion + * using the normal buffer object mechanisms. This does mean that if an + * application is using many sync objects, it will emit small batchbuffers + * which may end up being a significant overhead. In other tests of removing + * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant + * performance bottleneck, though. + */ + +#include "main/simple_list.h" +#include "main/imports.h" + +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_reg.h" + +static struct gl_sync_object * +intel_new_sync_object(GLcontext *ctx, GLuint id) +{ + struct intel_sync_object *sync; + + sync = _mesa_calloc(sizeof(struct intel_sync_object)); + + return &sync->Base; +} + +static void +intel_delete_sync_object(GLcontext *ctx, struct gl_sync_object *s) +{ + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + drm_intel_bo_unreference(sync->bo); + _mesa_free(sync); +} + +static void +intel_fence_sync(GLcontext *ctx, struct gl_sync_object *s, + GLenum condition, GLbitfield flags) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE); + intel_batchbuffer_emit_mi_flush(intel->batch); + + sync->bo = intel->batch->buf; + drm_intel_bo_reference(sync->bo); + + intelFlush(ctx); +} + +/* We ignore the user-supplied timeout. This is weaselly -- we're allowed to + * round to an implementation-dependent accuracy, and right now our + * implementation "rounds" to the wait-forever value. + * + * The fix would be a new kernel function to do the GTT transition with a + * timeout. 
+ */ +static void intel_client_wait_sync(GLcontext *ctx, struct gl_sync_object *s, + GLbitfield flags, GLuint64 timeout) +{ + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + if (sync->bo) { + drm_intel_bo_wait_rendering(sync->bo); + s->StatusFlag = 1; + drm_intel_bo_unreference(sync->bo); + sync->bo = NULL; + } +} + +/* We have nothing to do for WaitSync. Our GL command stream is sequential, + * so given that the sync object has already flushed the batchbuffer, + * any batchbuffers coming after this waitsync will naturally not occur until + * the previous one is done. + */ +static void intel_server_wait_sync(GLcontext *ctx, struct gl_sync_object *s, + GLbitfield flags, GLuint64 timeout) +{ +} + +static void intel_check_sync(GLcontext *ctx, struct gl_sync_object *s) +{ + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + if (sync->bo && drm_intel_bo_busy(sync->bo)) { + drm_intel_bo_unreference(sync->bo); + sync->bo = NULL; + s->StatusFlag = 1; + } +} + +void intel_init_syncobj_functions(struct dd_function_table *functions) +{ + functions->NewSyncObject = intel_new_sync_object; + functions->DeleteSyncObject = intel_delete_sync_object; + functions->FenceSync = intel_fence_sync; + functions->CheckSync = intel_check_sync; + functions->ClientWaitSync = intel_client_wait_sync; + functions->ServerWaitSync = intel_server_wait_sync; +} diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 028b49c14d..ac557a9200 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -30,7 +30,8 @@ #include "main/image.h" #include "main/teximage.h" #include "main/mipmap.h" -#include "swrast/swrast.h" + +#include "drivers/common/meta.h" #include "intel_screen.h" #include "intel_context.h" @@ -90,7 +91,6 @@ do_copy_texsubimage(struct intel_context *intel, GLint x, GLint y, GLsizei width, GLsizei height) { GLcontext *ctx = &intel->ctx; - struct 
gl_texture_object *texObj = intelImage->base.TexObject; const struct intel_region *src = get_teximage_source(intel, internalFormat); @@ -170,11 +170,6 @@ do_copy_texsubimage(struct intel_context *intel, UNLOCK_HARDWARE(intel); - /* GL_SGIS_generate_mipmap */ - if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } - return GL_TRUE; } @@ -221,8 +216,8 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, return; fail: - _swrast_copy_teximage1d(ctx, target, level, internalFormat, x, y, - width, border); + _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y, + width, border); } @@ -269,8 +264,8 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, return; fail: - _swrast_copy_teximage2d(ctx, target, level, internalFormat, x, y, - width, height, border); + _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, + width, height, border); } @@ -294,7 +289,7 @@ intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, if (!do_copy_texsubimage(intel_context(ctx), target, intel_texture_image(texImage), internalFormat, xoffset, 0, x, y, width, 1)) { - _swrast_copy_texsubimage1d(ctx, target, level, xoffset, x, y, width); + _mesa_meta_CopyTexSubImage1D(ctx, target, level, xoffset, x, y, width); } } @@ -320,10 +315,10 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, internalFormat, xoffset, yoffset, x, y, width, height)) { - DBG("%s - fallback to swrast\n", __FUNCTION__); + DBG("%s - fallback to _mesa_meta_CopyTexSubImage2D\n", __FUNCTION__); - _swrast_copy_texsubimage2d(ctx, target, level, - xoffset, yoffset, x, y, width, height); + _mesa_meta_CopyTexSubImage2D(ctx, target, level, + xoffset, yoffset, x, y, width, height); } } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index c5f5220837..66201b1f46 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ 
b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -1,15 +1,11 @@ -#include <stdlib.h> -#include <stdio.h> - #include "main/glheader.h" #include "main/macros.h" #include "main/mtypes.h" #include "main/enums.h" -#include "main/colortab.h" +#include "main/bufferobj.h" #include "main/convolve.h" #include "main/context.h" -#include "main/simple_list.h" #include "main/texcompress.h" #include "main/texformat.h" #include "main/texgetimage.h" @@ -206,7 +202,7 @@ try_pbo_upload(struct intel_context *intel, GLuint src_offset, src_stride; GLuint dst_offset, dst_stride; - if (unpack->BufferObj->Name == 0 || + if (!_mesa_is_bufferobj(unpack->BufferObj) || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { DBG("%s: failure 1\n", __FUNCTION__); @@ -264,7 +260,7 @@ try_pbo_zcopy(struct intel_context *intel, GLuint src_offset, src_stride; GLuint dst_offset, dst_stride; - if (unpack->BufferObj->Name == 0 || + if (!_mesa_is_bufferobj(unpack->BufferObj) || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { DBG("%s: failure 1\n", __FUNCTION__); @@ -427,7 +423,7 @@ intelTexImage(GLcontext * ctx, */ if (dims <= 2 && intelImage->mt && - unpack->BufferObj->Name != 0 && + _mesa_is_bufferobj(unpack->BufferObj) && check_pbo_format(internalFormat, format, type, intelImage->base.TexFormat)) { @@ -550,11 +546,6 @@ intelTexImage(GLcontext * ctx, } UNLOCK_HARDWARE(intel); - - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } } diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index 1f27131dac..751ec2c98c 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -44,10 +44,12 @@ intelTexSubimage(GLcontext * ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint width, GLint height, GLint depth, + GLsizei 
imageSize, GLenum format, GLenum type, const void *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *texObj, - struct gl_texture_image *texImage) + struct gl_texture_image *texImage, + GLboolean compressed) { struct intel_context *intel = intel_context(ctx); struct intel_texture_image *intelImage = intel_texture_image(texImage); @@ -59,9 +61,14 @@ intelTexSubimage(GLcontext * ctx, intelFlush(ctx); - pixels = - _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, format, - type, pixels, packing, "glTexSubImage2D"); + if (compressed) + pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize, + pixels, packing, + "glCompressedTexImage"); + else + pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, + format, type, pixels, packing, + "glTexSubImage"); if (!pixels) return; @@ -90,15 +97,28 @@ intelTexSubimage(GLcontext * ctx, assert(dstRowStride); - if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, - texImage->TexFormat, - texImage->Data, - xoffset, yoffset, zoffset, - dstRowStride, - texImage->ImageOffsets, - width, height, depth, - format, type, pixels, packing)) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); + if (compressed) { + if (intelImage->mt) { + struct intel_region *dst = intelImage->mt->region; + + _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch, + xoffset, yoffset / 4, + (width + 3) & ~3, (height + 3) / 4, + pixels, (width + 3) & ~3, 0, 0); + } else + memcpy(texImage->Data, pixels, imageSize); + } + else { + if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, + texImage->TexFormat, + texImage->Data, + xoffset, yoffset, zoffset, + dstRowStride, + texImage->ImageOffsets, + width, height, depth, + format, type, pixels, packing)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); + } } _mesa_unmap_teximage_pbo(ctx, packing); @@ -109,11 +129,6 @@ intelTexSubimage(GLcontext * ctx, } UNLOCK_HARDWARE(intel); - - /* GL_SGIS_generate_mipmap 
*/ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } } @@ -132,8 +147,8 @@ intelTexSubImage3D(GLcontext * ctx, intelTexSubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing, texObj, texImage); + width, height, depth, 0, + format, type, pixels, packing, texObj, texImage, GL_FALSE); } @@ -152,8 +167,8 @@ intelTexSubImage2D(GLcontext * ctx, intelTexSubimage(ctx, 2, target, level, xoffset, yoffset, 0, - width, height, 1, - format, type, pixels, packing, texObj, texImage); + width, height, 1, 0, + format, type, pixels, packing, texObj, texImage, GL_FALSE); } @@ -172,8 +187,8 @@ intelTexSubImage1D(GLcontext * ctx, intelTexSubimage(ctx, 1, target, level, xoffset, 0, 0, - width, 1, 1, - format, type, pixels, packing, texObj, texImage); + width, 1, 1, 0, + format, type, pixels, packing, texObj, texImage, GL_FALSE); } static void @@ -187,8 +202,11 @@ intelCompressedTexSubImage2D(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - fprintf(stderr, "stubbed CompressedTexSubImage2D: %dx%d@%dx%d\n", - width, height, xoffset, yoffset); + intelTexSubimage(ctx, 2, + target, level, + xoffset, yoffset, 0, + width, height, 1, imageSize, + format, 0, pixels, &ctx->Unpack, texObj, texImage, GL_TRUE); } diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile index e81a1b38ac..fbce70c37b 100644 --- a/src/mesa/drivers/dri/r200/Makefile +++ b/src/mesa/drivers/dri/r200/Makefile @@ -18,16 +18,18 @@ CS_SOURCES = radeon_cs_space_drm.c endif RADEON_COMMON_SOURCES = \ - radeon_texture.c \ + radeon_bo_legacy.c \ radeon_common_context.c \ radeon_common.c \ + radeon_cs_legacy.c \ radeon_dma.c \ + radeon_debug.c \ + radeon_fbo.c \ radeon_lock.c \ - radeon_bo_legacy.c \ - radeon_cs_legacy.c \ radeon_mipmap_tree.c \ + radeon_queryobj.c \ radeon_span.c \ - radeon_fbo.c + radeon_texture.c DRIVER_SOURCES = 
r200_context.c \ @@ -53,7 +55,7 @@ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) X86_SOURCES = -DRIVER_DEFINES = -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R200 +DRIVER_DEFINES = -DRADEON_R200 -Wall DRI_LIB_DEPS += $(RADEON_LDFLAGS) diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c index d49f4fabe7..1d1bea6f5f 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -49,6 +49,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /* The state atoms will be emitted in the order they appear in the atom list, * so this step is important. */ +#define insert_at_tail_if(atom_list, atom) \ + do { \ + struct radeon_state_atom* __atom = (atom); \ + if (__atom->check) \ + insert_at_tail((atom_list), __atom); \ + } while(0) + void r200SetUpAtomList( r200ContextPtr rmesa ) { int i, mtu; @@ -58,80 +65,52 @@ void r200SetUpAtomList( r200ContextPtr rmesa ) make_empty_list(&rmesa->radeon.hw.atomlist); rmesa->radeon.hw.atomlist.name = "atom-list"; - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ctx ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.set ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lin ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msk ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpt ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vtx ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vap ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vte ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msc ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cst ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.zbs ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcl ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msl ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcg ); - insert_at_tail( &rmesa->radeon.hw.atomlist, 
&rmesa->hw.grd ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.fog ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tam ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tf ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.atf ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.ctx ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.set ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.lin ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.msk ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpt ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vtx ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vap ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vte ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.msc ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.cst ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.zbs ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcl ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.msl ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcg ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.grd ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.fog ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tam ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tf ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.atf ); for (i = 0; i < mtu; ++i) - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i] ); for (i = 0; i < mtu; ++i) - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i] ); for (i = 0; i < 6; ++i) - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] ); - insert_at_tail( 
&rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] ); for (i = 0; i < 8; ++i) - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] ); for (i = 0; i < 3 + mtu; ++i) - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i] ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.eye ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.glt ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.eye ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.glt ); for (i = 0; i < 2; ++i) - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mtl[i] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.mtl[i] ); for (i = 0; i < 6; ++i) - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i] ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.spr ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ptp ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.prf ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pvs ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[0] ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[1] ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[0] ); - insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] ); -} - -void r200EmitScissor(r200ContextPtr rmesa) -{ - BATCH_LOCALS(&rmesa->radeon); - if (!rmesa->radeon.radeonScreen->kernel_mm) { - return; - } - if (rmesa->radeon.state.scissor.enabled) { - BEGIN_BATCH(8); - OUT_BATCH(CP_PACKET0(R200_RE_CNTL, 0)); - OUT_BATCH(R200_SCISSOR_ENABLE | 
rmesa->hw.set.cmd[SET_RE_CNTL]); - OUT_BATCH(CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0)); - OUT_BATCH(R200_SCISSOR_ENABLE_0); - OUT_BATCH(CP_PACKET0(R200_RE_SCISSOR_TL_0, 0)); - OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) | - rmesa->radeon.state.scissor.rect.x1); - OUT_BATCH(CP_PACKET0(R200_RE_SCISSOR_BR_0, 0)); - OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2 - 1) << 16) | - (rmesa->radeon.state.scissor.rect.x2 - 1)); - END_BATCH(); - } else { - BEGIN_BATCH(4); - OUT_BATCH(CP_PACKET0(R200_RE_CNTL, 0)); - OUT_BATCH(rmesa->hw.set.cmd[SET_RE_CNTL] & ~R200_SCISSOR_ENABLE); - OUT_BATCH(CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0)); - OUT_BATCH(0); - END_BATCH(); - } + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.spr ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.ptp ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.prf ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.pvs ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[0] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[1] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[0] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] ); + insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.sci ); } /* Fire a section of the retained (indexed_verts) buffer as a regular @@ -147,10 +126,9 @@ void r200EmitVbufPrim( r200ContextPtr rmesa, radeonEmitState(&rmesa->radeon); - if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS)) - fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__, - rmesa->store.cmd_used/4, primitive, vertex_nr); - r200EmitScissor(rmesa); + radeon_print(RADEON_RENDER|RADEON_SWRENDER,RADEON_VERBOSE, + "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__, + rmesa->store.cmd_used/4, primitive, vertex_nr); BEGIN_BATCH(3); OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0); @@ -164,7 +142,6 @@ static void 
r200FireEB(r200ContextPtr rmesa, int vertex_count, int type) BATCH_LOCALS(&rmesa->radeon); if (vertex_count > 0) { - r200EmitScissor(rmesa); BEGIN_BATCH(8+2); OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_INDX_2, 0); OUT_BATCH(R200_VF_PRIM_WALK_IND | @@ -198,8 +175,7 @@ void r200FlushElts(GLcontext *ctx) r200ContextPtr rmesa = R200_CONTEXT(ctx); int nr, elt_used = rmesa->tcl.elt_used; - if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS)) - fprintf(stderr, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used); + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used); assert( rmesa->radeon.dma.flush == r200FlushElts ); rmesa->radeon.dma.flush = NULL; @@ -213,8 +189,11 @@ void r200FlushElts(GLcontext *ctx) radeon_bo_unref(rmesa->radeon.tcl.elt_dma_bo); rmesa->radeon.tcl.elt_dma_bo = NULL; - if (R200_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "%s: Syncing\n", __FUNCTION__); + if (R200_ELT_BUF_SZ > elt_used) + radeonReturnDmaRegion(&rmesa->radeon, R200_ELT_BUF_SZ - elt_used); + + if (radeon_is_debug_enabled(RADEON_SYNC, RADEON_CRITICAL)) { + radeon_print(RADEON_SYNC, RADEON_NORMAL, "%s: Syncing\n", __FUNCTION__); radeonFinish( rmesa->radeon.glCtx ); } } @@ -225,41 +204,20 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, GLuint min_nr ) { GLushort *retval; - int ret; - if (R200_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive); + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive); assert((primitive & R200_VF_PRIM_WALK_IND)); radeonEmitState(&rmesa->radeon); -#ifdef RADEON_DEBUG_BO - rmesa->radeon.tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom, - 0, R200_ELT_BUF_SZ, 4, - RADEON_GEM_DOMAIN_GTT, 0, "ELT"); -#else - rmesa->radeon.tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom, - 0, R200_ELT_BUF_SZ, 4, - RADEON_GEM_DOMAIN_GTT, 0); -#endif - rmesa->radeon.tcl.elt_dma_offset = 0; + 
radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo, + &rmesa->radeon.tcl.elt_dma_offset, R200_ELT_BUF_SZ, 4); rmesa->tcl.elt_used = min_nr * 2; - ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.tcl.elt_dma_bo, - RADEON_GEM_DOMAIN_GTT, 0); - if (ret) { - fprintf(stderr,"failure to revalidate BOs - badness\n"); - } - radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); retval = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset; - - if (R200_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s: header prim %x \n", - __FUNCTION__, primitive); - assert(!rmesa->radeon.dma.flush); rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; rmesa->radeon.dma.flush = r200FlushElts; @@ -267,7 +225,17 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, return retval; } +void r200EmitMaxVtxIndex(r200ContextPtr rmesa, int count) +{ + BATCH_LOCALS(&rmesa->radeon); + if (rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH(CP_PACKET0(R200_SE_VF_MAX_VTX_INDX, 0)); + OUT_BATCH(count); + END_BATCH(); + } +} void r200EmitVertexAOS( r200ContextPtr rmesa, GLuint vertex_size, @@ -276,8 +244,7 @@ void r200EmitVertexAOS( r200ContextPtr rmesa, { BATCH_LOCALS(&rmesa->radeon); - if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL)) - fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n", + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s: vertex_size 0x%x offset 0x%x \n", __FUNCTION__, vertex_size, offset); @@ -296,9 +263,9 @@ void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset) int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; int i; - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, - offset); + radeon_print(RADEON_RENDER, RADEON_VERBOSE, + "%s: nr=%d, ofs=0x%08x\n", + __FUNCTION__, nr, offset); BEGIN_BATCH(sz+2+ (nr*2)); OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1); diff --git a/src/mesa/drivers/dri/r200/r200_context.c 
b/src/mesa/drivers/dri/r200/r200_context.c index 9a92a32079..3ddb5bf7d6 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -60,9 +60,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_tcl.h" #include "r200_maos.h" #include "r200_vertprog.h" +#include "radeon_queryobj.h" #include "radeon_span.h" +#define need_GL_ARB_occlusion_query #define need_GL_ARB_vertex_program #define need_GL_ATI_fragment_shader #define need_GL_EXT_blend_minmax @@ -116,6 +118,7 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name ) const struct dri_extension card_extensions[] = { { "GL_ARB_multitexture", NULL }, + { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, { "GL_ARB_texture_border_clamp", NULL }, { "GL_ARB_texture_env_add", NULL }, { "GL_ARB_texture_env_combine", NULL }, @@ -218,26 +221,6 @@ static void r200InitDriverFuncs( struct dd_function_table *functions ) functions->GetString = r200GetString; } -static const struct dri_debug_control debug_control[] = -{ - { "fall", DEBUG_FALLBACKS }, - { "tex", DEBUG_TEXTURE }, - { "ioctl", DEBUG_IOCTL }, - { "prim", DEBUG_PRIMS }, - { "vert", DEBUG_VERTS }, - { "state", DEBUG_STATE }, - { "code", DEBUG_CODEGEN }, - { "vfmt", DEBUG_VFMT }, - { "vtxf", DEBUG_VFMT }, - { "verb", DEBUG_VERBOSE }, - { "dri", DEBUG_DRI }, - { "dma", DEBUG_DMA }, - { "san", DEBUG_SANITY }, - { "sync", DEBUG_SYNC }, - { "pix", DEBUG_PIXEL }, - { "mem", DEBUG_MEMORY }, - { NULL, 0 } -}; static void r200_get_lock(radeonContextPtr radeon) { @@ -262,6 +245,19 @@ static void r200_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmes { } +static void r200_emit_query_finish(radeonContextPtr radeon) +{ + BATCH_LOCALS(radeon); + struct radeon_query_object *query = radeon->query.current; + + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZPASS_ADDR, 0)); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + 
END_BATCH(); + query->curr_offset += sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} static void r200_init_vtbl(radeonContextPtr radeon) { @@ -270,6 +266,8 @@ static void r200_init_vtbl(radeonContextPtr radeon) radeon->vtbl.emit_cs_header = r200_vtbl_emit_cs_header; radeon->vtbl.swtcl_flush = r200_swtcl_flush; radeon->vtbl.fallback = r200Fallback; + radeon->vtbl.update_scissor = r200_vtbl_update_scissor; + radeon->vtbl.emit_query_finish = r200_emit_query_finish; } @@ -309,7 +307,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache, "def_max_anisotropy"); - if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) { + if ( sPriv->drm_version.major == 1 + && driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) { if ( sPriv->drm_version.minor < 13 ) fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " "disabling.\n", sPriv->drm_version.minor ); @@ -326,9 +325,10 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, _mesa_init_driver_functions(&functions); r200InitDriverFuncs(&functions); r200InitIoctlFuncs(&functions); - r200InitStateFuncs(&functions); + r200InitStateFuncs(&functions, screen->kernel_mm); r200InitTextureFuncs(&functions); r200InitShaderFuncs(&functions); + radeonInitQueryObjFunctions(&functions); if (!radeonInitContext(&rmesa->radeon, &functions, glVisual, driContextPriv, @@ -457,6 +457,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, driInitSingleExtension( ctx, ATI_fs_extension ); if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) driInitExtensions( ctx, point_extensions, GL_FALSE ); + + if (!rmesa->radeon.radeonScreen->kernel_mm) + _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); #if 0 r200InitDriverFuncs( ctx ); r200InitIoctlFuncs( ctx ); @@ -475,13 +478,6 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, 
rmesa->prefer_gart_client_texturing = (getenv("R200_GART_CLIENT_TEXTURES") != 0); -#if DO_DEBUG - R200_DEBUG = driParseDebugString( getenv( "R200_DEBUG" ), - debug_control ); - R200_DEBUG |= driParseDebugString( getenv( "RADEON_DEBUG" ), - debug_control ); -#endif - tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode"); if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) { fprintf(stderr, "disabling 3D acceleration\n"); @@ -500,3 +496,15 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, } +void r200DestroyContext( __DRIcontextPrivate *driContextPriv ) +{ + int i; + r200ContextPtr rmesa = (r200ContextPtr)driContextPriv->driverPrivate; + if (rmesa) + { + for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS ; i++ ) { + _math_matrix_dtr( &rmesa->TexGenMatrix[i] ); + } + } + radeonDestroyContext(driContextPriv); +} diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index 6267293817..246f98c6dc 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -467,6 +467,23 @@ struct r200_texture_state { #define PRF_STATE_SIZE 3 +#define SCI_CMD_0 0 +#define SCI_RE_AUX 1 +#define SCI_CMD_1 2 +#define SCI_XY_1 3 +#define SCI_CMD_2 4 +#define SCI_XY_2 5 +#define SCI_STATE_SIZE 6 + +#define R200_QUERYOBJ_CMD_0 0 +#define R200_QUERYOBJ_DATA_0 1 +#define R200_QUERYOBJ_CMDSIZE 2 + +#define STP_CMD_0 0 +#define STP_DATA_0 1 +#define STP_CMD_1 2 +#define STP_STATE_SIZE 35 + struct r200_hw_state { /* Hardware state, stored as cmdbuf commands: * -- Need to doublebuffer for @@ -475,6 +492,7 @@ struct r200_hw_state { */ struct radeon_state_atom ctx; struct radeon_state_atom set; + struct radeon_state_atom sci; struct radeon_state_atom vte; struct radeon_state_atom lin; struct radeon_state_atom msk; @@ -508,12 +526,12 @@ struct r200_hw_state { struct radeon_state_atom atf; struct radeon_state_atom spr; struct radeon_state_atom ptp; + struct radeon_state_atom stp; }; struct 
r200_state { /* Derived state for internal purposes: */ - struct radeon_stipple_state stipple; struct r200_texture_state texture; GLuint envneeded; }; @@ -526,8 +544,6 @@ struct r200_state { struct r200_tcl_info { GLuint hw_primitive; - GLuint *Elts; - int elt_used; }; diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index 4dbda39eb9..b238adb972 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -190,7 +190,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) GLuint color_mask = 0; GLuint orig_mask = mask; - if ( R200_DEBUG & DEBUG_IOCTL ) { + if ( R200_DEBUG & RADEON_IOCTL ) { if (rmesa->radeon.sarea) fprintf( stderr, "r200Clear %x %d\n", mask, rmesa->radeon.sarea->pfCurrentPage); else @@ -229,7 +229,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) } if ( mask ) { - if (R200_DEBUG & DEBUG_FALLBACKS) + if (R200_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask); _swrast_Clear( ctx, mask ); } @@ -274,7 +274,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size, drm_radeon_mem_alloc_t alloc; int ret; - if (R200_DEBUG & DEBUG_IOCTL) + if (R200_DEBUG & RADEON_IOCTL) fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, writefreq, priority); @@ -314,7 +314,7 @@ void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer) drm_radeon_mem_free_t memfree; int ret; - if (R200_DEBUG & DEBUG_IOCTL) + if (R200_DEBUG & RADEON_IOCTL) fprintf(stderr, "%s %p\n", __FUNCTION__, pointer); if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) { @@ -370,7 +370,7 @@ GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer, offset >= 0 && offset + size < rmesa->radeon.radeonScreen->gartTextures.size); - if (R200_DEBUG & DEBUG_IOCTL) + if (R200_DEBUG & RADEON_IOCTL) fprintf(stderr, "r200IsGartMemory( %p ) : %d\n", pointer, valid ); return valid; diff 
--git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h index 2a4b8a11f4..8d51aefa04 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.h +++ b/src/mesa/drivers/dri/r200/r200_ioctl.h @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drm.h" #include "radeon_drm.h" +extern void r200EmitMaxVtxIndex(r200ContextPtr rmesa, int count); extern void r200EmitVertexAOS( r200ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, @@ -98,6 +99,16 @@ do { \ rmesa->radeon.hw.is_dirty = GL_TRUE; \ } while (0) +#define R200_SET_STATE( rmesa, ATOM, index, newvalue ) \ + do { \ + uint32_t __index = (index); \ + uint32_t __dword = (newvalue); \ + if (__dword != (rmesa)->hw.ATOM.cmd[__index]) { \ + R200_STATECHANGE( (rmesa), ATOM ); \ + (rmesa)->hw.ATOM.cmd[__index] = __dword; \ + } \ + } while(0) + #define R200_DB_STATE( ATOM ) \ memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \ rmesa->hw.ATOM.cmd_size * 4) @@ -125,10 +136,12 @@ static INLINE int R200_DB_STATECHANGE( * are available, you will also be adding an rmesa->state.max_state_size because * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts. 
*/ -#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int)) -#define VERT_AOS_BUFSZ (5 * sizeof(int)) +#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2)) +#define VERT_AOS_BUFSZ (5) #define ELTS_BUFSZ(nr) (12 + nr * 2) -#define VBUF_BUFSZ (3 * sizeof(int)) +#define VBUF_BUFSZ (3) +#define SCISSOR_BUFSZ (8) +#define INDEX_BUFSZ (8+2) static inline uint32_t cmdpacket3(int cmd_type) { diff --git a/src/mesa/drivers/dri/r200/r200_pixel.c b/src/mesa/drivers/dri/r200/r200_pixel.c index 654f2c6ae9..95773871e0 100644 --- a/src/mesa/drivers/dri/r200/r200_pixel.c +++ b/src/mesa/drivers/dri/r200/r200_pixel.c @@ -53,14 +53,14 @@ check_color( const GLcontext *ctx, GLenum type, GLenum format, r200ContextPtr rmesa = R200_CONTEXT(ctx); GLuint cpp = rmesa->radeon.radeonScreen->cpp; - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "%s\n", __FUNCTION__); if ( (pitch & 63) || ctx->_ImageTransferState || packing->SwapBytes || packing->LsbFirst) { - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "%s: failed 1\n", __FUNCTION__); return GL_FALSE; } @@ -68,12 +68,12 @@ check_color( const GLcontext *ctx, GLenum type, GLenum format, if ( type == GL_UNSIGNED_INT_8_8_8_8_REV && cpp == 4 && format == GL_BGRA ) { - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "%s: passed 2\n", __FUNCTION__); return GL_TRUE; } - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "%s: failed\n", __FUNCTION__); return GL_FALSE; @@ -101,7 +101,7 @@ check_color_per_fragment_ops( const GLcontext *ctx ) } - +#if 0 static GLboolean clip_pixelrect( const GLcontext *ctx, const GLframebuffer *buffer, @@ -142,6 +142,7 @@ clip_pixelrect( const GLcontext *ctx, return GL_TRUE; } +#endif static GLboolean r200TryReadPixels( GLcontext *ctx, @@ -150,22 +151,22 @@ r200TryReadPixels( GLcontext *ctx, const struct gl_pixelstore_attrib *pack, GLvoid *pixels ) { + return 
GL_FALSE; +#if 0 r200ContextPtr rmesa = R200_CONTEXT(ctx); GLint pitch = pack->RowLength ? pack->RowLength : width; GLint blit_format; GLuint cpp = rmesa->radeon.radeonScreen->cpp; GLint size = width * height * cpp; - return GL_FALSE; -#if 0 - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "%s\n", __FUNCTION__); /* Only accelerate reading to GART buffers. */ if ( !r200IsGartMemory(rmesa, pixels, pitch * height * rmesa->radeon.radeonScreen->cpp ) ) { - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "%s: dest not GART\n", __FUNCTION__); } @@ -173,7 +174,7 @@ r200TryReadPixels( GLcontext *ctx, * blitter: */ if (!pack->Invert) { - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "%s: MESA_PACK_INVERT not set\n", __FUNCTION__); return GL_FALSE; } @@ -206,7 +207,7 @@ r200TryReadPixels( GLcontext *ctx, if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height, &size)) { UNLOCK_HARDWARE( &rmesa->radeon ); - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "%s totally clipped -- nothing to do\n", __FUNCTION__); return GL_TRUE; @@ -231,7 +232,7 @@ r200TryReadPixels( GLcontext *ctx, y += dPriv->y; - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "readpixel blit src_pitch %d dst_pitch %d\n", src_pitch, dst_pitch); @@ -274,7 +275,7 @@ r200ReadPixels( GLcontext *ctx, const struct gl_pixelstore_attrib *pack, GLvoid *pixels ) { - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "%s\n", __FUNCTION__); if (!r200TryReadPixels( ctx, x, y, width, height, format, type, pack, @@ -292,6 +293,10 @@ static void do_draw_pix( GLcontext *ctx, const void *pixels, GLuint planemask) { + if (R200_DEBUG & RADEON_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + +#if 0 r200ContextPtr rmesa = R200_CONTEXT(ctx); __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); drm_clip_rect_t *box = 
dPriv->pClipRects; @@ -304,9 +309,6 @@ static void do_draw_pix( GLcontext *ctx, int src_offset = r200GartOffsetFromVirtual( rmesa, pixels ); int src_pitch = pitch * rmesa->radeon.radeonScreen->cpp; - if (R200_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); -#if 0 switch ( rmesa->radeon.radeonScreen->cpp ) { case 2: blit_format = R200_CP_COLOR_FORMAT_RGB565; @@ -386,7 +388,7 @@ r200TryDrawPixels( GLcontext *ctx, GLuint cpp = rmesa->radeon.radeonScreen->cpp; GLint size = height * pitch * cpp; - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "%s\n", __FUNCTION__); /* check that we're drawing to exactly one color buffer */ @@ -412,7 +414,7 @@ r200TryDrawPixels( GLcontext *ctx, /* Can't do conversions on GART reads/draws. */ if ( !r200IsGartMemory( rmesa, pixels, size ) ) { - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "%s: not GART memory\n", __FUNCTION__); return GL_FALSE; } @@ -455,7 +457,7 @@ r200DrawPixels( GLcontext *ctx, const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels ) { - if (R200_DEBUG & DEBUG_PIXEL) + if (R200_DEBUG & RADEON_PIXEL) fprintf(stderr, "%s\n", __FUNCTION__); if (!r200TryDrawPixels( ctx, x, y, width, height, format, type, diff --git a/src/mesa/drivers/dri/r200/r200_sanity.c b/src/mesa/drivers/dri/r200/r200_sanity.c index 36530c224e..1241a926ba 100644 --- a/src/mesa/drivers/dri/r200/r200_sanity.c +++ b/src/mesa/drivers/dri/r200/r200_sanity.c @@ -48,11 +48,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define MORE_VERBOSE 1 #if MORE_VERBOSE -#define VERBOSE (R200_DEBUG & DEBUG_VERBOSE) +#define VERBOSE (R200_DEBUG & RADEON_VERBOSE) #define NORMAL (1) #else #define VERBOSE 0 -#define NORMAL (R200_DEBUG & DEBUG_VERBOSE) +#define NORMAL (R200_DEBUG & RADEON_VERBOSE) #endif diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 5a6fd20d8c..76852e315c 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -764,31 +764,6 @@ static void r200PolygonOffset( GLcontext *ctx, rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32; } -static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - GLuint i; - drm_radeon_stipple_t stipple; - - /* Must flip pattern upside down. - */ - for ( i = 0 ; i < 32 ; i++ ) { - rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i]; - } - - /* TODO: push this into cmd mechanism - */ - radeon_firevertices(&rmesa->radeon); - LOCK_HARDWARE( &rmesa->radeon ); - - /* FIXME: Use window x,y offsets into stipple RAM. 
- */ - stipple.mask = rmesa->state.stipple.mask; - drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, - &stipple, sizeof(stipple) ); - UNLOCK_HARDWARE( &rmesa->radeon ); -} - static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); @@ -1053,7 +1028,7 @@ void r200UpdateMaterial( GLcontext *ctx ) if (ctx->Light.ColorMaterialEnabled) mask &= ~ctx->Light.ColorMaterialBitmask; - if (R200_DEBUG & DEBUG_STATE) + if (R200_DEBUG & RADEON_STATE) fprintf(stderr, "%s\n", __FUNCTION__); if (mask & MAT_BIT_FRONT_EMISSION) { @@ -1650,6 +1625,30 @@ void r200UpdateWindow( GLcontext *ctx ) rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui32; } +void r200_vtbl_update_scissor( GLcontext *ctx ) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + unsigned x1, y1, x2, y2; + struct radeon_renderbuffer *rrb; + + R200_SET_STATE(r200, set, SET_RE_CNTL, R200_SCISSOR_ENABLE | r200->hw.set.cmd[SET_RE_CNTL]); + + if (r200->radeon.state.scissor.enabled) { + x1 = r200->radeon.state.scissor.rect.x1; + y1 = r200->radeon.state.scissor.rect.y1; + x2 = r200->radeon.state.scissor.rect.x2; + y2 = r200->radeon.state.scissor.rect.y2; + } else { + rrb = radeon_get_colorbuffer(&r200->radeon); + x1 = 0; + y1 = 0; + x2 = rrb->base.Width - 1; + y2 = rrb->base.Height - 1; + } + + R200_SET_STATE(r200, sci, SCI_XY_1, x1 | (y1 << 16)); + R200_SET_STATE(r200, sci, SCI_XY_2, x2 | (y2 << 16)); +} static void r200Viewport( GLcontext *ctx, GLint x, GLint y, @@ -1791,7 +1790,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) r200ContextPtr rmesa = R200_CONTEXT(ctx); GLuint p, flag; - if ( R200_DEBUG & DEBUG_STATE ) + if ( R200_DEBUG & RADEON_STATE ) fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr( cap ), state ? 
"GL_TRUE" : "GL_FALSE" ); @@ -2175,7 +2174,7 @@ void r200LightingSpaceChange( GLcontext *ctx ) r200ContextPtr rmesa = R200_CONTEXT(ctx); GLboolean tmp; - if (R200_DEBUG & DEBUG_STATE) + if (R200_DEBUG & RADEON_STATE) fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]); @@ -2191,7 +2190,7 @@ void r200LightingSpaceChange( GLcontext *ctx ) rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS; } - if (R200_DEBUG & DEBUG_STATE) + if (R200_DEBUG & RADEON_STATE) fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]); } @@ -2234,7 +2233,7 @@ static void update_texturematrix( GLcontext *ctx ) GLuint compsel = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]; int unit; - if (R200_DEBUG & DEBUG_STATE) + if (R200_DEBUG & RADEON_STATE) fprintf(stderr, "%s before COMPSEL: %x\n", __FUNCTION__, rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]); @@ -2289,8 +2288,11 @@ static GLboolean r200ValidateBuffers(GLcontext *ctx) { r200ContextPtr rmesa = R200_CONTEXT(ctx); struct radeon_renderbuffer *rrb; + struct radeon_dma_bo *dma_bo; int i, ret; + if (RADEON_DEBUG & RADEON_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs); rrb = radeon_get_colorbuffer(&rmesa->radeon); @@ -2323,9 +2325,12 @@ static GLboolean r200ValidateBuffers(GLcontext *ctx) RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); } - ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); - if (ret) - return GL_FALSE; + dma_bo = first_elem(&rmesa->radeon.dma.reserved); + { + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, dma_bo->bo, RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + } return GL_TRUE; } @@ -2461,9 +2466,24 @@ static void r200WrapRunPipeline( GLcontext *ctx ) } +static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask ) +{ + r200ContextPtr r200 = 
R200_CONTEXT(ctx); + GLint i; + + radeon_firevertices(&r200->radeon); + + R200_STATECHANGE(r200, stp); + + /* Must flip pattern upside down. + */ + for ( i = 31 ; i >= 0; i--) { + r200->hw.stp.cmd[3 + i] = ((GLuint *) mask)[i]; + } +} /* Initialize the driver's state functions. */ -void r200InitStateFuncs( struct dd_function_table *functions ) +void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 ) { functions->UpdateState = r200InvalidateState; functions->LightingSpaceChange = r200LightingSpaceChange; @@ -2497,7 +2517,10 @@ void r200InitStateFuncs( struct dd_function_table *functions ) functions->LogicOpcode = r200LogicOpCode; functions->PolygonMode = r200PolygonMode; functions->PolygonOffset = r200PolygonOffset; - functions->PolygonStipple = r200PolygonStipple; + if (dri2) + functions->PolygonStipple = r200PolygonStipple; + else + functions->PolygonStipple = radeonPolygonStipplePreKMS; functions->PointParameterfv = r200PointParameter; functions->PointSize = r200PointSize; functions->RenderMode = r200RenderMode; diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h index 23cf8aea66..9c62f0a644 100644 --- a/src/mesa/drivers/dri/r200/r200_state.h +++ b/src/mesa/drivers/dri/r200/r200_state.h @@ -38,7 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r200_context.h" extern void r200InitState( r200ContextPtr rmesa ); -extern void r200InitStateFuncs( struct dd_function_table *functions ); +extern void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 ); extern void r200InitTnlFuncs( GLcontext *ctx ); extern void r200UpdateMaterial( GLcontext *ctx ); @@ -49,6 +49,8 @@ extern void r200UpdateDrawBuffer(GLcontext *ctx); extern GLboolean r200ValidateState( GLcontext *ctx ); +extern void r200_vtbl_update_scissor( GLcontext *ctx ); + extern void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ); #define FALLBACK( rmesa, bit, mode ) do { \ if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index bc871d9904..7697306d88 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -51,6 +51,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_tcl.h" #include "r200_tex.h" #include "r200_swtcl.h" +#include "radeon_queryobj.h" #include "xmlpool.h" @@ -226,57 +227,73 @@ static int cmdscl2( int offset, int stride, int count ) return h.i; } -#define CHECK( NM, FLAG ) \ +/** + * Check functions are used to check if state is active. + * If it is active check function returns maximum emit size. + */ +#define CHECK( NM, FLAG, ADD ) \ static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \ { \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ (void) rmesa; \ - return (FLAG) ? atom->cmd_size : 0; \ + return (FLAG) ? atom->cmd_size + (ADD) : 0; \ } -#define TCL_CHECK( NM, FLAG ) \ +#define TCL_CHECK( NM, FLAG, ADD ) \ static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \ { \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ - return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? 
atom->cmd_size : 0; \ + return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size + (ADD) : 0; \ } -#define TCL_OR_VP_CHECK( NM, FLAG ) \ +#define TCL_OR_VP_CHECK( NM, FLAG, ADD ) \ static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ { \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ - return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0; \ + return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size + (ADD) : 0; \ } -#define VP_CHECK( NM, FLAG ) \ +#define VP_CHECK( NM, FLAG, ADD ) \ static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ { \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ (void) atom; \ - return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \ + return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size + (ADD) : 0; \ } -CHECK( always, GL_TRUE ) -CHECK( never, GL_FALSE ) -CHECK( tex_any, ctx->Texture._EnabledUnits ) -CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) ); -CHECK( tex_pair, (rmesa->state.texture.unit[atom->idx].unitneeded | rmesa->state.texture.unit[atom->idx & ~1].unitneeded) ) -CHECK( tex, rmesa->state.texture.unit[atom->idx].unitneeded ) -CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled ) - CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled) ) -CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) ) -CHECK( afs, ctx->ATIFragmentShader._Enabled ) -CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT ) -TCL_CHECK( tcl_fog, ctx->Fog.Enabled ) -TCL_CHECK( tcl, GL_TRUE ) -TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded ) -TCL_CHECK( tcl_lighting, ctx->Light.Enabled ) -TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled ) -TCL_OR_VP_CHECK( tcl_ucp, 
(ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))) ) -TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE ) -VP_CHECK( tcl_vp, GL_TRUE ) -VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64 ) -VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96 ) +CHECK( always, GL_TRUE, 0 ) +CHECK( always_add4, GL_TRUE, 4 ) +CHECK( never, GL_FALSE, 0 ) +CHECK( tex_any, ctx->Texture._EnabledUnits, 0 ) +CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled), 0 ); +CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled, 0 ) + CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 ) +CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)), 0 ) +CHECK( afs, ctx->ATIFragmentShader._Enabled, 0 ) +CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 3 + 3*5 - CUBE_STATE_SIZE ) +CHECK( tex_cube_cs, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 2 + 4*5 - CUBE_STATE_SIZE ) +TCL_CHECK( tcl_fog, ctx->Fog.Enabled, 0 ) +TCL_CHECK( tcl_fog_add4, ctx->Fog.Enabled, 4 ) +TCL_CHECK( tcl, GL_TRUE, 0 ) +TCL_CHECK( tcl_add8, GL_TRUE, 8 ) +TCL_CHECK( tcl_add4, GL_TRUE, 4 ) +TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded, 0 ) +TCL_CHECK( tcl_lighting, ctx->Light.Enabled, 0 ) +TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 0 ) +TCL_CHECK( tcl_tex_add4, rmesa->state.texture.unit[atom->idx].unitneeded, 4 ) +TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 ) +TCL_CHECK( tcl_lighting_add6, ctx->Light.Enabled, 6 ) +TCL_CHECK( tcl_light_add8, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 8 ) +TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 0 ) +TCL_OR_VP_CHECK( tcl_ucp_add4, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 4 ) +TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE, 0 ) +TCL_OR_VP_CHECK( 
tcl_or_vp_add2, GL_TRUE, 2 ) +VP_CHECK( tcl_vp, GL_TRUE, 0 ) +VP_CHECK( tcl_vp_add4, GL_TRUE, 4 ) +VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64, 0 ) +VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96, 0 ) +VP_CHECK( tcl_vp_size_add4, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64, 4 ) +VP_CHECK( tcl_vpp_size_add4, ctx->VertexProgram.Current->Base.NumNativeParameters > 96, 4 ) #define OUT_VEC(hdr, data) do { \ drm_radeon_cmd_header_t h; \ @@ -324,14 +341,22 @@ VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 9 OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \ OUT_BATCH_TABLE((data), h.scalars.count); \ } while(0) +static int check_rrb(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + struct radeon_renderbuffer *rrb; + rrb = radeon_get_colorbuffer(&r200->radeon); + if (!rrb || !rrb->bo) + return 0; + return atom->cmd_size; +} static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 6; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1)); OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18)); @@ -342,9 +367,8 @@ static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 8; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1); OUT_VEC(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1); @@ -355,9 +379,8 @@ static void ptp_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = 
atom->check(ctx, atom); - dwords += 8; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1); OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1); @@ -368,9 +391,8 @@ static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 4; OUT_VECLINEAR(atom->cmd[0], atom->cmd+1); } @@ -378,9 +400,8 @@ static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 2; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_SCL(atom->cmd[0], atom->cmd+1); END_BATCH(); @@ -391,9 +412,8 @@ static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 4; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_VEC(atom->cmd[0], atom->cmd+1); END_BATCH(); @@ -406,10 +426,10 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom) struct radeon_renderbuffer *rrb; uint32_t cbpitch; uint32_t zbpitch, depth_fmt; - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); /* output the first 7 bytes of context */ - BEGIN_BATCH_NO_AUTOSTATE(dwords+2+2); + BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_BATCH_TABLE(atom->cmd, 5); rrb = radeon_get_depthbuffer(&r200->radeon); @@ -466,6 +486,31 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom) END_BATCH(); } +static int check_always_ctx( GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + struct radeon_renderbuffer *rrb, *drb; + uint32_t dwords; + + rrb = radeon_get_colorbuffer(&r200->radeon); + if (!rrb || !rrb->bo) { + return 0; + } + + drb = 
radeon_get_depthbuffer(&r200->radeon); + + dwords = 10; + if (drb) + dwords += 6; + if (rrb) + dwords += 8; + if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) + dwords += 4; + + + return dwords; +} + static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); @@ -473,7 +518,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) struct radeon_renderbuffer *rrb, *drb; uint32_t cbpitch = 0; uint32_t zbpitch = 0; - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); uint32_t depth_fmt; rrb = radeon_get_colorbuffer(&r200->radeon); @@ -511,14 +556,6 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; } - dwords = 10; - if (drb) - dwords += 6; - if (rrb) - dwords += 6; - if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) - dwords += 4; - /* output the first 7 bytes of context */ BEGIN_BATCH_NO_AUTOSTATE(dwords); @@ -546,7 +583,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); - OUT_BATCH(cbpitch); + OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); } if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) { @@ -556,17 +593,46 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) END_BATCH(); } +static int get_tex_size(GLcontext* ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + uint32_t dwords = atom->cmd_size + 2; + int i = atom->idx; + radeonTexObj *t = r200->state.texture.unit[i].texobj; + if (!(t && t->mt && !t->image_override)) + dwords -= 2; + + return dwords; +} + +static int check_tex_pair(GLcontext* ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + /** XOR is bit flip operation so use it for finding pair */ + if (!(r200->state.texture.unit[atom->idx].unitneeded | 
r200->state.texture.unit[atom->idx ^ 1].unitneeded)) + return 0; + + return get_tex_size(ctx, atom); +} + +static int check_tex(GLcontext* ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + if (!(r200->state.texture.unit[atom->idx].unitneeded)) + return 0; + + return get_tex_size(ctx, atom); +} + + static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); int i = atom->idx; radeonTexObj *t = r200->state.texture.unit[i].texobj; - radeon_mipmap_level *lvl; - if (t && t->mt && !t->image_override) - dwords += 2; BEGIN_BATCH_NO_AUTOSTATE(dwords); /* is this ok even with drm older than 1.18? */ OUT_BATCH_TABLE(atom->cmd, 10); @@ -584,18 +650,13 @@ static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom) END_BATCH(); } -static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) +static int get_tex_mm_size(GLcontext* ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); - BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->cmd_size + 2; + int hastexture = 1; int i = atom->idx; radeonTexObj *t = r200->state.texture.unit[i].texobj; - radeon_mipmap_level *lvl; - int hastexture = 1; - - if (!r200->state.texture.unit[i].unitneeded) - hastexture = 0; if (!t) hastexture = 0; else { @@ -603,16 +664,46 @@ static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) hastexture = 0; } - if (hastexture) - dwords += 2; - else - dwords -= 2; + if (!hastexture) + dwords -= 4; + return dwords; +} + +static int check_tex_pair_mm(GLcontext* ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + /** XOR is bit flip operation so use it for finding pair */ + if (!(r200->state.texture.unit[atom->idx].unitneeded | r200->state.texture.unit[atom->idx ^ 1].unitneeded)) + 
return 0; + + return get_tex_mm_size(ctx, atom); +} + +static int check_tex_mm(GLcontext* ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + if (!(r200->state.texture.unit[atom->idx].unitneeded)) + return 0; + + return get_tex_mm_size(ctx, atom); +} + + +static void tex_emit_mm(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + uint32_t dwords = atom->check(ctx, atom); + int i = atom->idx; + radeonTexObj *t = r200->state.texture.unit[i].texobj; + if (!r200->state.texture.unit[i].unitneeded) + dwords -= 4; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (32 * i), 7)); OUT_BATCH_TABLE((atom->cmd + 1), 8); - if (hastexture) { + if (dwords > atom->cmd_size) { OUT_BATCH(CP_PACKET0(R200_PP_TXOFFSET_0 + (24 * i), 0)); if (t->mt && !t->image_override) { OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, @@ -631,12 +722,15 @@ static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = 3; + uint32_t dwords = atom->check(ctx, atom); int i = atom->idx, j; radeonTexObj *t = r200->state.texture.unit[i].texobj; radeon_mipmap_level *lvl; - BEGIN_BATCH_NO_AUTOSTATE(dwords + (3 * 5)); + if (!(t && !t->image_override)) + dwords = 2; + + BEGIN_BATCH_NO_AUTOSTATE(dwords); /* XXX that size won't really match with image_override... 
*/ OUT_BATCH_TABLE(atom->cmd, 2); @@ -655,12 +749,14 @@ static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = 2; + uint32_t dwords = atom->check(ctx, atom); int i = atom->idx, j; radeonTexObj *t = r200->state.texture.unit[i].texobj; radeon_mipmap_level *lvl; + if (!(t && !t->image_override)) + dwords = 2; - BEGIN_BATCH_NO_AUTOSTATE(dwords + (4 * 5)); + BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_BATCH_TABLE(atom->cmd, 2); if (t && !t->image_override) { @@ -668,7 +764,7 @@ static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) for (j = 1; j <= 5; j++) { OUT_BATCH(CP_PACKET0(R200_PP_CUBIC_OFFSET_F1_0 + (24*i) + (4 * (j-1)), 0)); OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset, - RADEON_GEM_DOMAIN_VRAM, 0, 0); + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); } } END_BATCH(); @@ -706,23 +802,32 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \ rmesa->hw.ATOM.name = NM; \ rmesa->hw.ATOM.idx = IDX; \ - rmesa->hw.ATOM.check = check_##CHK; \ + if (check_##CHK != check_never) { \ + rmesa->hw.ATOM.check = check_##CHK; \ + rmesa->radeon.hw.max_state_size += SZ * sizeof(int); \ + } else { \ + rmesa->hw.ATOM.check = NULL; \ + } \ rmesa->hw.ATOM.dirty = GL_FALSE; \ - rmesa->radeon.hw.max_state_size += SZ * sizeof(int); \ } while (0) /* Allocate state buffers: */ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) - ALLOC_STATE( ctx, always, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 ); + ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 ); else - ALLOC_STATE( ctx, always, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 ); + ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 ); if (rmesa->radeon.radeonScreen->kernel_mm) + { rmesa->hw.ctx.emit = ctx_emit_cs; + rmesa->hw.ctx.check = check_always_ctx; + } else + { rmesa->hw.ctx.emit = ctx_emit; 
+ } ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 ); ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 ); ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 ); @@ -734,49 +839,60 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 ); ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 ); ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 ); - if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { - if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { - /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */ - ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 ); - ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 ); - ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); + { + int state_size = TEX_STATE_SIZE_NEWDRM; + if (!rmesa->radeon.radeonScreen->drmSupportsFragShader) { + state_size = TEX_STATE_SIZE_OLDDRM; } - else { - ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 ); - ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 ); - ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 ); + if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { + /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */ + ALLOC_STATE( tex[0], tex_pair_mm, state_size, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex_pair_mm, state_size, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); + } + else { + ALLOC_STATE( tex[0], tex_mm, state_size, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex_mm, state_size, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 ); + } + ALLOC_STATE( tex[2], tex_mm, state_size, "TEX/tex-2", 2 ); + ALLOC_STATE( tex[3], tex_mm, state_size, "TEX/tex-3", 3 ); + ALLOC_STATE( tex[4], tex_mm, state_size, "TEX/tex-4", 4 ); + 
ALLOC_STATE( tex[5], tex_mm, state_size, "TEX/tex-5", 5 ); + if (!rmesa->radeon.radeonScreen->kernel_mm) + { + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { + rmesa->hw.tex[0].check = check_tex_pair; + rmesa->hw.tex[1].check = check_tex_pair; + } else { + rmesa->hw.tex[0].check = check_tex; + rmesa->hw.tex[1].check = check_tex; + } + rmesa->hw.tex[2].check = check_tex; + rmesa->hw.tex[3].check = check_tex; + rmesa->hw.tex[4].check = check_tex; + rmesa->hw.tex[5].check = check_tex; + } + if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { + ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 ); + ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); + ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); + } else { + ALLOC_STATE( atf, never, ATF_STATE_SIZE, "ATF/tfactor", 0 ); + ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); + ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); + } } - ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-2", 2 ); - ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-3", 3 ); - ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-4", 4 ); - ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-5", 5 ); - ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 ); - ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); - ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); } - else { - if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { - ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 ); - ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 ); - ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); - } - else { - ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 ); - ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 ); - ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 ); - } - ALLOC_STATE( tex[2], tex, 
TEX_STATE_SIZE_OLDDRM, "TEX/tex-2", 2 ); - ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-3", 3 ); - ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-4", 4 ); - ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-5", 5 ); - ALLOC_STATE( atf, never, ATF_STATE_SIZE, "TF/tfactor", 0 ); - ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); - ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); + /* polygon stipple is done with irq for non-kms */ + if (rmesa->radeon.radeonScreen->kernel_mm) { + ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 ); } - for (i = 0; i < 5; i++) + for (i = 0; i < 6; i++) if (rmesa->radeon.radeonScreen->kernel_mm) - rmesa->hw.tex[i].emit = tex_emit_cs; + rmesa->hw.tex[i].emit = tex_emit_mm; else rmesa->hw.tex[i].emit = tex_emit; if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) { @@ -786,10 +902,11 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 ); ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 ); ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 ); - for (i = 0; i < 5; i++) - if (rmesa->radeon.radeonScreen->kernel_mm) + for (i = 0; i < 6; i++) + if (rmesa->radeon.radeonScreen->kernel_mm) { rmesa->hw.cube[i].emit = cube_emit_cs; - else + rmesa->hw.cube[i].check = check_tex_cube_cs; + } else rmesa->hw.cube[i].emit = cube_emit; } else { @@ -803,10 +920,17 @@ void r200InitState( r200ContextPtr rmesa ) if (rmesa->radeon.radeonScreen->drmSupportsVertexProgram) { ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 ); - ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 ); - ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 ); - ALLOC_STATE( vpp[0], tcl_vp, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 ); - ALLOC_STATE( vpp[1], tcl_vpp_size, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 ); + if (rmesa->radeon.radeonScreen->kernel_mm) { + ALLOC_STATE( vpi[0], 
tcl_vp_add4, VPI_STATE_SIZE, "VP/vertexprog-0", 0 ); + ALLOC_STATE( vpi[1], tcl_vp_size_add4, VPI_STATE_SIZE, "VP/vertexprog-1", 1 ); + ALLOC_STATE( vpp[0], tcl_vp_add4, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 ); + ALLOC_STATE( vpp[1], tcl_vpp_size_add4, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 ); + } else { + ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 ); + ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 ); + ALLOC_STATE( vpp[0], tcl_vp, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 ); + ALLOC_STATE( vpp[1], tcl_vpp_size, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 ); + } } else { ALLOC_STATE( pvs, never, PVS_STATE_SIZE, "PVS/pvscntl", 0 ); @@ -819,35 +943,69 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( tcl, tcl_or_vp, TCL_STATE_SIZE, "TCL/tcl", 0 ); ALLOC_STATE( msl, tcl, MSL_STATE_SIZE, "MSL/matrix-select", 0 ); ALLOC_STATE( tcg, tcl, TCG_STATE_SIZE, "TCG/texcoordgen", 0 ); - ALLOC_STATE( mtl[0], tcl_lighting, MTL_STATE_SIZE, "MTL0/material0", 0 ); - ALLOC_STATE( mtl[1], tcl_lighting, MTL_STATE_SIZE, "MTL1/material1", 1 ); - ALLOC_STATE( grd, tcl_or_vp, GRD_STATE_SIZE, "GRD/guard-band", 0 ); - ALLOC_STATE( fog, tcl_fog, FOG_STATE_SIZE, "FOG/fog", 0 ); - ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 0 ); - ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 0 ); - ALLOC_STATE( mat[R200_MTX_MV], tcl, MAT_STATE_SIZE, "MAT/modelview", 0 ); - ALLOC_STATE( mat[R200_MTX_IMV], tcl, MAT_STATE_SIZE, "MAT/it-modelview", 0 ); - ALLOC_STATE( mat[R200_MTX_MVP], tcl, MAT_STATE_SIZE, "MAT/modelproject", 0 ); - ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex, MAT_STATE_SIZE, "MAT/texmat0", 0 ); - ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex, MAT_STATE_SIZE, "MAT/texmat1", 1 ); - ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex, MAT_STATE_SIZE, "MAT/texmat2", 2 ); - ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex, MAT_STATE_SIZE, "MAT/texmat3", 3 ); - ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex, MAT_STATE_SIZE, "MAT/texmat4", 4 
); - ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex, MAT_STATE_SIZE, "MAT/texmat5", 5 ); - ALLOC_STATE( ucp[0], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-0", 0 ); - ALLOC_STATE( ucp[1], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); - ALLOC_STATE( ucp[2], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-2", 2 ); - ALLOC_STATE( ucp[3], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-3", 3 ); - ALLOC_STATE( ucp[4], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-4", 4 ); - ALLOC_STATE( ucp[5], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-5", 5 ); - ALLOC_STATE( lit[0], tcl_light, LIT_STATE_SIZE, "LIT/light-0", 0 ); - ALLOC_STATE( lit[1], tcl_light, LIT_STATE_SIZE, "LIT/light-1", 1 ); - ALLOC_STATE( lit[2], tcl_light, LIT_STATE_SIZE, "LIT/light-2", 2 ); - ALLOC_STATE( lit[3], tcl_light, LIT_STATE_SIZE, "LIT/light-3", 3 ); - ALLOC_STATE( lit[4], tcl_light, LIT_STATE_SIZE, "LIT/light-4", 4 ); - ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 ); - ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 ); - ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 ); + if (rmesa->radeon.radeonScreen->kernel_mm) { + ALLOC_STATE( mtl[0], tcl_lighting_add6, MTL_STATE_SIZE, "MTL0/material0", 0 ); + ALLOC_STATE( mtl[1], tcl_lighting_add6, MTL_STATE_SIZE, "MTL1/material1", 1 ); + ALLOC_STATE( grd, tcl_or_vp_add2, GRD_STATE_SIZE, "GRD/guard-band", 0 ); + ALLOC_STATE( fog, tcl_fog_add4, FOG_STATE_SIZE, "FOG/fog", 0 ); + ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 0 ); + ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 0 ); + ALLOC_STATE( mat[R200_MTX_MV], tcl_add4, MAT_STATE_SIZE, "MAT/modelview", 0 ); + ALLOC_STATE( mat[R200_MTX_IMV], tcl_add4, MAT_STATE_SIZE, "MAT/it-modelview", 0 ); + ALLOC_STATE( mat[R200_MTX_MVP], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 0 ); + ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat0", 0 ); + ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 ); + 
ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat2", 2 ); + ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat3", 3 ); + ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat4", 4 ); + ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat5", 5 ); + ALLOC_STATE( ucp[0], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-0", 0 ); + ALLOC_STATE( ucp[1], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); + ALLOC_STATE( ucp[2], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-2", 2 ); + ALLOC_STATE( ucp[3], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-3", 3 ); + ALLOC_STATE( ucp[4], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-4", 4 ); + ALLOC_STATE( ucp[5], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-5", 5 ); + ALLOC_STATE( lit[0], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-0", 0 ); + ALLOC_STATE( lit[1], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-1", 1 ); + ALLOC_STATE( lit[2], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-2", 2 ); + ALLOC_STATE( lit[3], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-3", 3 ); + ALLOC_STATE( lit[4], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-4", 4 ); + ALLOC_STATE( lit[5], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-5", 5 ); + ALLOC_STATE( lit[6], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-6", 6 ); + ALLOC_STATE( lit[7], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-7", 7 ); + ALLOC_STATE( sci, rrb, SCI_STATE_SIZE, "SCI/scissor", 0 ); + } else { + ALLOC_STATE( mtl[0], tcl_lighting, MTL_STATE_SIZE, "MTL0/material0", 0 ); + ALLOC_STATE( mtl[1], tcl_lighting, MTL_STATE_SIZE, "MTL1/material1", 1 ); + ALLOC_STATE( grd, tcl_or_vp, GRD_STATE_SIZE, "GRD/guard-band", 0 ); + ALLOC_STATE( fog, tcl_fog, FOG_STATE_SIZE, "FOG/fog", 0 ); + ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 0 ); + ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 0 ); + ALLOC_STATE( mat[R200_MTX_MV], tcl, MAT_STATE_SIZE, "MAT/modelview", 0 ); + ALLOC_STATE( 
mat[R200_MTX_IMV], tcl, MAT_STATE_SIZE, "MAT/it-modelview", 0 ); + ALLOC_STATE( mat[R200_MTX_MVP], tcl, MAT_STATE_SIZE, "MAT/modelproject", 0 ); + ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex, MAT_STATE_SIZE, "MAT/texmat0", 0 ); + ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex, MAT_STATE_SIZE, "MAT/texmat1", 1 ); + ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex, MAT_STATE_SIZE, "MAT/texmat2", 2 ); + ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex, MAT_STATE_SIZE, "MAT/texmat3", 3 ); + ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex, MAT_STATE_SIZE, "MAT/texmat4", 4 ); + ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex, MAT_STATE_SIZE, "MAT/texmat5", 5 ); + ALLOC_STATE( ucp[0], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-0", 0 ); + ALLOC_STATE( ucp[1], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); + ALLOC_STATE( ucp[2], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-2", 2 ); + ALLOC_STATE( ucp[3], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-3", 3 ); + ALLOC_STATE( ucp[4], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-4", 4 ); + ALLOC_STATE( ucp[5], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-5", 5 ); + ALLOC_STATE( lit[0], tcl_light, LIT_STATE_SIZE, "LIT/light-0", 0 ); + ALLOC_STATE( lit[1], tcl_light, LIT_STATE_SIZE, "LIT/light-1", 1 ); + ALLOC_STATE( lit[2], tcl_light, LIT_STATE_SIZE, "LIT/light-2", 2 ); + ALLOC_STATE( lit[3], tcl_light, LIT_STATE_SIZE, "LIT/light-3", 3 ); + ALLOC_STATE( lit[4], tcl_light, LIT_STATE_SIZE, "LIT/light-4", 4 ); + ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 ); + ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 ); + ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 ); + ALLOC_STATE( sci, never, SCI_STATE_SIZE, "SCI/scissor", 0 ); + } ALLOC_STATE( pix[0], pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 ); ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 ); ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 ); @@ -862,7 +1020,10 @@ void r200InitState( r200ContextPtr rmesa ) } if 
(rmesa->radeon.radeonScreen->drmSupportsPointSprites) { ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 ); - ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 ); + if (rmesa->radeon.radeonScreen->kernel_mm) + ALLOC_STATE( ptp, tcl_add8, PTP_STATE_SIZE, "PTP/pointparams", 0 ); + else + ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 ); } else { ALLOC_STATE (spr, never, SPR_STATE_SIZE, "SPR/pointsprite", 0 ); @@ -954,19 +1115,29 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTE_CNTL); rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL); rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL); + + rmesa->hw.sci.cmd[SCI_CMD_0] = CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0); + rmesa->hw.sci.cmd[SCI_CMD_1] = CP_PACKET0(R200_RE_TOP_LEFT, 0); + rmesa->hw.sci.cmd[SCI_CMD_2] = CP_PACKET0(R200_RE_WIDTH_HEIGHT, 0); + if (rmesa->radeon.radeonScreen->kernel_mm) { - rmesa->hw.mtl[0].emit = mtl_emit; - rmesa->hw.mtl[1].emit = mtl_emit; - rmesa->hw.vpi[0].emit = veclinear_emit; - rmesa->hw.vpi[1].emit = veclinear_emit; - rmesa->hw.vpp[0].emit = veclinear_emit; - rmesa->hw.vpp[1].emit = veclinear_emit; + rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0); + rmesa->hw.stp.cmd[STP_DATA_0] = 0; + rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31); + + rmesa->hw.mtl[0].emit = mtl_emit; + rmesa->hw.mtl[1].emit = mtl_emit; + + rmesa->hw.vpi[0].emit = veclinear_emit; + rmesa->hw.vpi[1].emit = veclinear_emit; + rmesa->hw.vpp[0].emit = veclinear_emit; + rmesa->hw.vpp[1].emit = veclinear_emit; - rmesa->hw.grd.emit = scl_emit; - rmesa->hw.fog.emit = vec_emit; - rmesa->hw.glt.emit = vec_emit; - rmesa->hw.eye.emit = vec_emit; + rmesa->hw.grd.emit = scl_emit; + rmesa->hw.fog.emit = vec_emit; + rmesa->hw.glt.emit = vec_emit; + rmesa->hw.eye.emit = vec_emit; for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++) rmesa->hw.mat[i].emit = 
vec_emit; @@ -1435,6 +1606,12 @@ void r200InitState( r200ContextPtr rmesa ) r200LightingSpaceChange( ctx ); + if (rmesa->radeon.radeonScreen->kernel_mm) { + radeon_init_query_stateobj(&rmesa->radeon, R200_QUERYOBJ_CMDSIZE); + rmesa->radeon.query.queryobj.cmd[R200_QUERYOBJ_CMD_0] = CP_PACKET0(RADEON_RB3D_ZPASS_DATA, 0); + rmesa->radeon.query.queryobj.cmd[R200_QUERYOBJ_DATA_0] = 0; + } + rmesa->radeon.hw.all_dirty = GL_TRUE; rcommonInitCmdBuf(&rmesa->radeon); diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index 83e70b586d..240fb45078 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -39,6 +39,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/image.h" #include "main/imports.h" #include "main/macros.h" +#include "main/simple_list.h" #include "swrast/s_context.h" #include "swrast/s_fog.h" @@ -200,10 +201,32 @@ static void r200SetVertexFormat( GLcontext *ctx ) } } +static void r200_predict_emit_size( r200ContextPtr rmesa ) +{ + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); + const int vertex_array_size = 7; + const int prim_size = 3; + if (!rmesa->radeon.swtcl.emit_prediction) { + const int state_size = radeonCountStateEmitSize(&rmesa->radeon); + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, + state_size + + vertex_array_size + prim_size, + __FUNCTION__)) + rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon); + else + rmesa->radeon.swtcl.emit_prediction = state_size; + rmesa->radeon.swtcl.emit_prediction += vertex_array_size + prim_size + + rmesa->radeon.cmdbuf.cs->cdw; + } +} + static void r200RenderStart( GLcontext *ctx ) { r200SetVertexFormat( ctx ); + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); } @@ -267,21 +290,26 @@ void r200ChooseVertexState( GLcontext *ctx ) void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset) { r200ContextPtr rmesa = 
R200_CONTEXT(ctx); - rcommonEnsureCmdBufSpace(&rmesa->radeon, - rmesa->radeon.hw.max_state_size + (12*sizeof(int)), - __FUNCTION__); + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); radeonEmitState(&rmesa->radeon); r200EmitVertexAOS( rmesa, rmesa->radeon.swtcl.vertex_size, - rmesa->radeon.dma.current, + first_elem(&rmesa->radeon.dma.reserved)->bo, current_offset); r200EmitVbufPrim( rmesa, rmesa->radeon.swtcl.hw_primitive, rmesa->radeon.swtcl.numverts); + if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", + rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction ); + + rmesa->radeon.swtcl.emit_prediction = 0; } @@ -329,17 +357,27 @@ static void r200ResetLineStipple( GLcontext *ctx ); #define HAVE_POLYGONS 1 #define HAVE_ELTS 0 +static void* r200_alloc_verts( r200ContextPtr rmesa, GLuint n, GLuint size) +{ + void *rv; + do { + r200_predict_emit_size( rmesa ); + rv = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ); + } while(!rv); + return rv; +} + #undef LOCAL_VARS #undef ALLOC_VERTS #define CTX_ARG r200ContextPtr rmesa #define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size -#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ) +#define ALLOC_VERTS( n, size ) r200_alloc_verts(rmesa, n, size) #define LOCAL_VARS \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ const char *r200verts = (char *)rmesa->radeon.swtcl.verts; #define VERT(x) (radeonVertex *)(r200verts + ((x) * vertsize * sizeof(int))) #define VERTEX radeonVertex -#define DO_DEBUG_VERTS (1 && (R200_DEBUG & DEBUG_VERTS)) +#define DO_DEBUG_VERTS (1 && (R200_DEBUG & RADEON_VERTS)) #undef TAG #define TAG(x) r200_##x @@ -443,7 +481,7 @@ do { \ #define LOCAL_VARS(n) \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ - GLuint color[n], spec[n]; \ + GLuint color[n] = {0}, spec[n] = {0}; \ GLuint coloroffset = 
rmesa->swtcl.coloroffset; \ GLuint specoffset = rmesa->swtcl.specoffset; \ (void) color; (void) spec; (void) coloroffset; (void) specoffset; @@ -650,7 +688,7 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ) TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_TRUE ); _swsetup_Wakeup( ctx ); rmesa->radeon.swtcl.RenderIndex = ~0; - if (R200_DEBUG & DEBUG_FALLBACKS) { + if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "R200 begin rasterization fallback: 0x%x %s\n", bit, getFallbackString(bit)); } @@ -682,7 +720,7 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ) r200ChooseVertexState( ctx ); r200ChooseRenderState( ctx ); } - if (R200_DEBUG & DEBUG_FALLBACKS) { + if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "R200 end rasterization fallback: 0x%x %s\n", bit, getFallbackString(bit)); } @@ -889,6 +927,7 @@ void r200InitSwtcl( GLcontext *ctx ) init_rast_tab(); firsttime = 0; } + rmesa->radeon.swtcl.emit_prediction = 0; tnl->Driver.Render.Start = r200RenderStart; tnl->Driver.Render.Finish = r200RenderFinish; diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index 580370933e..c702910ef2 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -51,6 +51,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r200_swtcl.h" #include "r200_maos.h" +#include "radeon_common_context.h" + #define HAVE_POINTS 1 @@ -109,7 +111,7 @@ static GLboolean discrete_prim[0x10] = { #define ELT_INIT(prim, hw_prim) \ r200TclPrimitive( ctx, prim, hw_prim | R200_VF_PRIM_WALK_IND ) -#define GET_MESA_ELTS() rmesa->tcl.Elts +#define GET_MESA_ELTS() TNL_CONTEXT(ctx)->vb.Elts /* Don't really know how many elts will fit in what's left of cmdbuf, @@ -146,7 +148,7 @@ static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr ) rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) { GLushort *dest = (GLushort *)(rmesa->radeon.tcl.elt_dma_bo->ptr + - rmesa->tcl.elt_used); + rmesa->radeon.tcl.elt_dma_offset + rmesa->tcl.elt_used); rmesa->tcl.elt_used += nr*2; @@ -156,11 +158,10 @@ static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr ) if (rmesa->radeon.dma.flush) rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); - rcommonEnsureCmdBufSpace(&rmesa->radeon, AOS_BUFSZ(rmesa->radeon.tcl.aos_count), __FUNCTION__); - r200EmitAOS( rmesa, rmesa->radeon.tcl.aos_count, 0 ); + r200EmitMaxVtxIndex(rmesa, rmesa->radeon.tcl.aos[0].count); return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr ); } } @@ -187,9 +188,6 @@ static void r200EmitPrim( GLcontext *ctx, r200TclPrimitive( ctx, prim, hwprim ); // fprintf(stderr,"Emit prim %d\n", rmesa->radeon.tcl.aos_count); - rcommonEnsureCmdBufSpace( &rmesa->radeon, - AOS_BUFSZ(rmesa->radeon.tcl.aos_count) + - rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ ); r200EmitAOS( rmesa, rmesa->radeon.tcl.aos_count, @@ -206,6 +204,7 @@ static void r200EmitPrim( GLcontext *ctx, r200EmitPrim( ctx, prim, hwprim, start, count ); \ (void) rmesa; } while (0) +#define MAX_CONVERSION_SIZE 40 /* Try & join small primitives */ #if 0 @@ -368,6 +367,66 @@ r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) } } +/** + * Predict total emit size for next rendering operation so there is no flush in middle of rendering + * Prediction has to aim towards 
the best possible value that is worse than worst case scenario + */ +static GLuint r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint space_required; + GLuint state_size; + GLuint nr_aos = 0; + int i; + /* predict number of aos to emit */ + for (i = 0; i < 15; ++i) + { + if (vimap_rev[i] != 255) + { + ++nr_aos; + } + } + + { + /* count the prediction for state size */ + space_required = 0; + state_size = radeonCountStateEmitSize( &rmesa->radeon ); + /* vtx may be changed in r200EmitArrays so account for it if not dirty */ + if (!rmesa->hw.vtx.dirty) + state_size += rmesa->hw.vtx.check(rmesa->radeon.glCtx, &rmesa->hw.vtx); + /* predict size for elements */ + for (i = 0; i < VB->PrimitiveCount; ++i) + { + if (!VB->Primitive[i].count) + continue; + /* If primitive.count is less than MAX_CONVERSION_SIZE + rendering code may decide convert to elts. + In that case we have to make pessimistic prediction. + and use larger of 2 paths. */ + const GLuint elts = ELTS_BUFSZ(nr_aos); + const GLuint index = INDEX_BUFSZ; + const GLuint vbuf = VBUF_BUFSZ; + if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE) + || vbuf > index + elts) + space_required += vbuf; + else + space_required += index + elts; + space_required += AOS_BUFSZ(nr_aos); + } + } + + radeon_print(RADEON_RENDER,RADEON_VERBOSE, + "%s space %u, aos %d\n", + __func__, space_required, AOS_BUFSZ(nr_aos) ); + /* flush the buffer in case we need more than is left. 
*/ + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required + state_size, __FUNCTION__)) + return space_required + radeonCountStateEmitSize( &rmesa->radeon ); + else + return space_required + state_size; +} + /**********************************************************************/ /* Render pipeline stage */ @@ -396,8 +455,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, if (rmesa->radeon.TclFallback) return GL_TRUE; /* fallback to software t&l */ - if (R200_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); + radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s\n", __FUNCTION__); if (VB->Count == 0) return GL_FALSE; @@ -482,10 +540,10 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, /* Do the actual work: */ radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ ); + GLuint emit_end = r200EnsureEmitSize( ctx, vimap_rev ) + + rmesa->radeon.cmdbuf.cs->cdw; r200EmitArrays( ctx, vimap_rev ); - rmesa->tcl.Elts = VB->Elts; - for (i = 0 ; i < VB->PrimitiveCount ; i++) { GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); @@ -495,11 +553,14 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, if (!length) continue; - if (rmesa->tcl.Elts) + if (VB->Elts) r200EmitEltPrimitive( ctx, start, start+length, prim ); else r200EmitPrimitive( ctx, start, start+length, prim ); } + if ( emit_end < rmesa->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." 
+ " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end); return GL_FALSE; /* finished the pipe */ } @@ -590,7 +651,7 @@ static void transition_to_hwtnl( GLcontext *ctx ) rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~(R200_VTX_XY_FMT|R200_VTX_Z_FMT); rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] |= R200_VTX_W0_FMT; - if (R200_DEBUG & DEBUG_FALLBACKS) + if (R200_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "R200 end tcl fallback\n"); } @@ -632,7 +693,7 @@ void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) if (mode) { rmesa->radeon.TclFallback |= bit; if (oldfallback == 0) { - if (R200_DEBUG & DEBUG_FALLBACKS) + if (R200_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "R200 begin tcl fallback %s\n", getFallbackString( bit )); transition_to_swtnl( ctx ); @@ -641,7 +702,7 @@ void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) else { rmesa->radeon.TclFallback &= ~bit; if (oldfallback == bit) { - if (R200_DEBUG & DEBUG_FALLBACKS) + if (R200_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "R200 end tcl fallback %s\n", getFallbackString( bit )); transition_to_hwtnl( ctx ); diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index 9f79157915..36d9e37d87 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -287,7 +287,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target, GLuint unit = ctx->Texture.CurrentUnit; struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - if ( R200_DEBUG & DEBUG_STATE ) { + if ( R200_DEBUG & RADEON_STATE ) { fprintf( stderr, "%s( %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) ); } @@ -359,7 +359,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, { radeonTexObj* t = radeon_tex_obj(texObj); - if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { + if ( R200_DEBUG & (RADEON_STATE|RADEON_TEXTURE) ) { fprintf( stderr, "%s( %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) ); } @@ -409,7 +409,7 @@ static void 
r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) r200ContextPtr rmesa = R200_CONTEXT(ctx); radeonTexObj* t = radeon_tex_obj(texObj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, _mesa_lookup_enum_by_nr(texObj->Target)); @@ -470,7 +470,7 @@ static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx, radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, t, _mesa_lookup_enum_by_nr(target)); } diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 4e53672aee..c94834752e 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -321,7 +321,7 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin assert( (texUnit->_ReallyEnabled == 0) || (texUnit->_Current != NULL) ); - if ( R200_DEBUG & DEBUG_TEXTURE ) { + if ( R200_DEBUG & RADEON_TEXTURE ) { fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit ); } @@ -1084,6 +1084,8 @@ static void disable_tex_obj_state( r200ContextPtr rmesa, R200_STATECHANGE( rmesa, vtx ); rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3)); + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit); if (rmesa->radeon.TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) { TCL_FALLBACK( rmesa->radeon.glCtx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE); } @@ -1276,7 +1278,7 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) } if (mixed_fallback) { - if (R200_DEBUG & DEBUG_FALLBACKS) + if (R200_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "fallback mixed texgen, 0x%x (0x%x 0x%x 0x%x 0x%x)\n", texUnit->TexGenEnabled, 
texUnit->GenS.Mode, texUnit->GenT.Mode, texUnit->GenR.Mode, texUnit->GenQ.Mode); @@ -1302,7 +1304,7 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) texUnit->GenR.ObjectPlane, texUnit->GenQ.ObjectPlane ); if (needtgenable & (S_BIT | T_BIT)) { - if (R200_DEBUG & DEBUG_FALLBACKS) + if (R200_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "fallback mixed texgen / obj plane, 0x%x\n", texUnit->TexGenEnabled); return GL_FALSE; @@ -1330,7 +1332,7 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) texUnit->GenR.EyePlane, texUnit->GenQ.EyePlane ); if (needtgenable & (S_BIT | T_BIT)) { - if (R200_DEBUG & DEBUG_FALLBACKS) + if (R200_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "fallback mixed texgen / eye plane, 0x%x\n", texUnit->TexGenEnabled); return GL_FALSE; @@ -1380,7 +1382,7 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) default: /* Unsupported mode, fallback: */ - if (R200_DEBUG & DEBUG_FALLBACKS) + if (R200_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "fallback unsupported texgen, %d\n", texUnit->GenS.Mode); return GL_FALSE; diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 620f29b5c6..11405d7cae 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -423,7 +423,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 | VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 | VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) { - if (R200_DEBUG & DEBUG_FALLBACKS) { + if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "can't handle vert prog inputs 0x%x\n", mesa_vp->Base.InputsRead); } @@ -436,7 +436,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) | (1 << VERT_RESULT_TEX2) | (1 << 
VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) | (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) { - if (R200_DEBUG & DEBUG_FALLBACKS) { + if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "can't handle vert prog outputs 0x%x\n", mesa_vp->Base.OutputsWritten); } @@ -551,7 +551,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte if (mesa_vp->Base.InputsRead & (1 << i)) { array_count++; if (array_count > 12) { - if (R200_DEBUG & DEBUG_FALLBACKS) { + if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "more than 12 attribs used in vert prog\n"); } return GL_FALSE; @@ -571,13 +571,13 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte } if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) { - if (R200_DEBUG & DEBUG_FALLBACKS) { + if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "can't handle vert prog without position output\n"); } return GL_FALSE; } if (free_inputs & 1) { - if (R200_DEBUG & DEBUG_FALLBACKS) { + if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "can't handle vert prog without position input\n"); } return GL_FALSE; @@ -1070,7 +1070,7 @@ else { mesa_vp->Base.NumTemporaries + u_temp_used; } if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) { - if (R200_DEBUG & DEBUG_FALLBACKS) { + if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used); } return GL_FALSE; @@ -1078,7 +1078,7 @@ else { u_temp_i = R200_VSF_MAX_TEMPS - 1; if(o_inst - vp->instr >= R200_VSF_MAX_INST) { mesa_vp->Base.NumNativeInstructions = 129; - if (R200_DEBUG & DEBUG_FALLBACKS) { + if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "more than 128 native instructions\n"); } return GL_FALSE; diff --git a/src/mesa/drivers/dri/r200/radeon_debug.c b/src/mesa/drivers/dri/r200/radeon_debug.c new file mode 120000 index 0000000000..c98c2e074c --- /dev/null +++ b/src/mesa/drivers/dri/r200/radeon_debug.c @@ 
-0,0 +1 @@ +../radeon/radeon_debug.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r200/radeon_debug.h b/src/mesa/drivers/dri/r200/radeon_debug.h new file mode 120000 index 0000000000..bd8aa28e89 --- /dev/null +++ b/src/mesa/drivers/dri/r200/radeon_debug.h @@ -0,0 +1 @@ +../radeon/radeon_debug.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r200/radeon_queryobj.c b/src/mesa/drivers/dri/r200/radeon_queryobj.c new file mode 120000 index 0000000000..1d6ebc1c48 --- /dev/null +++ b/src/mesa/drivers/dri/r200/radeon_queryobj.c @@ -0,0 +1 @@ +../radeon/radeon_queryobj.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r200/radeon_queryobj.h b/src/mesa/drivers/dri/r200/radeon_queryobj.h new file mode 120000 index 0000000000..8f6f842b0a --- /dev/null +++ b/src/mesa/drivers/dri/r200/radeon_queryobj.h @@ -0,0 +1 @@ +../radeon/radeon_queryobj.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 7460410ee6..fe775eac99 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -27,16 +27,19 @@ COMMON_SOURCES = \ ../common/dri_util.c RADEON_COMMON_SOURCES = \ - radeon_texture.c \ + radeon_bo_legacy.c \ + radeon_buffer_objects.c \ radeon_common_context.c \ radeon_common.c \ + radeon_cs_legacy.c \ radeon_dma.c \ + radeon_debug.c \ + radeon_fbo.c \ radeon_lock.c \ - radeon_bo_legacy.c \ - radeon_cs_legacy.c \ radeon_mipmap_tree.c \ radeon_span.c \ - radeon_fbo.c + radeon_queryobj.c \ + radeon_texture.c DRIVER_SOURCES = \ radeon_screen.c \ @@ -48,17 +51,8 @@ DRIVER_SOURCES = \ r300_render.c \ r300_tex.c \ r300_texstate.c \ - radeon_program.c \ - radeon_program_alu.c \ - radeon_program_pair.c \ - radeon_nqssadce.c \ r300_vertprog.c \ r300_fragprog_common.c \ - r300_fragprog.c \ - r300_fragprog_swizzle.c \ - r300_fragprog_emit.c \ - r500_fragprog.c \ - r500_fragprog_emit.c \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ @@ -68,15 +62,22 @@ DRIVER_SOURCES = \ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) -DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \ - -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 \ +DRIVER_DEFINES = -DRADEON_R300 # -DRADEON_BO_TRACK \ -Wall DRI_LIB_DEPS += $(RADEON_LDFLAGS) +PIPE_DRIVERS = compiler/libr300compiler.a + ##### TARGETS ##### include ../Makefile.template symlinks: + +# Mark the archive phony so that we always check for recompilation +.PHONY : compiler/libr300compiler.a + +compiler/libr300compiler.a: + cd compiler && $(MAKE) diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile new file mode 100644 index 0000000000..d973844192 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -0,0 +1,75 @@ +# src/mesa/drivers/dri/r300/compiler/Makefile + +TOP = ../../../../../.. 
+include $(TOP)/configs/current + +LIBNAME = r300compiler + +C_SOURCES = \ + radeon_code.c \ + radeon_compiler.c \ + radeon_nqssadce.c \ + radeon_program.c \ + radeon_program_alu.c \ + radeon_program_pair.c \ + r3xx_fragprog.c \ + r300_fragprog.c \ + r300_fragprog_swizzle.c \ + r300_fragprog_emit.c \ + r500_fragprog.c \ + r500_fragprog_emit.c \ + r3xx_vertprog.c \ + r3xx_vertprog_dump.c \ + \ + memory_pool.c + + +### Basic defines ### + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + +INCLUDES = \ + -I. \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + + +##### TARGETS ##### + +default: depend lib$(LIBNAME).a + +lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/configs/current + $(MKLIB) -o $(LIBNAME) -static $(OBJECTS) + +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + +# Remove .o and backup files +clean: + rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak + +# Dummy target +install: + @echo -n "" + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + + +sinclude depend diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.c b/src/mesa/drivers/dri/r300/compiler/memory_pool.c new file mode 100644 index 0000000000..37aa2b6579 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.c @@ -0,0 +1,95 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, 
merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "memory_pool.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + + +#define POOL_LARGE_ALLOC 4096 +#define POOL_ALIGN 4 + + +struct memory_block { + struct memory_block * next; +}; + +void memory_pool_init(struct memory_pool * pool) +{ + memset(pool, 0, sizeof(struct memory_pool)); +} + + +void memory_pool_destroy(struct memory_pool * pool) +{ + while(pool->blocks) { + struct memory_block * block = pool->blocks; + pool->blocks = block->next; + free(block); + } +} + +static void refill_pool(struct memory_pool * pool) +{ + unsigned int blocksize = pool->total_allocated; + struct memory_block * newblock; + + if (!blocksize) + blocksize = 2*POOL_LARGE_ALLOC; + + newblock = (struct memory_block*)malloc(blocksize); + newblock->next = pool->blocks; + pool->blocks = newblock; + + pool->head = (unsigned char*)(newblock + 1); + pool->end = ((unsigned char*)newblock) + blocksize; + pool->total_allocated += blocksize; +} + + +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) +{ + if (bytes < POOL_LARGE_ALLOC) { + if (pool->head + bytes > pool->end) + refill_pool(pool); + + 
assert(pool->head + bytes <= pool->end); + + void * ptr = pool->head; + + pool->head += bytes; + pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); + + return ptr; + } else { + struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block)); + + block->next = pool->blocks; + pool->blocks = block; + + return (block + 1); + } +} + + diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h new file mode 100644 index 0000000000..ce23c319ad --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.h @@ -0,0 +1,49 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef MEMORY_POOL_H +#define MEMORY_POOL_H + +struct memory_block; + +/** + * Provides a pool of memory that can quickly be allocated from, at the + * cost of being unable to explicitly free one of the allocated blocks. + * Instead, the entire pool can be freed at once. + * + * The idea is to allow one to quickly allocate a flexible amount of + * memory during operations like shader compilation while avoiding + * reference counting headaches. + */ +struct memory_pool { + unsigned char * head; + unsigned char * end; + unsigned int total_allocated; + struct memory_block * blocks; +}; + + +void memory_pool_init(struct memory_pool * pool); +void memory_pool_destroy(struct memory_pool * pool); +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); + +#endif /* MEMORY_POOL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c new file mode 100644 index 0000000000..6c9fba4914 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -0,0 +1,416 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r300_fragprog.h" + +#include "shader/prog_parameter.h" + +#include "../r300_reg.h" + +static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) +{ + struct prog_src_register reg = { 0, }; + + reg.File = PROGRAM_STATE_VAR; + reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); + reg.Swizzle = SWIZZLE_WWWW; + return reg; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + */ +GLboolean r300_transform_TEX( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + + if (inst->I.Opcode != OPCODE_TEX && + inst->I.Opcode != OPCODE_TXB && + inst->I.Opcode != OPCODE_TXP && + inst->I.Opcode != OPCODE_KIL) + return GL_FALSE; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->I.Opcode != OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + inst->I.Opcode = OPCODE_MOV; + + if (comparefunc == GL_ALWAYS) { + inst->I.SrcReg[0].File = PROGRAM_BUILTIN; + inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; + } else { + inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); + } + + return GL_TRUE; + } else { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + GLuint depthmode = 
compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); + struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); + int pass, fail; + + inst_rcp->I.Opcode = OPCODE_RCP; + inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + + inst_cmp->I.DstReg = inst->I.DstReg; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = rc_find_free_temporary(c); + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + + inst_mad->I.Opcode = OPCODE_MAD; + inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; + inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + else + inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + + inst_cmp->I.Opcode = OPCODE_CMP; + /* DstReg has been filled out above */ + inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_cmp->I.SrcReg[0].Index = 
inst_mad->I.DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; + inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; + inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); + } + } + + /* Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + */ + if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) { + struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev); + + inst_mul->I.Opcode = OPCODE_MUL; + inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mul->I.DstReg.Index = rc_find_free_temporary(c); + inst_mul->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR; + inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit); + + reset_srcreg(&inst->I.SrcReg[0]); + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index; + } + + /* Cannot write texture to output registers or with masks */ + if (inst->I.Opcode != OPCODE_KIL && + (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg = inst->I.DstReg; + inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); + + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + } + + + /* Cannot read texture coordinate from constants file */ + if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + + 
inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; + + reset_srcreg(&inst->I.SrcReg[0]); + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; + } + + return GL_TRUE; +} + +/* just some random things... */ +void r300FragmentProgramDump(struct rX00_fragment_program_code *c) +{ + struct r300_fragment_program_code *code = &c->code.r300; + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + + for (n = 0; n <= (code->config & 3); n++) { + uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; + int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT; + int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT; + int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; + int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; + + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " + "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n, + alu_offset, tex_offset, alu_end, tex_end, code_addr); + + if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { + fprintf(stderr, " TEX:\n"); + for (i = tex_offset; + i <= tex_offset + tex_end; + ++i) { + const char *instr; + + switch ((code->tex. + inst[i] >> R300_TEX_INST_SHIFT) & + 15) { + case R300_TEX_OP_LD: + instr = "TEX"; + break; + case R300_TEX_OP_KIL: + instr = "KIL"; + break; + case R300_TEX_OP_TXP: + instr = "TXP"; + break; + case R300_TEX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (code->tex. + inst[i] >> R300_DST_ADDR_SHIFT) & 31, + 't', + (code->tex. + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex. 
+ inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, + code->tex.inst[i]); + } + } + + for (i = alu_offset; + i <= alu_offset + alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_addr >> (j * 6); + int rega = code->alu.inst[i].alpha_addr >> (j * 6); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + sprintf(dsta, "t%i.w ", + (code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (code->alu.inst[i]. 
+ alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], dstc, + code->alu.inst[i].rgb_addr, srca[0], srca[1], + srca[2], dsta, code->alu.inst[i].alpha_addr); + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_inst >> (j * 7); + int rega = code->alu.inst[i].alpha_inst >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_ALU_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? 
"|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + code->alu.inst[i].rgb_inst, arga[0], arga[1], + arga[2], code->alu.inst[i].alpha_inst); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h new file mode 100644 index 0000000000..0ac46dbd9c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +/* + * Authors: + * Ben Skeggs <darktama@iinet.net.au> + * Jerome Glisse <j.glisse@gmail.com> + */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "radeon_compiler.h" +#include "radeon_program.h" + + +extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); + +extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c); + +extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index b75656e7ee..c7227bbd15 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -40,57 +40,43 @@ #include "r300_fragprog.h" +#include "../r300_reg.h" + #include "radeon_program_pair.h" #include "r300_fragprog_swizzle.h" -#include "r300_reg.h" +struct r300_emit_state { + struct r300_fragment_program_compiler * compiler; + + unsigned current_node : 2; + unsigned node_first_tex : 8; + unsigned node_first_alu : 8; + uint32_t node_flags; +}; + #define PROG_CODE \ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r300_fragment_program_code *code = &c->code->r300 + struct r300_emit_state * emit = (struct r300_emit_state*)data; \ + struct r300_fragment_program_compiler *c = emit->compiler; \ + struct r300_fragment_program_code *code = &c->code->code.r300 #define error(fmt, args...) 
do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ __FILE__, __FUNCTION__, ##args); \ } while(0) -static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex) -{ - PROG_CODE; - - for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { - if (code->constant[*hwindex].File == file && - code->constant[*hwindex].Index == index) - break; - } - - if (*hwindex >= code->const_nr) { - if (*hwindex >= R300_PFS_NUM_CONST_REGS) { - error("Out of hw constants!\n"); - return GL_FALSE; - } - - code->const_nr++; - code->constant[*hwindex].File = file; - code->constant[*hwindex].Index = index; - } - - return GL_TRUE; -} - - /** * Mark a temporary register as used. */ static void use_temporary(struct r300_fragment_program_code *code, GLuint index) { - if (index > code->max_temp_idx) - code->max_temp_idx = index; + if (index > code->pixsize) + code->pixsize = index; } -static GLuint translate_rgb_opcode(GLuint opcode) +static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) { switch(opcode) { case OPCODE_CMP: return R300_ALU_OUTC_CMP; @@ -109,7 +95,7 @@ static GLuint translate_rgb_opcode(GLuint opcode) } } -static GLuint translate_alpha_opcode(GLuint opcode) +static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) { switch(opcode) { case OPCODE_CMP: return R300_ALU_OUTA_CMP; @@ -145,63 +131,62 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) int ip = code->alu.length++; int j; - code->node[code->cur_node].alu_end++; - code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode); - code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode); + code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); + code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); for(j = 0; j < 3; ++j) { GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); if 
(!inst->RGB.Src[j].Constant) use_temporary(code, inst->RGB.Src[j].Index); - code->alu.inst[ip].inst1 |= src << (6*j); + code->alu.inst[ip].rgb_addr |= src << (6*j); src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); if (!inst->Alpha.Src[j].Constant) use_temporary(code, inst->Alpha.Src[j].Index); - code->alu.inst[ip].inst3 |= src << (6*j); + code->alu.inst[ip].alpha_addr |= src << (6*j); GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); arg |= inst->RGB.Arg[j].Abs << 6; arg |= inst->RGB.Arg[j].Negate << 5; - code->alu.inst[ip].inst0 |= arg << (7*j); + code->alu.inst[ip].rgb_inst |= arg << (7*j); arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); arg |= inst->Alpha.Arg[j].Abs << 6; arg |= inst->Alpha.Arg[j].Negate << 5; - code->alu.inst[ip].inst2 |= arg << (7*j); + code->alu.inst[ip].alpha_inst |= arg << (7*j); } if (inst->RGB.Saturate) - code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP; + code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; if (inst->Alpha.Saturate) - code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP; + code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; if (inst->RGB.WriteMask) { use_temporary(code, inst->RGB.DestIndex); - code->alu.inst[ip].inst1 |= + code->alu.inst[ip].rgb_addr |= (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); } if (inst->RGB.OutputWriteMask) { - code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); - code->node[code->cur_node].flags |= R300_RGBA_OUT; + code->alu.inst[ip].rgb_addr |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); + emit->node_flags |= R300_RGBA_OUT; } if (inst->Alpha.WriteMask) { use_temporary(code, inst->Alpha.DestIndex); - code->alu.inst[ip].inst3 |= + code->alu.inst[ip].alpha_addr |= (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG; } if (inst->Alpha.OutputWriteMask) { - 
code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT; - code->node[code->cur_node].flags |= R300_RGBA_OUT; + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT; + emit->node_flags |= R300_RGBA_OUT; } if (inst->Alpha.DepthWriteMask) { - code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; - code->node[code->cur_node].flags |= R300_W_OUT; - c->fp->writes_depth = GL_TRUE; + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; + emit->node_flags |= R300_W_OUT; + c->code->writes_depth = GL_TRUE; } return GL_TRUE; @@ -211,31 +196,50 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) /** * Finish the current node without advancing to the next one. */ -static GLboolean finish_node(struct r300_fragment_program_compiler *c) +static GLboolean finish_node(struct r300_emit_state * emit) { - struct r300_fragment_program_code *code = &c->code->r300; - struct r300_fragment_program_node *node = &code->node[code->cur_node]; + struct r300_fragment_program_compiler * c = emit->compiler; + struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; - if (node->alu_end < 0) { + if (code->alu.length == emit->node_first_alu) { /* Generate a single NOP for this node */ struct radeon_pair_instruction inst; _mesa_bzero(&inst, sizeof(inst)); - if (!emit_alu(c, &inst)) + if (!emit_alu(emit, &inst)) return GL_FALSE; } - if (node->tex_end < 0) { - if (code->cur_node == 0) { - node->tex_end = 0; - } else { - error("Node %i has no TEX instructions", code->cur_node); + unsigned alu_offset = emit->node_first_alu; + unsigned alu_end = code->alu.length - alu_offset - 1; + unsigned tex_offset = emit->node_first_tex; + unsigned tex_end = code->tex.length - tex_offset - 1; + + if (code->tex.length == emit->node_first_tex) { + if (emit->current_node > 0) { + error("Node %i has no TEX instructions", emit->current_node); return GL_FALSE; } + + tex_end = 0; } else { - if (code->cur_node == 0) - code->first_node_has_tex = 1; + if (emit->current_node == 0) + code->config 
|= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; } + /* Write the config register. + * Note: The order in which the words for each node are written + * is not correct here and needs to be fixed up once we're entirely + * done + * + * Also note that the register specification from AMD is slightly + * incorrect in its description of this register. */ + code->code_addr[emit->current_node] = + (alu_offset << R300_ALU_START_SHIFT) | + (alu_end << R300_ALU_SIZE_SHIFT) | + (tex_offset << R300_TEX_START_SHIFT) | + (tex_end << R300_TEX_SIZE_SHIFT) | + emit->node_flags; + return GL_TRUE; } @@ -248,30 +252,28 @@ static GLboolean begin_tex(void* data) { PROG_CODE; - if (code->cur_node == 0) { - if (code->node[0].alu_end < 0 && - code->node[0].tex_end < 0) - return GL_TRUE; + if (code->alu.length == emit->node_first_alu && + code->tex.length == emit->node_first_tex) { + return GL_TRUE; } - if (code->cur_node == 3) { + if (emit->current_node == 3) { error("Too many texture indirections"); return GL_FALSE; } - if (!finish_node(c)) + if (!finish_node(emit)) return GL_FALSE; - struct r300_fragment_program_node *node = &code->node[++code->cur_node]; - node->alu_offset = code->alu.length; - node->alu_end = -1; - node->tex_offset = code->tex.length; - node->tex_end = -1; + emit->current_node++; + emit->node_first_tex = code->tex.length; + emit->node_first_alu = code->alu.length; + emit->node_flags = 0; return GL_TRUE; } -static GLboolean emit_tex(void* data, struct prog_instruction* inst) +static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst) { PROG_CODE; @@ -281,31 +283,30 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst) } GLuint unit = inst->TexSrcUnit; - GLuint dest = inst->DstReg.Index; + GLuint dest = inst->DestIndex; GLuint opcode; switch(inst->Opcode) { - case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; - case OPCODE_TEX: opcode = R300_TEX_OP_LD; break; - case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; - case OPCODE_TXP: opcode = 
R300_TEX_OP_TXP; break; + case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; default: error("Unknown texture opcode %i", inst->Opcode); return GL_FALSE; } - if (inst->Opcode == OPCODE_KIL) { + if (inst->Opcode == RADEON_OPCODE_KIL) { unit = 0; dest = 0; } else { use_temporary(code, dest); } - use_temporary(code, inst->SrcReg[0].Index); + use_temporary(code, inst->SrcIndex); - code->node[code->cur_node].tex_end++; code->tex.inst[code->tex.length++] = - (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | + (inst->SrcIndex << R300_SRC_ADDR_SHIFT) | (dest << R300_DST_ADDR_SHIFT) | (unit << R300_TEX_ID_SHIFT) | (opcode << R300_TEX_INST_SHIFT); @@ -314,7 +315,6 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst) static const struct radeon_pair_handler pair_handler = { - .EmitConst = &emit_const, .EmitPaired = &emit_alu, .EmitTex = &emit_tex, .BeginTexBlock = &begin_tex, @@ -325,20 +325,36 @@ static const struct radeon_pair_handler pair_handler = { * Final compilation step: Turn the intermediate radeon_program into * machine-readable instructions. 
*/ -GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { - struct r300_fragment_program_code *code = &compiler->code->r300; - - _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); - code->node[0].alu_end = -1; - code->node[0].tex_end = -1; + struct r300_emit_state emit; + struct r300_fragment_program_code *code = &compiler->code->code.r300; - if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) - return GL_FALSE; + memset(&emit, 0, sizeof(emit)); + emit.compiler = compiler; - if (!finish_node(compiler)) - return GL_FALSE; + _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); - return GL_TRUE; + radeonPairProgram(compiler, &pair_handler, &emit); + if (compiler->Base.Error) + return; + + /* Finish the program */ + finish_node(&emit); + + code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + code->code_offset = + (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | + ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | + (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | + ((code->tex.length ? 
code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); + + if (emit.current_node < 3) { + int shift = 3 - emit.current_node; + int i; + for(i = emit.current_node; i >= 0; --i) + code->code_addr[shift + i] = code->code_addr[i]; + for(i = 0; i < shift; ++i) + code->code_addr[i] = 0; + } } - diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index fc9d855bce..1b14cc3888 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -33,8 +33,9 @@ #include "r300_fragprog_swizzle.h" -#include "r300_reg.h" +#include "../r300_reg.h" #include "radeon_nqssadce.h" +#include "radeon_compiler.h" #define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) @@ -174,18 +175,15 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, } } - struct prog_instruction *inst; - - _mesa_insert_instructions(s->Program, s->IP, 1); - inst = s->Program->Instructions + s->IP++; - inst->Opcode = OPCODE_MOV; - inst->DstReg = dst; - inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); - inst->SrcReg[0] = src; - inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; + struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg = dst; + inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); + inst->I.SrcReg[0] = src; + inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? 
NEGATE_XYZW : NEGATE_NONE; /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ - dst.WriteMask &= ~inst->DstReg.WriteMask; + dst.WriteMask &= ~inst->I.DstReg.WriteMask; } } diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h index 231bf4eef5..231bf4eef5 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c new file mode 100644 index 0000000000..76c3a7ecfd --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -0,0 +1,149 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "radeon_compiler.h" + +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_statevars.h" + +#include "radeon_nqssadce.h" +#include "radeon_program_alu.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + + +static void nqssadce_init(struct nqssadce_state* s) +{ + struct r300_fragment_program_compiler * c = s->UserData; + s->Outputs[c->OutputColor].Sourced = WRITEMASK_XYZW; + s->Outputs[c->OutputDepth].Sourced = WRITEMASK_W; +} + +static void rewrite_depth_out(struct r300_fragment_program_compiler * c) +{ + struct rc_instruction *rci; + + for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { + struct prog_instruction * inst = &rci->I; + + if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != c->OutputDepth) + continue; + + if (inst->DstReg.WriteMask & WRITEMASK_Z) { + inst->DstReg.WriteMask = WRITEMASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } + + switch (inst->Opcode) { + case OPCODE_FRC: + case OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + break; + default: + // Scalar instructions needn't be reswizzled + break; + } + } +} + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) +{ + rewrite_depth_out(c); + + if (c->is_r500) { + struct radeon_program_transformation transformations[] = { + { &r500_transform_TEX, c }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { 
&radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(&c->Base, 4, transformations); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_TEX, c }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(&c->Base, 3, transformations); + } + + if (c->Base.Debug) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + rc_print_program(&c->Base.Program); + fflush(stderr); + } + + if (c->is_r500) { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r500FPIsNativeSwizzle, + .BuildSwizzle = &r500FPBuildSwizzle + }; + radeonNqssaDce(&c->Base, &nqssadce, c); + } else { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle + }; + radeonNqssaDce(&c->Base, &nqssadce, c); + } + + if (c->Base.Debug) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + rc_print_program(&c->Base.Program); + fflush(stderr); + } + + if (c->is_r500) { + r500BuildFragmentProgramHwCode(c); + } else { + r300BuildFragmentProgramHwCode(c); + } + + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); + + if (c->Base.Debug) { + if (c->is_r500) { + r500FragmentProgramDump(c->code); + } else { + r300FragmentProgramDump(c->code); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c new file mode 100644 index 0000000000..dad27fc98e --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -0,0 +1,656 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, 
and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include "../r300_reg.h" + +#include "radeon_nqssadce.h" +#include "radeon_program.h" +#include "radeon_program_alu.h" + +#include "shader/prog_print.h" + + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. 
+ */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(vpi->SrcReg[x].File), \ + NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + + +static unsigned long t_dst_mask(GLuint mask) +{ + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & WRITEMASK_XYZW; +} + +static unsigned long t_dst_class(gl_register_file file) +{ + + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case PROGRAM_OUTPUT: + return PVS_DST_REG_OUT; + case PROGRAM_ADDRESS: + return PVS_DST_REG_A0; + /* + case PROGRAM_INPUT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(gl_register_file file) +{ + switch (file) { + case PROGRAM_BUILTIN: + case PROGRAM_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case PROGRAM_INPUT: + return PVS_SRC_REG_INPUT; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_STATE_VAR: + return PVS_SRC_REG_CONSTANT; + /* + case PROGRAM_OUTPUT: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b) +{ + unsigned long aclass = t_src_class(a.File); + unsigned long bclass = t_src_class(b.File); + + if (aclass != bclass) + return GL_FALSE; + if (aclass == PVS_SRC_REG_TEMPORARY) + return GL_FALSE; + + if (a.RelAddr || b.RelAddr) + return 
GL_TRUE; + if (a.Index != b.Index) + return GL_TRUE; + + return GL_FALSE; +} + +static INLINE unsigned long t_swizzle(GLubyte swizzle) +{ + /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +static unsigned long t_src_index(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + if (src->File == PROGRAM_INPUT) { + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->Negate) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src->RelAddr << 4); +} + +static GLboolean valid_dst(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } + + return GL_TRUE; +} + +static void ei_vector1(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); +} + +static void ei_vector2(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); +} + +static void ei_math1(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); +} + +static void ei_lit(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: 
Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); +} + +static void ei_mad(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst) +{ + /* Remarks about hardware limitations of MAD + * (please preserve this comment, as this information is _NOT_ + * in the documentation provided by AMD). + * + * As described in the documentation, MAD with three unique temporary + * source registers requires the use of the macro version. + * + * However (and this is not mentioned in the documentation), apparently + * the macro version is _NOT_ a full superset of the normal version. + * In particular, the macro version does not always work when relative + * addressing is used in the source operands. + * + * This limitation caused incorrect rendering in Sauerbraten's OpenGL + * assembly shader path when using medium quality animations + * (i.e. 
animations with matrix blending instead of quaternion blending). + * + * Unfortunately, I (nha) have been unable to extract a Piglit regression + * test for this issue - for some reason, it is possible to have vertex + * programs whose prefix is *exactly* the same as the prefix of the + * offending program in Sauerbraten up to the offending instruction + * without causing any trouble. + * + * Bottom line: Use the macro version only when really necessary; + * according to AMD docs, this should improve performance by one clock + * as a nice side bonus. + */ + if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY && + vpi->SrcReg[1].File == PROGRAM_TEMPORARY && + vpi->SrcReg[2].File == PROGRAM_TEMPORARY && + vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && + vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && + vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + GL_FALSE, + GL_TRUE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } else { + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = t_src(vp, &vpi->SrcReg[2]); +} + +static void ei_pow(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); +} + + +static void translate_vertex_program(struct r300_vertex_program_compiler * compiler) +{ + struct rc_instruction *rci; + + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + + 
compiler->SetHwInputOutput(compiler); + + for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { + struct prog_instruction *vpi = &rci->I; + GLuint *inst = compiler->code->body.d + compiler->code->length; + + /* Skip instructions writing to non-existing destination */ + if (!valid_dst(compiler->code, &vpi->DstReg)) + continue; + + if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { + rc_error(&compiler->Base, "Vertex program has too many instructions\n"); + return; + } + + switch (vpi->Opcode) { + case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case OPCODE_SGE: ei_vector2(compiler->code, 
VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + default: + rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode); + return; + } + + compiler->code->length += 4; + + if (compiler->Base.Error) + return; + } +} + +struct temporary_allocation { + GLuint Allocated:1; + GLuint HwTemp:15; + struct rc_instruction * LastRead; +}; + +static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) +{ + struct rc_instruction *inst; + GLuint num_orig_temps = 0; + GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + struct temporary_allocation * ta; + GLuint i, j; + + compiler->code->num_temporaries = 0; + memset(hwtemps, 0, sizeof(hwtemps)); + + /* Pass 1: Count original temporaries and allocate structures */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { + if (inst->I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->I.SrcReg[i].Index + 1; + } + } + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { + if (inst->I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->I.DstReg.Index + 1; + } + } + } + + ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, + sizeof(struct temporary_allocation) * num_orig_temps); + memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); + + /* Pass 2: Determine original temporary lifetimes */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) + ta[inst->I.SrcReg[i].Index].LastRead = 
inst; + } + } + + /* Pass 3: Register allocation */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { + GLuint orig = inst->I.SrcReg[i].Index; + inst->I.SrcReg[i].Index = ta[orig].HwTemp; + + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = GL_FALSE; + } + } + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { + GLuint orig = inst->I.DstReg.Index; + + if (!ta[orig].Allocated) { + for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { + if (!hwtemps[j]) + break; + } + if (j >= VSF_MAX_FRAGMENT_TEMPS) { + fprintf(stderr, "Out of hw temporaries\n"); + } else { + ta[orig].Allocated = GL_TRUE; + ta[orig].HwTemp = j; + hwtemps[j] = GL_TRUE; + + if (j >= compiler->code->num_temporaries) + compiler->code->num_temporaries = j + 1; + } + } + + inst->I.DstReg.Index = ta[orig].HwTemp; + } + } + } +} + + +/** + * Vertex engine cannot read two inputs or two constants at the same time. + * Introduce intermediate MOVs to temporary registers to account for this. 
+ */ +static GLboolean transform_source_conflicts( + struct radeon_compiler *c, + struct rc_instruction* inst, + void* unused) +{ + GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode); + + if (num_operands == 3) { + if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2]) + || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = tmpreg; + inst_mov->I.SrcReg[0] = inst->I.SrcReg[2]; + + reset_srcreg(&inst->I.SrcReg[2]); + inst->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[2].Index = tmpreg; + } + } + + if (num_operands >= 2) { + if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = tmpreg; + inst_mov->I.SrcReg[0] = inst->I.SrcReg[1]; + + reset_srcreg(&inst->I.SrcReg[1]); + inst->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[1].Index = tmpreg; + } + } + + return GL_TRUE; +} + +static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) +{ + int i; + + for(i = 0; i < 32; ++i) { + if ((compiler->RequiredOutputs & (1 << i)) && + !(compiler->Base.Program.OutputsWritten & (1 << i))) { + struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; + + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = i; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + + inst->I.SrcReg[0].File = PROGRAM_CONSTANT; + inst->I.SrcReg[0].Index = 0; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + + compiler->Base.Program.OutputsWritten |= 1 << i; + } + } +} + +static void 
nqssadceInit(struct nqssadce_state* s) +{ + struct r300_vertex_program_compiler * compiler = s->UserData; + int i; + + for(i = 0; i < VERT_RESULT_MAX; ++i) { + if (compiler->RequiredOutputs & (1 << i)) + s->Outputs[i].Sourced = WRITEMASK_XYZW; + } +} + +static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) +{ + (void) opcode; + (void) reg; + + return GL_TRUE; +} + + + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) +{ + addArtificialOutputs(compiler); + + { + struct radeon_program_transformation transformations[] = { + { &r300_transform_vertex_alu, 0 }, + }; + radeonLocalTransform(&compiler->Base, 1, transformations); + } + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after native rewrite:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stderr); + } + + { + /* Note: This pass has to be done separately from ALU rewrite, + * otherwise non-native ALU instructions with source conflicts + * will not be treated properly. 
+ */ + struct radeon_program_transformation transformations[] = { + { &transform_source_conflicts, 0 }, + }; + radeonLocalTransform(&compiler->Base, 1, transformations); + } + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after source conflict resolve:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stderr); + } + + { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadceInit, + .IsNativeSwizzle = &swizzleIsNative, + .BuildSwizzle = NULL + }; + radeonNqssaDce(&compiler->Base, &nqssadce, compiler); + + /* We need this step for reusing temporary registers */ + allocate_temporary_registers(compiler); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after NQSSADCE:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stderr); + } + } + + translate_vertex_program(compiler); + + rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants); + + compiler->code->InputsRead = compiler->Base.Program.InputsRead; + compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten; + + if (compiler->Base.Debug) { + fprintf(stderr, "Final vertex program code:\n"); + r300_vertex_program_dump(compiler->code); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c new file mode 100644 index 0000000000..980ef3eaea --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c @@ -0,0 +1,177 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + 
* + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_code.h" + +#include <stdio.h> + +static char* r300_vs_ve_ops[] = { + /* R300 vector ops */ + " VE_NO_OP", + " VE_DOT_PRODUCT", + " VE_MULTIPLY", + " VE_ADD", + " VE_MULTIPLY_ADD", + " VE_DISTANCE_FACTOR", + " VE_FRACTION", + " VE_MAXIMUM", + " VE_MINIMUM", + "VE_SET_GREATER_THAN_EQUAL", + " VE_SET_LESS_THAN", + " VE_MULTIPLYX2_ADD", + " VE_MULTIPLY_CLAMP", + " VE_FLT2FIX_DX", + " VE_FLT2FIX_DX_RND", + /* R500 vector ops */ + " VE_PRED_SET_EQ_PUSH", + " VE_PRED_SET_GT_PUSH", + " VE_PRED_SET_GTE_PUSH", + " VE_PRED_SET_NEQ_PUSH", + " VE_COND_WRITE_EQ", + " VE_COND_WRITE_GT", + " VE_COND_WRITE_GTE", + " VE_COND_WRITE_NEQ", + " VE_SET_GREATER_THAN", + " VE_SET_EQUAL", + " VE_SET_NOT_EQUAL", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +static char* r300_vs_me_ops[] = { + /* R300 math ops */ + " ME_NO_OP", + " ME_EXP_BASE2_DX", + " ME_LOG_BASE2_DX", + " ME_EXP_BASEE_FF", + " ME_LIGHT_COEFF_DX", + " ME_POWER_FUNC_FF", + " ME_RECIP_DX", + " ME_RECIP_FF", + " ME_RECIP_SQRT_DX", + " ME_RECIP_SQRT_FF", + " ME_MULTIPLY", + " ME_EXP_BASE2_FULL_DX", + " ME_LOG_BASE2_FULL_DX", + " ME_POWER_FUNC_FF_CLAMP_B", + "ME_POWER_FUNC_FF_CLAMP_B1", + "ME_POWER_FUNC_FF_CLAMP_01", + " ME_SIN", + " ME_COS", + /* R500 math ops */ + " ME_LOG_BASE2_IEEE", + " ME_RECIP_IEEE", + " ME_RECIP_SQRT_IEEE", + " 
ME_PRED_SET_EQ", + " ME_PRED_SET_GT", + " ME_PRED_SET_GTE", + " ME_PRED_SET_NEQ", + " ME_PRED_SET_CLR", + " ME_PRED_SET_INV", + " ME_PRED_SET_POP", + " ME_PRED_SET_RESTORE", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +/* XXX refactor to avoid clashing symbols */ +static char* r300_vs_src_debug[] = { + "t", + "i", + "c", + "a", +}; + +static char* r300_vs_dst_debug[] = { + "t", + "a0", + "o", + "ox", + "a", + "i", + "u", + "u", +}; + +static char* r300_vs_swiz_debug[] = { + "X", + "Y", + "Z", + "W", + "0", + "1", + "U", + "U", +}; + + +static void r300_vs_op_dump(uint32_t op) +{ + fprintf(stderr, " dst: %d%s op: ", + (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if (op & 0x80) { + if (op & 0x1) { + fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); + } else { + fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n"); + } + } else if (op & 0x40) { + fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]); + } else { + fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]); + } +} + +static void r300_vs_src_dump(uint32_t src) +{ + fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", + (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3], + src & (1 << 25) ? "-" : " ", + r300_vs_swiz_debug[(src >> 13) & 0x7], + src & (1 << 26) ? "-" : " ", + r300_vs_swiz_debug[(src >> 16) & 0x7], + src & (1 << 27) ? "-" : " ", + r300_vs_swiz_debug[(src >> 19) & 0x7], + src & (1 << 28) ? 
"-" : " ", + r300_vs_swiz_debug[(src >> 22) & 0x7]); +} + +void r300_vertex_program_dump(struct r300_vertex_program_code * vs) +{ + unsigned instrcount = vs->length / 4; + unsigned i; + + for(i = 0; i < instrcount; i++) { + unsigned offset = i*4; + unsigned src; + + fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]); + r300_vs_op_dump(vs->body.d[offset]); + + for(src = 0; src < 3; ++src) { + fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]); + r300_vs_src_dump(vs->body.d[offset+1+src]); + } + } +} diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 4d58cf2162..7e2faed690 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -27,158 +27,141 @@ #include "r500_fragprog.h" -static void reset_srcreg(struct prog_src_register* reg) -{ - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; -} +#include "../r300_reg.h" -static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) +static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) { - gl_state_index fail_value_tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 - }; struct prog_src_register reg = { 0, }; - fail_value_tokens[2] = tmu; reg.File = PROGRAM_STATE_VAR; - reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); + reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); reg.Swizzle = SWIZZLE_WWWW; return reg; } /** * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - premultiply texture coordinates for RECT - * - extract operand swizzles - * - introduce a temporary register when write masks are needed - * + * - implement texture compare (shadow extensions) + * - extract non-native source / destination operands */ GLboolean r500_transform_TEX( - struct radeon_transform_context *t, - struct prog_instruction* 
orig_inst, void* data) + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) { struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; - struct prog_instruction inst = *orig_inst; - struct prog_instruction* tgt; - GLboolean destredirect = GL_FALSE; - - if (inst.Opcode != OPCODE_TEX && - inst.Opcode != OPCODE_TXB && - inst.Opcode != OPCODE_TXP && - inst.Opcode != OPCODE_KIL) + + if (inst->I.Opcode != OPCODE_TEX && + inst->I.Opcode != OPCODE_TXB && + inst->I.Opcode != OPCODE_TXP && + inst->I.Opcode != OPCODE_KIL) return GL_FALSE; /* ARB_shadow & EXT_shadow_funcs */ - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + if (inst->I.Opcode != OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - tgt = radeonAppendInstructions(t->Program, 1); + inst->I.Opcode = OPCODE_MOV; - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = inst.DstReg; if (comparefunc == GL_ALWAYS) { - tgt->SrcReg[0].File = PROGRAM_BUILTIN; - tgt->SrcReg[0].Swizzle = SWIZZLE_1111; + inst->I.SrcReg[0].File = PROGRAM_BUILTIN; + inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; } else { - tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); + inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); } + return GL_TRUE; + } else { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); + struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); + int pass, 
fail; + + inst_rcp->I.Opcode = OPCODE_RCP; + inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + + inst_cmp->I.DstReg = inst->I.DstReg; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = rc_find_free_temporary(c); + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + + inst_mad->I.Opcode = OPCODE_MAD; + inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; + inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + else + inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + + inst_cmp->I.Opcode = OPCODE_CMP; + /* DstReg has been filled out above */ + inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; + inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; + inst_cmp->I.SrcReg[fail] = shadow_ambient(c, 
inst->I.TexSrcUnit); } + } - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = radeonFindFreeTemporary(t); - inst.DstReg.WriteMask = WRITEMASK_XYZW; - } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) { - int tempreg = radeonFindFreeTemporary(t); + /* Cannot write texture to output registers */ + if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; - inst.DstReg.WriteMask = WRITEMASK_XYZW; - destredirect = GL_TRUE; - } + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg = inst->I.DstReg; + inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); - if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { - int tmpreg = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(t->Program, 1); - tgt->Opcode = OPCODE_MOV; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tmpreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tmpreg; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; } - tgt = radeonAppendInstructions(t->Program, 1); - _mesa_copy_instructions(tgt, &inst, 1); - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; - int rcptemp = radeonFindFreeTemporary(t); - int pass, fail; - - tgt = radeonAppendInstructions(t->Program, 3); - - tgt[0].Opcode = OPCODE_RCP; - tgt[0].DstReg.File = PROGRAM_TEMPORARY; - tgt[0].DstReg.Index = rcptemp; - tgt[0].DstReg.WriteMask = 
WRITEMASK_W; - tgt[0].SrcReg[0] = inst.SrcReg[0]; - tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; - - tgt[1].Opcode = OPCODE_MAD; - tgt[1].DstReg = inst.DstReg; - tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; - tgt[1].SrcReg[0] = inst.SrcReg[0]; - tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[1].Index = rcptemp; - tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; - tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[2].Index = inst.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; - else - tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; - - tgt[2].Opcode = OPCODE_CMP; - tgt[2].DstReg = orig_inst->DstReg; - tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; - tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; - - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } + /* Cannot read texture coordinate from constants file */ + if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; - tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; - tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); - } else if (destredirect) { - tgt = radeonAppendInstructions(t->Program, 1); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; - tgt->Opcode 
= OPCODE_MOV; - tgt->DstReg = orig_inst->DstReg; - tgt->SrcReg[0].File = PROGRAM_TEMPORARY; - tgt->SrcReg[0].Index = inst.DstReg.Index; + reset_srcreg(&inst->I.SrcReg[0]); + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; } return GL_TRUE; @@ -249,7 +232,6 @@ GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) */ void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) { - struct prog_instruction *inst; GLuint negatebase[2] = { 0, 0 }; int i; @@ -260,20 +242,16 @@ void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, negatebase[GET_BIT(src.Negate, i)] |= 1 << i; } - _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); - inst = s->Program->Instructions + s->IP; - for(i = 0; i <= 1; ++i) { if (!negatebase[i]) continue; - inst->Opcode = OPCODE_MOV; - inst->DstReg = dst; - inst->DstReg.WriteMask = negatebase[i]; - inst->SrcReg[0] = src; - inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; - inst++; - s->IP++; + struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg = dst; + inst->I.DstReg.WriteMask = negatebase[i]; + inst->I.SrcReg[0] = src; + inst->I.SrcReg[0].Negate = (i == 0) ? 
NEGATE_NONE : NEGATE_XYZW; } } @@ -375,9 +353,9 @@ static char *to_texop(int val) return NULL; } -void r500FragmentProgramDump(union rX00_fragment_program_code *c) +void r500FragmentProgramDump(struct rX00_fragment_program_code *c) { - struct r500_fragment_program_code *code = &c->r500; + struct r500_fragment_program_code *code = &c->code.r500; fprintf(stderr, "R500 Fragment Program:\n--------\n"); int n; @@ -385,15 +363,6 @@ void r500FragmentProgramDump(union rX00_fragment_program_code *c) uint32_t inst0; char *str = NULL; - if (code->const_nr) { - fprintf(stderr, "--------\nConstants:\n"); - for (n = 0; n < code->const_nr; n++) { - fprintf(stderr, "Constant %d: %i[%i]\n", n, - code->constant[n].File, code->constant[n].Index); - } - fprintf(stderr, "--------\n"); - } - for (n = 0; n < code->inst_end+1; n++) { inst0 = inst = code->inst[n].inst0; fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 1179bf6607..9091f65cd2 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -36,17 +36,20 @@ #include "shader/prog_parameter.h" #include "shader/prog_instruction.h" -#include "r300_context.h" +#include "radeon_compiler.h" #include "radeon_nqssadce.h" -extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); +extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); -extern void r500FragmentProgramDump(union rX00_fragment_program_code *c); +extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); -extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* 
data); +extern GLboolean r500_transform_TEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); #endif diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 30f4514897..d694725c9b 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -45,47 +45,22 @@ #include "r500_fragprog.h" +#include "../r300_reg.h" + #include "radeon_program_pair.h" #define PROG_CODE \ struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r500_fragment_program_code *code = &c->code->r500 + struct r500_fragment_program_code *code = &c->code->code.r500 #define error(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ __FILE__, __FUNCTION__, ##args); \ } while(0) -/** - * Callback to register hardware constants. - */ -static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex) -{ - PROG_CODE; - - for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { - if (code->constant[*hwindex].File == file && - code->constant[*hwindex].Index == idx) - break; - } - - if (*hwindex >= code->const_nr) { - if (*hwindex >= R500_PFS_NUM_CONST_REGS) { - error("Out of hw constants!\n"); - return GL_FALSE; - } - - code->const_nr++; - code->constant[*hwindex].File = file; - code->constant[*hwindex].Index = idx; - } - - return GL_TRUE; -} - -static GLuint translate_rgb_op(GLuint opcode) +static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode) { switch(opcode) { case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; @@ -106,7 +81,7 @@ static GLuint translate_rgb_op(GLuint opcode) } } -static GLuint translate_alpha_op(GLuint opcode) +static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode) { switch(opcode) { case OPCODE_CMP: return R500_ALPHA_OP_CMP; @@ -189,8 +164,8 @@ static 
GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) int ip = ++code->inst_end; - code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode); - code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode); + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) code->inst[ip].inst0 = R500_INST_TYPE_OUT; @@ -202,7 +177,7 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); if (inst->Alpha.DepthWriteMask) { code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; - c->fp->writes_depth = GL_TRUE; + c->code->writes_depth = GL_TRUE; } code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); @@ -234,19 +209,19 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) return GL_TRUE; } -static GLuint translate_strq_swizzle(struct prog_src_register src) +static GLuint translate_strq_swizzle(GLuint swizzle) { GLuint swiz = 0; int i; for (i = 0; i < 4; i++) - swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2; + swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; return swiz; } /** * Emit a single TEX instruction */ -static GLboolean emit_tex(void *data, struct prog_instruction *inst) +static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst) { PROG_CODE; @@ -258,7 +233,7 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst) int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_TEX - | (inst->DstReg.WriteMask << 11) + | (inst->WriteMask << 11) | R500_INST_TEX_SEM_WAIT; code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; @@ -267,25 +242,25 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst) code->inst[ip].inst1 |= R500_TEX_UNSCALED; 
switch (inst->Opcode) { - case OPCODE_KIL: + case RADEON_OPCODE_KIL: code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; break; - case OPCODE_TEX: + case RADEON_OPCODE_TEX: code->inst[ip].inst1 |= R500_TEX_INST_LD; break; - case OPCODE_TXB: + case RADEON_OPCODE_TXB: code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; break; - case OPCODE_TXP: + case RADEON_OPCODE_TXP: code->inst[ip].inst1 |= R500_TEX_INST_PROJ; break; default: error("emit_tex can't handle opcode %x\n", inst->Opcode); } - code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) - | (translate_strq_swizzle(inst->SrcReg[0]) << 8) - | R500_TEX_DST_ADDR(inst->DstReg.Index) + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex) + | (translate_strq_swizzle(inst->SrcSwizzle) << 8) + | R500_TEX_DST_ADDR(inst->DestIndex) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; @@ -293,35 +268,32 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst) } static const struct radeon_pair_handler pair_handler = { - .EmitConst = emit_const, .EmitPaired = emit_paired, .EmitTex = emit_tex, .MaxHwTemps = 128 }; -GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { - struct r500_fragment_program_code *code = &compiler->code->r500; + struct r500_fragment_program_code *code = &compiler->code->code.r500; _mesa_bzero(code, sizeof(*code)); code->max_temp_idx = 1; - code->inst_offset = 0; code->inst_end = -1; - if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) - return GL_FALSE; + radeonPairProgram(compiler, &pair_handler, compiler); + if (compiler->Base.Error) + return; if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { /* This may happen when dead-code elimination is disabled or * when most of the fragment program logic is leading to a KIL */ if (code->inst_end 
>= 511) { - error("Introducing fake OUT: Too many instructions"); - return GL_FALSE; + rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); + return; } int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } - - return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c new file mode 100644 index 0000000000..c7923004df --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" + +#include "radeon_code.h" + +void rc_constants_init(struct rc_constant_list * c) +{ + memset(c, 0, sizeof(*c)); +} + +/** + * Copy a constants structure, assuming that the destination structure + * is not initialized. + */ +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src) +{ + dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); + memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); + dst->Count = src->Count; + dst->_Reserved = src->Count; +} + +void rc_constants_destroy(struct rc_constant_list * c) +{ + free(c->Constants); + memset(c, 0, sizeof(*c)); +} + +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant) +{ + unsigned index = c->Count; + + if (c->Count >= c->_Reserved) { + struct rc_constant * newlist; + + c->_Reserved = c->_Reserved * 2; + if (!c->_Reserved) + c->_Reserved = 16; + + newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); + memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); + + free(c->Constants); + c->Constants = newlist; + } + + c->Constants[index] = *constant; + c->Count++; + + return index; +} + + +/** + * Add a state vector to the constant list, while trying to avoid duplicates. 
+ */ +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_STATE) { + if (c->Constants[index].u.State[0] == state0 && + c->Constants[index].u.State[1] == state1) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_STATE; + constant.Size = 4; + constant.u.State[0] = state0; + constant.u.State[1] = state1; + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate vector to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4)) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + memcpy(constant.u.Immediate, data, sizeof(float) * 4); + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate scalar to the constant list, while trying to avoid + * duplicates. 
+ */ +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle) +{ + unsigned index; + int free_index = -1; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + for(unsigned comp = 0; comp < c->Constants[index].Size; ++comp) { + if (c->Constants[index].u.Immediate[comp] == data) { + *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + return index; + } + } + + if (c->Constants[index].Size < 4) + free_index = index; + } + } + + if (free_index >= 0) { + unsigned comp = c->Constants[free_index].Size++; + c->Constants[free_index].u.Immediate[comp] = data; + *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + return free_index; + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 1; + constant.u.Immediate[0] = data; + *swizzle = SWIZZLE_XXXX; + + return rc_constants_add(c, &constant); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h new file mode 100644 index 0000000000..3e88554ba1 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -0,0 +1,207 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_CODE_H +#define RADEON_CODE_H + +#include <stdint.h> + +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 + +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 + + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) + +enum { + /** + * External constants are constants whose meaning is unknown to this + * compiler. For example, a Mesa gl_program's constants are turned + * into external constants. + */ + RC_CONSTANT_EXTERNAL = 0, + + RC_CONSTANT_IMMEDIATE, + + /** + * Constant referring to state that is known by this compiler, + * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state. 
+ */ + RC_CONSTANT_STATE +}; + +enum { + RC_STATE_SHADOW_AMBIENT = 0, + + RC_STATE_R300_WINDOW_DIMENSION, + RC_STATE_R300_TEXRECT_FACTOR +}; + +struct rc_constant { + unsigned Type:2; /**< RC_CONSTANT_xxx */ + unsigned Size:3; + + union { + unsigned External; + float Immediate[4]; + unsigned State[2]; + } u; +}; + +struct rc_constant_list { + struct rc_constant * Constants; + unsigned Count; + + unsigned _Reserved; +}; + +void rc_constants_init(struct rc_constant_list * c); +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src); +void rc_constants_destroy(struct rc_constant_list * c); +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant); +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2); +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + unsigned depth_texture_mode : 2; + + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + unsigned texture_compare_func : 3; + } unit[16]; +}; + + + +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; +}; + +/** + * Stores an R300 fragment program in its compiled-to-hardware form. 
+ */ +struct r300_fragment_program_code { + struct { + int length; /**< total # of texture instructions used */ + uint32_t inst[R300_PFS_MAX_TEX_INST]; + } tex; + + struct { + int length; /**< total # of ALU instructions used */ + struct { + uint32_t rgb_inst; + uint32_t rgb_addr; + uint32_t alpha_inst; + uint32_t alpha_addr; + } inst[R300_PFS_MAX_ALU_INST]; + } alu; + + uint32_t config; /* US_CONFIG */ + uint32_t pixsize; /* US_PIXSIZE */ + uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t code_addr[4]; /* US_CODE_ADDR */ +}; + + +struct r500_fragment_program_code { + struct { + uint32_t inst0; + uint32_t inst1; + uint32_t inst2; + uint32_t inst3; + uint32_t inst4; + uint32_t inst5; + } inst[R500_PFS_MAX_INST]; + + int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ + + int max_temp_idx; +}; + +struct rX00_fragment_program_code { + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; + + unsigned writes_depth:1; + + struct rc_constant_list constants; +}; + + +#define VSF_MAX_FRAGMENT_LENGTH (255*4) +#define VSF_MAX_FRAGMENT_TEMPS (14) + +#define VSF_MAX_INPUTS 32 +#define VSF_MAX_OUTPUTS 32 + +struct r300_vertex_program_code { + int length; + union { + uint32_t d[VSF_MAX_FRAGMENT_LENGTH]; + float f[VSF_MAX_FRAGMENT_LENGTH]; + } body; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int inputs[VSF_MAX_INPUTS]; + int outputs[VSF_MAX_OUTPUTS]; + + struct rc_constant_list constants; + + uint32_t InputsRead; + uint32_t OutputsWritten; +}; + +void r300_vertex_program_dump(struct r300_vertex_program_code * vs); + +#endif /* RADEON_CODE_H */ + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c new file mode 100644 index 0000000000..da950d5289 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -0,0 +1,262 @@ +/* + * Copyright 2009 Nicolai Hähnle 
<nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdarg.h> + +#include "radeon_program.h" + + +void rc_init(struct radeon_compiler * c) +{ + memset(c, 0, sizeof(*c)); + + memory_pool_init(&c->Pool); + c->Program.Instructions.Prev = &c->Program.Instructions; + c->Program.Instructions.Next = &c->Program.Instructions; + c->Program.Instructions.I.Opcode = OPCODE_END; +} + +void rc_destroy(struct radeon_compiler * c) +{ + rc_constants_destroy(&c->Program.Constants); + memory_pool_destroy(&c->Pool); + free(c->ErrorMsg); +} + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + if (!c->Debug) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void rc_error(struct radeon_compiler * c, const char * fmt, ...) 
+{ + va_list ap; + + c->Error = GL_TRUE; + + if (!c->ErrorMsg) { + /* Only remember the first error */ + char buf[1024]; + int written; + + va_start(ap, fmt); + written = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (written < sizeof(buf)) { + c->ErrorMsg = strdup(buf); + } else { + c->ErrorMsg = malloc(written + 1); + + va_start(ap, fmt); + vsnprintf(c->ErrorMsg, written + 1, fmt, ap); + va_end(ap); + } + } + + if (c->Debug) { + fprintf(stderr, "r300compiler error: "); + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } +} + +/** + * Rewrite the program such that everything that source the given input + * register will source new_input instead. + */ +void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input) +{ + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << input); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + unsigned i; + + for(i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_INPUT && inst->I.SrcReg[i].Index == input) { + inst->I.SrcReg[i].File = new_input.File; + inst->I.SrcReg[i].Index = new_input.Index; + inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle); + if (!inst->I.SrcReg[i].Abs) { + inst->I.SrcReg[i].Negate ^= new_input.Negate; + inst->I.SrcReg[i].Abs = new_input.Abs; + } + + c->Program.InputsRead |= 1 << new_input.Index; + } + } + } +} + + +/** + * Rewrite the program such that everything that writes into the given + * output register will instead write to new_output. The new_output + * writemask is honoured. 
+ */ +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) +{ + struct rc_instruction * inst; + + c->Program.OutputsWritten &= ~(1 << output); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { + inst->I.DstReg.Index = new_output; + inst->I.DstReg.WriteMask &= writemask; + + c->Program.OutputsWritten |= 1 << new_output; + } + } + } +} + + +/** + * Rewrite the program such that a given output is duplicated. + */ +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) +{ + unsigned tempreg = rc_find_free_temporary(c); + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = tempreg; + } + } + } + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = output; + + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = tempreg; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = dup_output; + + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = tempreg; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + + c->Program.OutputsWritten |= 1 << dup_output; +} + + +/** + * Introduce standard code fragment to deal with fragment.position. 
+ */ +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input) +{ + unsigned tempregi = rc_find_free_temporary(c); + + c->Program.InputsRead &= ~(1 << wpos); + c->Program.InputsRead |= 1 << new_input; + + /* perspective divide */ + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_rcp->I.Opcode = OPCODE_RCP; + + inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.Index = tempregi; + inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + + inst_rcp->I.SrcReg[0].File = PROGRAM_INPUT; + inst_rcp->I.SrcReg[0].Index = new_input; + inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + + struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp); + inst_mul->I.Opcode = OPCODE_MUL; + + inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mul->I.DstReg.Index = tempregi; + inst_mul->I.DstReg.WriteMask = WRITEMASK_XYZ; + + inst_mul->I.SrcReg[0].File = PROGRAM_INPUT; + inst_mul->I.SrcReg[0].Index = new_input; + + inst_mul->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mul->I.SrcReg[1].Index = tempregi; + inst_mul->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + + /* viewport transformation */ + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->I.Opcode = OPCODE_MAD; + + inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.Index = tempregi; + inst_mad->I.DstReg.WriteMask = WRITEMASK_XYZ; + + inst_mad->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[0].Index = tempregi; + inst_mad->I.SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + inst_mad->I.SrcReg[1].File = PROGRAM_STATE_VAR; + inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + inst_mad->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + inst_mad->I.SrcReg[2].File = PROGRAM_STATE_VAR; + inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index; 
+ inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + struct rc_instruction * inst; + for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + unsigned i; + + for(i = 0; i < numsrcs; i++) { + if (inst->I.SrcReg[i].File == PROGRAM_INPUT && + inst->I.SrcReg[i].Index == wpos) { + inst->I.SrcReg[i].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[i].Index = tempregi; + } + } + } +} + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h new file mode 100644 index 0000000000..e63ab8840a --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -0,0 +1,108 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef RADEON_COMPILER_H +#define RADEON_COMPILER_H + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" + +#include "memory_pool.h" +#include "radeon_code.h" + + +struct rc_instruction { + struct rc_instruction * Prev; + struct rc_instruction * Next; + struct prog_instruction I; +}; + +struct rc_program { + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; + + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + + struct rc_constant_list Constants; +}; + +struct radeon_compiler { + struct memory_pool Pool; + struct rc_program Program; + unsigned Debug:1; + unsigned Error:1; + char * ErrorMsg; +}; + +void rc_init(struct radeon_compiler * c); +void rc_destroy(struct radeon_compiler * c); + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...); +void rc_error(struct radeon_compiler * c, const char * fmt, ...); + +void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program); + +void rc_calculate_inputs_outputs(struct radeon_compiler * c); + +void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input); +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input); + +struct r300_fragment_program_compiler { + struct radeon_compiler Base; + struct rX00_fragment_program_code *code; + struct r300_fragment_program_external_state state; + unsigned is_r500; + unsigned OutputDepth; + unsigned OutputColor; + + void * 
UserData; + void (*AllocateHwInputs)( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata); +}; + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + + +struct r300_vertex_program_compiler { + struct radeon_compiler Base; + struct r300_vertex_program_code *code; + GLbitfield RequiredOutputs; + + void * UserData; + void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); + +#endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c index 840c9733b1..aaaa50ad1f 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c @@ -36,6 +36,8 @@ #include "radeon_nqssadce.h" +#include "radeon_compiler.h" + /** * Return the @ref register_state for the given register (or 0 for untracked @@ -76,9 +78,10 @@ struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register s } -static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, - struct prog_instruction *inst, GLint src, GLuint sourced) +static void track_used_srcreg(struct nqssadce_state* s, + GLint src, GLuint sourced) { + struct prog_instruction * inst = &s->IP->I; int i; GLuint deswz_source = 0; @@ -95,12 +98,11 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { struct prog_dst_register dstreg = inst->DstReg; dstreg.File = PROGRAM_TEMPORARY; - dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + dstreg.Index = rc_find_free_temporary(s->Compiler); dstreg.WriteMask = sourced; s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); - inst = s->Program->Instructions + s->IP; inst->SrcReg[src].File = PROGRAM_TEMPORARY; inst->SrcReg[src].Index = 
dstreg.Index; inst->SrcReg[src].Swizzle = 0; @@ -117,37 +119,36 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, struct register_state *regstate; - if (inst->SrcReg[src].RelAddr) + if (inst->SrcReg[src].RelAddr) { regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); - else + if (regstate) + regstate->Sourced |= WRITEMASK_X; + } else { regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); - - if (regstate) - regstate->Sourced |= deswz_source & 0xf; - - return inst; + if (regstate) + regstate->Sourced |= deswz_source & 0xf; + } } -static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) +static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex) { - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode); int i; for(i = 0; i < nsrc; ++i) - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex) - inst->SrcReg[i].Index = newindex; + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex) + inst->I.SrcReg[i].Index = newindex; } static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) { - GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); - int ip; - for(ip = 0; ip < s->IP; ++ip) { - struct prog_instruction* inst = s->Program->Instructions + ip; - if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex) - inst->DstReg.Index = newindex; + GLuint newindex = rc_find_free_temporary(s->Compiler); + struct rc_instruction * inst; + for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) { + if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex) + inst->I.DstReg.Index = newindex; unalias_srcregs(inst, oldindex, newindex); } - unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex); + unalias_srcregs(s->IP, oldindex, newindex); } 
@@ -156,7 +157,8 @@ static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) */ static void process_instruction(struct nqssadce_state* s) { - struct prog_instruction *inst = s->Program->Instructions + s->IP; + struct prog_instruction *inst = &s->IP->I; + GLuint WriteMask; if (inst->Opcode == OPCODE_END) return; @@ -164,7 +166,7 @@ static void process_instruction(struct nqssadce_state* s) if (inst->Opcode != OPCODE_KIL) { struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); if (!regstate) { - _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", + rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n", inst->DstReg.File, inst->DstReg.Index); return; } @@ -173,7 +175,9 @@ static void process_instruction(struct nqssadce_state* s) regstate->Sourced &= ~inst->DstReg.WriteMask; if (inst->DstReg.WriteMask == 0) { - _mesa_delete_instructions(s->Program, s->IP, 1); + struct rc_instruction * inst_remove = s->IP; + s->IP = s->IP->Prev; + rc_remove_instruction(inst_remove); return; } @@ -181,16 +185,15 @@ static void process_instruction(struct nqssadce_state* s) unalias_temporary(s, inst->DstReg.Index); } - /* Attention: Due to swizzle emulation code, the following - * might change the instruction stream under us, so we have - * to be careful with the inst pointer. 
*/ + WriteMask = inst->DstReg.WriteMask; + switch (inst->Opcode) { case OPCODE_ARL: case OPCODE_DDX: case OPCODE_DDY: case OPCODE_FRC: case OPCODE_MOV: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + track_used_srcreg(s, 0, WriteMask); break; case OPCODE_ADD: case OPCODE_MAX: @@ -198,14 +201,14 @@ static void process_instruction(struct nqssadce_state* s) case OPCODE_MUL: case OPCODE_SGE: case OPCODE_SLT: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + track_used_srcreg(s, 0, WriteMask); + track_used_srcreg(s, 1, WriteMask); break; case OPCODE_CMP: case OPCODE_MAD: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask); + track_used_srcreg(s, 0, WriteMask); + track_used_srcreg(s, 1, WriteMask); + track_used_srcreg(s, 2, WriteMask); break; case OPCODE_COS: case OPCODE_EX2: @@ -213,83 +216,79 @@ static void process_instruction(struct nqssadce_state* s) case OPCODE_RCP: case OPCODE_RSQ: case OPCODE_SIN: - inst = track_used_srcreg(s, inst, 0, 0x1); + track_used_srcreg(s, 0, 0x1); break; case OPCODE_DP3: - inst = track_used_srcreg(s, inst, 0, 0x7); - inst = track_used_srcreg(s, inst, 1, 0x7); + track_used_srcreg(s, 0, 0x7); + track_used_srcreg(s, 1, 0x7); break; case OPCODE_DP4: - inst = track_used_srcreg(s, inst, 0, 0xf); - inst = track_used_srcreg(s, inst, 1, 0xf); + track_used_srcreg(s, 0, 0xf); + track_used_srcreg(s, 1, 0xf); break; case OPCODE_KIL: case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXP: - inst = track_used_srcreg(s, inst, 0, 0xf); + track_used_srcreg(s, 0, 0xf); break; case OPCODE_DST: - inst = track_used_srcreg(s, inst, 0, 0x6); - inst = track_used_srcreg(s, inst, 1, 0xa); + track_used_srcreg(s, 0, 0x6); + track_used_srcreg(s, 1, 0xa); break; case OPCODE_EXP: case OPCODE_LOG: case OPCODE_POW: - inst = track_used_srcreg(s, 
inst, 0, 0x3); + track_used_srcreg(s, 0, 0x3); break; case OPCODE_LIT: - inst = track_used_srcreg(s, inst, 0, 0xb); + track_used_srcreg(s, 0, 0xb); break; default: - _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); + rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode); return; } + + s->IP = s->IP->Prev; } -static void calculateInputsOutputs(struct gl_program *p) +void rc_calculate_inputs_outputs(struct radeon_compiler * c) { - struct prog_instruction *inst; - GLuint InputsRead, OutputsWritten; + struct rc_instruction *inst; - inst = p->Instructions; - InputsRead = 0; - OutputsWritten = 0; - while (inst->Opcode != OPCODE_END) + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - int i, num_src_regs; + int i; + int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode); - num_src_regs = _mesa_num_inst_src_regs(inst->Opcode); for (i = 0; i < num_src_regs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_INPUT) - InputsRead |= 1 << inst->SrcReg[i].Index; + if (inst->I.SrcReg[i].File == PROGRAM_INPUT) + c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index; } - if (inst->DstReg.File == PROGRAM_OUTPUT) - OutputsWritten |= 1 << inst->DstReg.Index; - - ++inst; + if (_mesa_num_inst_dst_regs(inst->I.Opcode)) { + if (inst->I.DstReg.File == PROGRAM_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index; + } } - - p->InputsRead = InputsRead; - p->OutputsWritten = OutputsWritten; } -void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) +void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data) { struct nqssadce_state s; _mesa_bzero(&s, sizeof(s)); - s.Ctx = ctx; - s.Program = p; + s.Compiler = c; s.Descr = descr; + s.UserData = data; s.Descr->Init(&s); - s.IP = p->NumInstructions; + s.IP = c->Program.Instructions.Prev; - while(s.IP > 0) { - s.IP--; 
+ while(s.IP != &c->Program.Instructions && !c->Error) process_instruction(&s); - } - calculateInputsOutputs(p); + rc_calculate_inputs_outputs(c); } diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h index 8626f21c25..b3fc77a35a 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h @@ -30,7 +30,6 @@ #include "radeon_program.h" - struct register_state { /** * Bitmask indicating which components of the register are sourced @@ -44,14 +43,13 @@ struct register_state { * read from, etc. */ struct nqssadce_state { - GLcontext *Ctx; - struct gl_program *Program; + struct radeon_compiler *Compiler; struct radeon_nqssadce_descr *Descr; /** * All instructions after this instruction pointer have been dealt with. */ - int IP; + struct rc_instruction * IP; /** * Which registers are read by subsequent instructions? @@ -59,6 +57,8 @@ struct nqssadce_state { struct register_state Temps[MAX_PROGRAM_TEMPS]; struct register_state Outputs[VERT_RESULT_MAX]; struct register_state Address; + + void * UserData; }; @@ -83,11 +83,9 @@ struct radeon_nqssadce_descr { * The transformation will work recursively on the emitted instruction(s). 
*/ void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); - - void *Data; }; -void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); +void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data); struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); #endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c new file mode 100644 index 0000000000..b636f90a96 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "radeon_program.h" + +#include "radeon_compiler.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. + */ +void radeonLocalTransform( + struct radeon_compiler * c, + int num_transformations, + struct radeon_program_transformation* transformations) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + + while(inst != &c->Program.Instructions) { + struct rc_instruction * current = inst; + int i; + + inst = inst->Next; + + for(i = 0; i < num_transformations; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(c, current, t->userData)) + break; + } + } +} + + +GLint rc_find_free_temporary(struct radeon_compiler * c) +{ + GLboolean used[MAX_PROGRAM_TEMPS]; + GLuint i; + + memset(used, 0, sizeof(used)); + + for (struct rc_instruction * rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { + const struct prog_instruction *inst = &rcinst->I; + const GLuint nsrc = _mesa_num_inst_src_regs(inst->Opcode); + const GLuint ndst = _mesa_num_inst_dst_regs(inst->Opcode); + GLuint k; + + for (k = 0; k < nsrc; k++) { + if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) + used[inst->SrcReg[k].Index] = GL_TRUE; + } + + if (ndst) { + if (inst->DstReg.File == PROGRAM_TEMPORARY) + used[inst->DstReg.Index] = GL_TRUE; + } + } + + for (i = 0; i 
< MAX_PROGRAM_TEMPS; i++) { + if (!used[i]) + return i; + } + + return -1; +} + + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) +{ + struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); + + inst->Prev = 0; + inst->Next = 0; + + _mesa_init_instructions(&inst->I, 1); + + return inst; +} + + +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +{ + struct rc_instruction * inst = rc_alloc_instruction(c); + + inst->Prev = after; + inst->Next = after->Next; + + inst->Prev->Next = inst; + inst->Next->Prev = inst; + + return inst; +} + +void rc_remove_instruction(struct rc_instruction * inst) +{ + inst->Prev->Next = inst->Next; + inst->Next->Prev = inst->Prev; +} + + +void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program) +{ + struct prog_instruction *source; + unsigned int i; + + for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) { + struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + dest->I = *source; + } + + c->Program.ShadowSamplers = program->ShadowSamplers; + c->Program.InputsRead = program->InputsRead; + c->Program.OutputsWritten = program->OutputsWritten; + + int isNVProgram = 0; + + if (program->Target == GL_VERTEX_PROGRAM_ARB) { + struct gl_vertex_program * vp = (struct gl_vertex_program *) program; + isNVProgram = vp->IsNVProgram; + } + + if (isNVProgram) { + /* NV_vertex_program has a fixed-sized constant environment. + * This could be handled more efficiently for programs that + * do not use relative addressing. 
+ */ + for(i = 0; i < 96; ++i) { + struct rc_constant constant; + + constant.Type = RC_CONSTANT_EXTERNAL; + constant.Size = 4; + constant.u.External = i; + + rc_constants_add(&c->Program.Constants, &constant); + } + } else { + for(i = 0; i < program->Parameters->NumParameters; ++i) { + struct rc_constant constant; + + constant.Type = RC_CONSTANT_EXTERNAL; + constant.Size = 4; + constant.u.External = i; + + rc_constants_add(&c->Program.Constants, &constant); + } + } +} + + +/** + * Print program to stderr, default options. + */ +void rc_print_program(const struct rc_program *prog) +{ + GLuint indent = 0; + GLuint linenum = 1; + struct rc_instruction *inst; + + fprintf(stderr, "# Radeon Compiler Program\n"); + + for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { + fprintf(stderr, "%3d: ", linenum); + + /* Massive hack: We rely on the fact that the printers do not actually + * use the gl_program argument (last argument) in debug mode */ + indent = _mesa_fprint_instruction_opt( + stderr, &inst->I, + indent, PROG_PRINT_DEBUG, 0); + + linenum++; + } +} diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index 88474d43a2..561958608c 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -34,12 +34,9 @@ #include "shader/program.h" #include "shader/prog_instruction.h" - -enum { - CLAUSE_MIXED = 0, - CLAUSE_ALU, - CLAUSE_TEX -}; +struct radeon_compiler; +struct rc_instruction; +struct rc_program; enum { PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ @@ -83,19 +80,12 @@ static inline GLuint combine_swizzles(GLuint src, GLuint swz) return ret; } +static INLINE void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} -/** - * Transformation context that is passed to local transformations. 
- * - * Care must be taken with some operations during transformation, - * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary - */ -struct radeon_transform_context { - GLcontext *Ctx; - struct gl_program *Program; - struct prog_instruction *OldInstructions; - GLuint OldNumInstructions; -}; /** * A transformation that can be passed to \ref radeonLocalTransform. @@ -109,23 +99,23 @@ struct radeon_transform_context { */ struct radeon_program_transformation { GLboolean (*function)( - struct radeon_transform_context*, - struct prog_instruction*, + struct radeon_compiler*, + struct rc_instruction*, void*); void *userData; }; void radeonLocalTransform( - GLcontext* ctx, - struct gl_program *program, + struct radeon_compiler *c, int num_transformations, struct radeon_program_transformation* transformations); -/** - * Find a usable free temporary register during program transformation - */ -GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx); +GLint rc_find_free_temporary(struct radeon_compiler * c); + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); +void rc_remove_instruction(struct rc_instruction * inst); -struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count); +void rc_print_program(const struct rc_program *prog); #endif diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 8283723bad..f23ce301ca 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -35,49 +35,52 @@ #include "radeon_program_alu.h" -#include "shader/prog_parameter.h" +#include "radeon_compiler.h" -static struct prog_instruction *emit1(struct gl_program* p, +static struct rc_instruction *emit1( + struct radeon_compiler * c, struct rc_instruction * after, 
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, struct prog_src_register SrcReg) { - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg; + fpi->I.Opcode = Opcode; + fpi->I.SaturateMode = Saturate; + fpi->I.DstReg = DstReg; + fpi->I.SrcReg[0] = SrcReg; return fpi; } -static struct prog_instruction *emit2(struct gl_program* p, +static struct rc_instruction *emit2( + struct radeon_compiler * c, struct rc_instruction * after, gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) { - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg0; - fpi->SrcReg[1] = SrcReg1; + fpi->I.Opcode = Opcode; + fpi->I.SaturateMode = Saturate; + fpi->I.DstReg = DstReg; + fpi->I.SrcReg[0] = SrcReg0; + fpi->I.SrcReg[1] = SrcReg1; return fpi; } -static struct prog_instruction *emit3(struct gl_program* p, +static struct rc_instruction *emit3( + struct radeon_compiler * c, struct rc_instruction * after, gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, struct prog_src_register SrcReg2) { - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); - - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg0; - fpi->SrcReg[1] = SrcReg1; - fpi->SrcReg[2] = SrcReg2; + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->I.Opcode = Opcode; + fpi->I.SaturateMode = Saturate; + fpi->I.DstReg = DstReg; + fpi->I.SrcReg[0] = SrcReg0; + fpi->I.SrcReg[1] = SrcReg1; + 
fpi->I.SrcReg[2] = SrcReg2; return fpi; } @@ -88,6 +91,7 @@ static struct prog_dst_register dstreg(int file, int index) dst.Index = index; dst.WriteMask = WRITEMASK_XYZW; dst.CondMask = COND_TR; + dst.RelAddr = 0; dst.CondSwizzle = SWIZZLE_NOOP; dst.CondSrc = 0; dst.pad = 0; @@ -96,10 +100,11 @@ static struct prog_dst_register dstreg(int file, int index) static struct prog_dst_register dstregtmpmask(int index, int mask) { - struct prog_dst_register dst; + struct prog_dst_register dst = {0}; dst.File = PROGRAM_TEMPORARY; dst.Index = index; dst.WriteMask = mask; + dst.RelAddr = 0; dst.CondMask = COND_TR; dst.CondSwizzle = SWIZZLE_NOOP; dst.CondSrc = 0; @@ -171,44 +176,63 @@ static struct prog_src_register scalar(struct prog_src_register reg) return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); } -static void transform_ABS(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) { - struct prog_src_register src = inst->SrcReg[0]; + struct prog_src_register src = inst->I.SrcReg[0]; src.Abs = 1; src.Negate = NEGATE_NONE; - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); + emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src); + rc_remove_instruction(inst); } -static void transform_DPH(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_DP3(struct radeon_compiler* c, + struct rc_instruction* inst) { - struct prog_src_register src0 = inst->SrcReg[0]; + struct prog_src_register src0 = inst->I.SrcReg[0]; + struct prog_src_register src1 = inst->I.SrcReg[1]; + src0.Negate &= ~NEGATE_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~NEGATE_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + 
+static void transform_DPH(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct prog_src_register src0 = inst->I.SrcReg[0]; src0.Negate &= ~NEGATE_W; src0.Swizzle &= ~(7 << (3 * 3)); src0.Swizzle |= SWIZZLE_ONE << (3 * 3); - emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); + emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]); + rc_remove_instruction(inst); } /** * [1, src0.y*src1.y, src0.z, src1.w] * So basically MUL with lotsa swizzling. */ -static void transform_DST(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_DST(struct radeon_compiler* c, + struct rc_instruction* inst) { - emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg, - swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), - swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); + emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg, + swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), + swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); + rc_remove_instruction(inst); } -static void transform_FLR(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_FLR(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); - emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); - emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, - inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + int tempreg = rc_find_free_temporary(c); + emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]); + emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg, + inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + rc_remove_instruction(inst); } /** @@ -229,152 +253,159 @@ 
static void transform_FLR(struct radeon_transform_context* t, * 5 slots, if the subsequent optimization passes are clever enough * to pair instructions correctly. */ -static void transform_LIT(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) { - static const GLfloat LitConst[4] = { -127.999999 }; - GLuint constant; GLuint constant_swizzle; GLuint temp; - int needTemporary = 0; struct prog_src_register srctemp; - constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle); + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); - if (inst->DstReg.WriteMask != WRITEMASK_XYZW) { - needTemporary = 1; - } else if (inst->DstReg.File != PROGRAM_TEMPORARY) { - // LIT is typically followed by DP3/DP4, so there's no point - // in creating special code for this case - needTemporary = 1; - } + if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) { + struct rc_instruction * inst_mov; - if (needTemporary) { - temp = radeonFindFreeTemporary(t); - } else { - temp = inst->DstReg.Index; + inst_mov = emit1(c, inst, + OPCODE_MOV, 0, inst->I.DstReg, + srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c))); + + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; } + + temp = inst->I.DstReg.Index; srctemp = srcreg(PROGRAM_TEMPORARY, temp); // tmp.x = max(0.0, Src.x); // tmp.y = max(0.0, Src.y); // tmp.w = clamp(Src.z, -128+eps, 128-eps); - emit2(t->Program, OPCODE_MAX, 0, + emit2(c, inst->Prev, OPCODE_MAX, 0, dstregtmpmask(temp, WRITEMASK_XYW), - inst->SrcReg[0], + inst->I.SrcReg[0], swizzle(srcreg(PROGRAM_CONSTANT, constant), SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); - emit2(t->Program, OPCODE_MIN, 0, + emit2(c, inst->Prev, OPCODE_MIN, 0, dstregtmpmask(temp, 
WRITEMASK_Z), swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); // tmp.w = Pow(tmp.y, tmp.w) - emit1(t->Program, OPCODE_LG2, 0, + emit1(c, inst->Prev, OPCODE_LG2, 0, dstregtmpmask(temp, WRITEMASK_W), swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit2(t->Program, OPCODE_MUL, 0, + emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); - emit1(t->Program, OPCODE_EX2, 0, + emit1(c, inst->Prev, OPCODE_EX2, 0, dstregtmpmask(temp, WRITEMASK_W), swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, + emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, dstregtmpmask(temp, WRITEMASK_Z), negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), builtin_zero); // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, + emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, dstregtmpmask(temp, WRITEMASK_XYW), swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); - if (needTemporary) - emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp); + rc_remove_instruction(inst); } -static void transform_LRP(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_LRP(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); + int tempreg = rc_find_free_temporary(c); - emit2(t->Program, OPCODE_ADD, 0, + emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), - inst->SrcReg[1], negate(inst->SrcReg[2])); - emit3(t->Program, OPCODE_MAD, inst->SaturateMode, - inst->DstReg, - inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), 
inst->SrcReg[2]); + inst->I.SrcReg[1], negate(inst->I.SrcReg[2])); + emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, + inst->I.DstReg, + inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]); + + rc_remove_instruction(inst); } -static void transform_POW(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_POW(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); + int tempreg = rc_find_free_temporary(c); struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); tempdst.WriteMask = WRITEMASK_W; tempsrc.Swizzle = SWIZZLE_WWWW; - emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0])); - emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1])); - emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc); + emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0])); + emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1])); + emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc); + + rc_remove_instruction(inst); } -static void transform_RSQ(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_RSQ(struct radeon_compiler* c, + struct rc_instruction* inst) { - emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0])); + inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]); } -static void transform_SGE(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_SGE(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); + int tempreg = rc_find_free_temporary(c); - emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, 
inst->DstReg, + emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); + emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); + + rc_remove_instruction(inst); } -static void transform_SLT(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_SLT(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); + int tempreg = rc_find_free_temporary(c); - emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); + emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); + + rc_remove_instruction(inst); } -static void transform_SUB(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_SUB(struct radeon_compiler* c, + struct rc_instruction* inst) { - emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); + inst->I.Opcode = OPCODE_ADD; + inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]); } -static void transform_SWZ(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_SWZ(struct radeon_compiler* c, + struct rc_instruction* inst) { - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]); + inst->I.Opcode = OPCODE_MOV; } -static void transform_XPD(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_XPD(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_MUL, 0, 
dstreg(PROGRAM_TEMPORARY, tempreg), - swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg, - swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), - swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), + swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); + emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg, + swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), + swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + + rc_remove_instruction(inst); } @@ -392,31 +423,64 @@ static void transform_XPD(struct radeon_transform_context* t, * * @note should be applicable to R300 and R500 fragment programs. 
*/ -GLboolean radeonTransformALU(struct radeon_transform_context* t, - struct prog_instruction* inst, +GLboolean radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction* inst, void* unused) { - switch(inst->Opcode) { - case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; - case OPCODE_DST: transform_DST(t, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; - case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; - case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE; - case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; - case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE; - case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; - case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE; + switch(inst->I.Opcode) { + case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; + case OPCODE_DST: transform_DST(c, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; + case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; + case OPCODE_POW: transform_POW(c, inst); return GL_TRUE; + case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE; + case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE; + case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; default: return GL_FALSE; } } -static void sincos_constants(struct radeon_transform_context* t, GLuint *constants) +static void transform_r300_vertex_ABS(struct radeon_compiler* c, + struct 
rc_instruction* inst) +{ + /* Note: r500 can take absolute values, but r300 cannot. */ + inst->I.Opcode = OPCODE_MAX; + inst->I.SrcReg[1] = inst->I.SrcReg[0]; + inst->I.SrcReg[1].Negate ^= NEGATE_XYZW; +} + +/** + * For use with radeonLocalTransform, this transforms non-native ALU + * instructions of the r300 up to r500 vertex engine. + */ +GLboolean r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->I.Opcode) { + case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE; + case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; + default: + return GL_FALSE; + } +} + +static void sincos_constants(struct radeon_compiler* c, GLuint *constants) { static const GLfloat SinCosConsts[2][4] = { { @@ -434,11 +498,8 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan }; int i; - for(i = 0; i < 2; ++i) { - GLuint swz; - constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz); - ASSERT(swz == SWIZZLE_NOOP); - } + for(i = 0; i < 2; ++i) + constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); } /** @@ -449,23 +510,24 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x * MAD dest, tmp.y, weight, tmp.x */ -static void sin_approx(struct radeon_transform_context* t, +static void sin_approx( + struct radeon_compiler* c, struct rc_instruction * before, struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) { - GLuint tempreg = radeonFindFreeTemporary(t); + 
GLuint tempreg = rc_find_free_temporary(c); - emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + emit2(c, before->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), srcreg(PROGRAM_CONSTANT, constants[0])); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), + emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), + emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); - emit3(t->Program, OPCODE_MAD, 0, dst, + emit3(c, before->Prev, OPCODE_MAD, 0, dst, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); @@ -476,78 +538,80 @@ static void sin_approx(struct radeon_transform_context* t, * using only the basic instructions * MOV, ADD, MUL, MAD, FRC */ -GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, - struct prog_instruction* inst, +GLboolean radeonTransformTrigSimple(struct radeon_compiler* c, + struct rc_instruction* inst, void* unused) { - if (inst->Opcode != OPCODE_COS && - inst->Opcode != OPCODE_SIN && - inst->Opcode != OPCODE_SCS) + if (inst->I.Opcode != OPCODE_COS && + inst->I.Opcode 
!= OPCODE_SIN && + inst->I.Opcode != OPCODE_SCS) return GL_FALSE; GLuint constants[2]; - GLuint tempreg = radeonFindFreeTemporary(t); + GLuint tempreg = rc_find_free_temporary(c); - sincos_constants(t, constants); + sincos_constants(c, constants); - if (inst->Opcode == OPCODE_COS) { + if (inst->I.Opcode == OPCODE_COS) { // MAD tmp.x, src, 1/(2*PI), 0.75 // FRC tmp.x, tmp.x // MAD tmp.z, tmp.x, 2*PI, -PI - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - sin_approx(t, inst->DstReg, + sin_approx(c, inst, inst->I.DstReg, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), constants); - } else if (inst->Opcode == OPCODE_SIN) { - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + } else if (inst->I.Opcode == OPCODE_SIN) { + emit3(c, inst->Prev, OPCODE_MAD, 0, 
dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - sin_approx(t, inst->DstReg, + sin_approx(c, inst, inst->I.DstReg, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), constants); } else { - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), srcreg(PROGRAM_TEMPORARY, tempreg)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), srcreg(PROGRAM_TEMPORARY, tempreg), 
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - struct prog_dst_register dst = inst->DstReg; + struct prog_dst_register dst = inst->I.DstReg; - dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X; - sin_approx(t, dst, + dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X; + sin_approx(c, inst, dst, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), constants); - dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y; - sin_approx(t, dst, + dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y; + sin_approx(c, inst, dst, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), constants); } + rc_remove_instruction(inst); + return GL_TRUE; } @@ -560,50 +624,52 @@ GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, * * @warning This transformation implicitly changes the semantics of SIN and COS! 
*/ -GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, - struct prog_instruction* inst, +GLboolean radeonTransformTrigScale(struct radeon_compiler* c, + struct rc_instruction* inst, void* unused) { - if (inst->Opcode != OPCODE_COS && - inst->Opcode != OPCODE_SIN && - inst->Opcode != OPCODE_SCS) + if (inst->I.Opcode != OPCODE_COS && + inst->I.Opcode != OPCODE_SIN && + inst->I.Opcode != OPCODE_SCS) return GL_FALSE; - static const GLfloat RCP_2PI[] = { 0.15915494309189535 }; + static const GLfloat RCP_2PI = 0.15915494309189535; GLuint temp; GLuint constant; GLuint constant_swizzle; - temp = radeonFindFreeTemporary(t); - constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle); + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); - emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), srcreg(PROGRAM_TEMPORARY, temp)); - if (inst->Opcode == OPCODE_COS) { - emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg, + if (inst->I.Opcode == OPCODE_COS) { + emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->Opcode == OPCODE_SIN) { - emit1(t->Program, OPCODE_SIN, inst->SaturateMode, - inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->Opcode == OPCODE_SCS) { - struct prog_dst_register moddst = inst->DstReg; + } else if (inst->I.Opcode == OPCODE_SIN) { + emit1(c, inst->Prev, OPCODE_SIN, 
inst->I.SaturateMode, + inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->I.Opcode == OPCODE_SCS) { + struct prog_dst_register moddst = inst->I.DstReg; - if (inst->DstReg.WriteMask & WRITEMASK_X) { + if (inst->I.DstReg.WriteMask & WRITEMASK_X) { moddst.WriteMask = WRITEMASK_X; - emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst, + emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); } - if (inst->DstReg.WriteMask & WRITEMASK_Y) { + if (inst->I.DstReg.WriteMask & WRITEMASK_Y) { moddst.WriteMask = WRITEMASK_Y; - emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst, + emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); } } + rc_remove_instruction(inst); + return GL_TRUE; } @@ -615,21 +681,15 @@ GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, * @warning This explicitly changes the form of DDX and DDY! 
*/ -GLboolean radeonTransformDeriv(struct radeon_transform_context* t, - struct prog_instruction* inst, +GLboolean radeonTransformDeriv(struct radeon_compiler* c, + struct rc_instruction* inst, void* unused) { - if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY) + if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY) return GL_FALSE; - struct prog_src_register B = inst->SrcReg[1]; - - B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, - SWIZZLE_ONE, SWIZZLE_ONE); - B.Negate = NEGATE_XYZW; - - emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, - inst->SrcReg[0], B); + inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE); + inst->I.SrcReg[1].Negate = NEGATE_XYZW; return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h index b45958115c..147efec6fc 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h @@ -31,23 +31,28 @@ #include "radeon_program.h" GLboolean radeonTransformALU( - struct radeon_transform_context *t, - struct prog_instruction*, + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +GLboolean r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction * inst, void*); GLboolean radeonTransformTrigSimple( - struct radeon_transform_context *t, - struct prog_instruction*, + struct radeon_compiler * c, + struct rc_instruction * inst, void*); GLboolean radeonTransformTrigScale( - struct radeon_transform_context *t, - struct prog_instruction*, + struct radeon_compiler * c, + struct rc_instruction * inst, void*); GLboolean radeonTransformDeriv( - struct radeon_transform_context *t, - struct prog_instruction*, + struct radeon_compiler * c, + struct rc_instruction * inst, void*); #endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c 
b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index d6fb474cf2..4c26db5d24 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -35,17 +35,19 @@ #include "radeon_program_pair.h" -#include "radeon_common.h" - +#include "memory_pool.h" +#include "radeon_compiler.h" #include "shader/prog_print.h" #define error(fmt, args...) do { \ - _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \ + rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n", \ __FILE__, __FUNCTION__, ##args); \ - s->Error = GL_TRUE; \ } while(0) struct pair_state_instruction { + struct prog_instruction Instruction; + GLuint IP; /**< Position of this instruction in original program */ + GLuint IsTex:1; /**< Is a texture instruction */ GLuint NeedRGB:1; /**< Needs the RGB ALU */ GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ @@ -73,7 +75,7 @@ struct pair_state_instruction { * Used to keep track of which instructions read a value. */ struct reg_value_reader { - GLuint IP; /**< IP of the instruction that performs this access */ + struct pair_state_instruction *Reader; struct reg_value_reader *Next; }; @@ -82,7 +84,7 @@ struct reg_value_reader { * PROGRAM_TEMPORARY. */ struct reg_value { - GLuint IP; /**< IP of the instruction that writes this value */ + struct pair_state_instruction *Writer; struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ /** @@ -116,11 +118,8 @@ struct pair_register_translation { }; struct pair_state { - GLcontext *Ctx; - struct gl_program *Program; + struct r300_fragment_program_compiler * Compiler; const struct radeon_pair_handler *Handler; - GLboolean Error; - GLboolean Debug; GLboolean Verbose; void *UserData; @@ -130,11 +129,6 @@ struct pair_state { struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; - /** - * Derived information about program instructions. 
- */ - struct pair_state_instruction *Instructions; - struct { GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ } HwTemps[128]; @@ -147,14 +141,6 @@ struct pair_state { struct pair_state_instruction *ReadyRGB; struct pair_state_instruction *ReadyAlpha; struct pair_state_instruction *ReadyTEX; - - /** - * Pool of @ref reg_value structures for fast allocation. - */ - struct reg_value *ValuePool; - GLuint ValuePoolUsed; - struct reg_value_reader *ReaderPool; - GLuint ReaderPoolUsed; }; @@ -183,7 +169,7 @@ static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) struct pair_register_translation *t = get_register(s, file, index); if (!t) { - _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index); + error("get_hw_reg: %i[%i]\n", file, index); return 0; } @@ -221,15 +207,13 @@ static void add_pairinst_to_list(struct pair_state_instruction **list, struct pa } /** - * The instruction at the given IP has become ready. Link it into the ready + * The given instruction has become ready. Link it into the ready * instructions. */ -static void instruction_ready(struct pair_state *s, int ip) +static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst) { - struct pair_state_instruction *pairinst = s->Instructions + ip; - if (s->Verbose) - _mesa_printf("instruction_ready(%i)\n", ip); + _mesa_printf("instruction_ready(%i)\n", pairinst->IP); if (pairinst->IsTex) add_pairinst_to_list(&s->ReadyTEX, pairinst); @@ -296,12 +280,12 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) * Classify an instruction according to which ALUs etc. it needs */ static void classify_instruction(struct pair_state *s, - struct prog_instruction *inst, struct pair_state_instruction *pairinst) + struct pair_state_instruction *psi) { - pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; - pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 
1 : 0; + psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; + psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; - switch(inst->Opcode) { + switch(psi->Instruction.Opcode) { case OPCODE_ADD: case OPCODE_CMP: case OPCODE_DDX: @@ -319,24 +303,24 @@ static void classify_instruction(struct pair_state *s, case OPCODE_RCP: case OPCODE_RSQ: case OPCODE_SIN: - pairinst->IsTranscendent = 1; - pairinst->NeedAlpha = 1; + psi->IsTranscendent = 1; + psi->NeedAlpha = 1; break; case OPCODE_DP4: - pairinst->NeedAlpha = 1; + psi->NeedAlpha = 1; /* fall through */ case OPCODE_DP3: - pairinst->NeedRGB = 1; + psi->NeedRGB = 1; break; case OPCODE_KIL: case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXP: case OPCODE_END: - pairinst->IsTex = 1; + psi->IsTex = 1; break; default: - error("Unknown opcode %d\n", inst->Opcode); + error("Unknown opcode %d\n", psi->Instruction.Opcode); break; } } @@ -348,30 +332,34 @@ static void classify_instruction(struct pair_state *s, */ static void scan_instructions(struct pair_state *s) { - struct prog_instruction *inst; - struct pair_state_instruction *pairinst; + struct rc_instruction *source; GLuint ip; - for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0; - inst->Opcode != OPCODE_END; - ++inst, ++pairinst, ++ip) { - final_rewrite(s, inst); - classify_instruction(s, inst, pairinst); + for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0; + source != &s->Compiler->Base.Program.Instructions; + source = source->Next, ++ip) { + struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst)); + memset(pairinst, 0, sizeof(struct pair_state_instruction)); + + pairinst->Instruction = source->I; + pairinst->IP = ip; + final_rewrite(s, &pairinst->Instruction); + classify_instruction(s, pairinst); - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode); int j; for(j = 0; j < nsrc; 
j++) { struct pair_register_translation *t = - get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index); + get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index); if (!t) continue; t->RefCount++; - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) { int i; for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i); + GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i); if (swz >= 4) continue; /* constant or NIL swizzle */ if (!t->Value[swz]) @@ -381,36 +369,37 @@ static void scan_instructions(struct pair_state *s) * also rewrites the value. The code below adds * a dependency for the DstReg, which is a superset * of the SrcReg dependency. */ - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == inst->SrcReg[j].Index && - GET_BIT(inst->DstReg.WriteMask, swz)) + if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY && + pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index && + GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz)) continue; - struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++]; + struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*r)); pairinst->NumDependencies++; t->Value[swz]->NumReaders++; - r->IP = ip; + r->Reader = pairinst; r->Next = t->Value[swz]->Readers; t->Value[swz]->Readers = r; } } } - int ndst = _mesa_num_inst_dst_regs(inst->Opcode); + int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode); if (ndst) { struct pair_register_translation *t = - get_register(s, inst->DstReg.File, inst->DstReg.Index); + get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index); if (t) { t->RefCount++; - if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) { int j; for(j = 0; j < 4; ++j) { - if (!GET_BIT(inst->DstReg.WriteMask, j)) + if 
(!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j)) continue; - struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++]; - v->IP = ip; + struct reg_value* v = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*v)); + memset(v, 0, sizeof(struct reg_value)); + v->Writer = pairinst; if (t->Value[j]) { pairinst->NumDependencies++; t->Value[j]->Next = v; @@ -426,7 +415,7 @@ static void scan_instructions(struct pair_state *s) _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); if (!pairinst->NumDependencies) - instruction_ready(s, ip); + instruction_ready(s, pairinst); } /* Clear the PROGRAM_TEMPORARY state */ @@ -438,70 +427,23 @@ static void scan_instructions(struct pair_state *s) } -/** - * Reserve hardware temporary registers for the program inputs. - * - * @note This allocation is performed explicitly, because the order of inputs - * is determined by the RS hardware. - */ -static void allocate_input_registers(struct pair_state *s) -{ - GLuint InputsRead = s->Program->InputsRead; - int i; - GLuint hwindex = 0; - - /* Primary colour */ - if (InputsRead & FRAG_BIT_COL0) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++); - InputsRead &= ~FRAG_BIT_COL0; - - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++); - InputsRead &= ~FRAG_BIT_COL1; - - /* Texcoords */ - for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* Fogcoords treated as a texcoord */ - if (InputsRead & FRAG_BIT_FOGC) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++); - InputsRead &= ~FRAG_BIT_FOGC; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); - InputsRead &= ~FRAG_BIT_WPOS; - - /* Anything else */ - if (InputsRead) - error("Don't know how to 
handle inputs 0x%x\n", InputsRead); -} - - -static void decrement_dependencies(struct pair_state *s, int ip) +static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst) { - struct pair_state_instruction *pairinst = s->Instructions + ip; ASSERT(pairinst->NumDependencies > 0); if (!--pairinst->NumDependencies) - instruction_ready(s, ip); + instruction_ready(s, pairinst); } /** * Update the dependency tracking state based on what the instruction * at the given IP does. */ -static void commit_instruction(struct pair_state *s, int ip) +static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst) { - struct prog_instruction *inst = s->Program->Instructions + ip; - struct pair_state_instruction *pairinst = s->Instructions + ip; + struct prog_instruction *inst = &pairinst->Instruction; if (s->Verbose) - _mesa_printf("commit_instruction(%i)\n", ip); + _mesa_printf("commit_instruction(%i)\n", pairinst->IP); if (inst->DstReg.File == PROGRAM_TEMPORARY) { struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; @@ -516,11 +458,11 @@ static void commit_instruction(struct pair_state *s, int ip) if (t->Value[i]->NumReaders) { struct reg_value_reader *r; for(r = pairinst->Values[i]->Readers; r; r = r->Next) - decrement_dependencies(s, r->IP); + decrement_dependencies(s, r->Reader); } else if (t->Value[i]->Next) { /* This happens when the only reader writes * the register at the same time */ - decrement_dependencies(s, t->Value[i]->Next->IP); + decrement_dependencies(s, t->Value[i]->Next->Writer); } } } @@ -554,7 +496,7 @@ static void commit_instruction(struct pair_state *s, int ip) if (!--t->Value[swz]->NumReaders) { if (t->Value[swz]->Next) - decrement_dependencies(s, t->Value[swz]->Next->IP); + decrement_dependencies(s, t->Value[swz]->Next->Writer); } } } @@ -585,36 +527,52 @@ static void emit_all_tex(struct pair_state *s) // Allocate destination hardware registers in one block to avoid conflicts. 
for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - int ip = pairinst - s->Instructions; - struct prog_instruction *inst = s->Program->Instructions + ip; + struct prog_instruction *inst = &pairinst->Instruction; if (inst->Opcode != OPCODE_KIL) get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); } - if (s->Debug) + if (s->Compiler->Base.Debug) _mesa_printf(" BEGIN_TEX\n"); if (s->Handler->BeginTexBlock) - s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData); + s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData); for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - int ip = pairinst - s->Instructions; - struct prog_instruction *inst = s->Program->Instructions + ip; - commit_instruction(s, ip); + struct prog_instruction *inst = &pairinst->Instruction; + commit_instruction(s, pairinst); if (inst->Opcode != OPCODE_KIL) inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); - if (s->Debug) { + if (s->Compiler->Base.Debug) { _mesa_printf(" "); _mesa_print_instruction(inst); - fflush(stdout); + fflush(stderr); + } + + struct radeon_pair_texture_instruction rpti; + + switch(inst->Opcode) { + case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break; + case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break; + case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break; + default: + case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break; } - s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); + + rpti.DestIndex = inst->DstReg.Index; + rpti.WriteMask = inst->DstReg.WriteMask; + rpti.TexSrcUnit = inst->TexSrcUnit; + rpti.TexSrcTarget = inst->TexSrcTarget; + rpti.SrcIndex = inst->SrcReg[0].Index; + rpti.SrcSwizzle = inst->SrcReg[0].Swizzle; + + s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitTex(s->UserData, &rpti); } - if (s->Debug) + if 
(s->Compiler->Base.Debug) _mesa_printf(" END_TEX\n"); } @@ -637,7 +595,7 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio index = get_hw_reg(s, src.File, src.Index); } else { constant = 1; - s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index); + index = src.Index; } for(i = 0; i < 3; ++i) { @@ -684,10 +642,12 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio * Fill the given ALU instruction's opcodes and source operands into the given pair, * if possible. */ -static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +static GLboolean fill_instruction_into_pair( + struct pair_state *s, + struct radeon_pair_instruction *pair, + struct pair_state_instruction *pairinst) { - struct pair_state_instruction *pairinst = s->Instructions + ip; - struct prog_instruction *inst = s->Program->Instructions + ip; + struct prog_instruction *inst = &pairinst->Instruction; ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); @@ -768,16 +728,18 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ * we are absolutely certain that we're going to emit a certain * instruction pairing. 
*/ -static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +static void fill_dest_into_pair( + struct pair_state *s, + struct radeon_pair_instruction *pair, + struct pair_state_instruction *pairinst) { - struct pair_state_instruction *pairinst = s->Instructions + ip; - struct prog_instruction *inst = s->Program->Instructions + ip; + struct prog_instruction *inst = &pairinst->Instruction; if (inst->DstReg.File == PROGRAM_OUTPUT) { - if (inst->DstReg.Index == FRAG_RESULT_COLOR) { + if (inst->DstReg.Index == s->Compiler->OutputColor) { pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) { + } else if (inst->DstReg.Index == s->Compiler->OutputDepth) { pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); } } else { @@ -804,24 +766,24 @@ static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruc static void emit_alu(struct pair_state *s) { struct radeon_pair_instruction pair; + struct pair_state_instruction *psi; if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - int ip; if (s->ReadyFullALU) { - ip = s->ReadyFullALU - s->Instructions; + psi = s->ReadyFullALU; s->ReadyFullALU = s->ReadyFullALU->NextReady; } else if (s->ReadyRGB) { - ip = s->ReadyRGB - s->Instructions; + psi = s->ReadyRGB; s->ReadyRGB = s->ReadyRGB->NextReady; } else { - ip = s->ReadyAlpha - s->Instructions; + psi = s->ReadyAlpha; s->ReadyAlpha = s->ReadyAlpha->NextReady; } _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, ip); - fill_dest_into_pair(s, &pair, ip); - commit_instruction(s, ip); + fill_instruction_into_pair(s, &pair, psi); + fill_dest_into_pair(s, &pair, psi); + commit_instruction(s, psi); } else { struct pair_state_instruction **prgb; struct pair_state_instruction **palpha; @@ -830,65 +792,65 @@ static void emit_alu(struct pair_state *s) * many 
source slots; try all possible pairings if necessary */ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { - int rgbip = *prgb - s->Instructions; - int alphaip = *palpha - s->Instructions; + struct pair_state_instruction * psirgb = *prgb; + struct pair_state_instruction * psialpha = *palpha; _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, rgbip); - if (!fill_instruction_into_pair(s, &pair, alphaip)) + fill_instruction_into_pair(s, &pair, psirgb); + if (!fill_instruction_into_pair(s, &pair, psialpha)) continue; *prgb = (*prgb)->NextReady; *palpha = (*palpha)->NextReady; - fill_dest_into_pair(s, &pair, rgbip); - fill_dest_into_pair(s, &pair, alphaip); - commit_instruction(s, rgbip); - commit_instruction(s, alphaip); + fill_dest_into_pair(s, &pair, psirgb); + fill_dest_into_pair(s, &pair, psialpha); + commit_instruction(s, psirgb); + commit_instruction(s, psialpha); goto success; } } /* No success in pairing; just take the first RGB instruction */ - int ip = s->ReadyRGB - s->Instructions; + psi = s->ReadyRGB; s->ReadyRGB = s->ReadyRGB->NextReady; + _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, ip); - fill_dest_into_pair(s, &pair, ip); - commit_instruction(s, ip); + fill_instruction_into_pair(s, &pair, psi); + fill_dest_into_pair(s, &pair, psi); + commit_instruction(s, psi); success: ; } - if (s->Debug) + if (s->Compiler->Base.Debug) radeonPrintPairInstruction(&pair); - s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair); + s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, &pair); } +/* Callback function for assigning input registers to hardware registers */ +static void alloc_helper(void * data, unsigned input, unsigned hwreg) +{ + struct pair_state * s = data; + alloc_hw_reg(s, PROGRAM_INPUT, input, hwreg); +} -GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program 
*program, +void radeonPairProgram( + struct r300_fragment_program_compiler * compiler, const struct radeon_pair_handler* handler, void *userdata) { struct pair_state s; _mesa_bzero(&s, sizeof(s)); - s.Ctx = ctx; - s.Program = _mesa_clone_program(ctx, program); + s.Compiler = compiler; s.Handler = handler; s.UserData = userdata; - s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; - s.Verbose = GL_FALSE && s.Debug; + s.Verbose = GL_FALSE && s.Compiler->Base.Debug; - s.Instructions = (struct pair_state_instruction*)_mesa_calloc( - sizeof(struct pair_state_instruction)*s.Program->NumInstructions); - s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4); - s.ReaderPool = (struct reg_value_reader*)_mesa_calloc( - sizeof(struct reg_value_reader)*s.Program->NumInstructions*12); - - if (s.Debug) + if (s.Compiler->Base.Debug) _mesa_printf("Emit paired program\n"); scan_instructions(&s); - allocate_input_registers(&s); + s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s); - while(!s.Error && + while(!s.Compiler->Base.Error && (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { if (s.ReadyTEX) emit_all_tex(&s); @@ -897,16 +859,8 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, emit_alu(&s); } - if (s.Debug) + if (s.Compiler->Base.Debug) _mesa_printf(" END\n"); - - _mesa_free(s.Instructions); - _mesa_free(s.ValuePool); - _mesa_free(s.ReaderPool); - - _mesa_reference_program(ctx, &s.Program, NULL); - - return !s.Error; } diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 4624a24629..ff76178551 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -30,6 +30,8 @@ #include "radeon_program.h" +struct r300_fragment_program_compiler; + /** * Represents a paired instruction, as found in R300 and R500 @@ -82,18 +84,32 @@ struct 
radeon_pair_instruction { }; +enum { + RADEON_OPCODE_TEX = 0, + RADEON_OPCODE_TXB, + RADEON_OPCODE_TXP, + RADEON_OPCODE_KIL +}; + +struct radeon_pair_texture_instruction { + GLuint Opcode:2; /**< one of RADEON_OPCODE_xxx */ + + GLuint DestIndex:8; + GLuint WriteMask:4; + + GLuint TexSrcUnit:5; + GLuint TexSrcTarget:3; + + GLuint SrcIndex:8; + GLuint SrcSwizzle:12; +}; + + /** * */ struct radeon_pair_handler { /** - * Fill in the proper hardware index for the given constant register. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex); - - /** * Write a paired instruction to the hardware. * * @return GL_FALSE on error. @@ -107,7 +123,7 @@ struct radeon_pair_handler { * * @return GL_FALSE on error. */ - GLboolean (*EmitTex)(void*, struct prog_instruction*); + GLboolean (*EmitTex)(void*, struct radeon_pair_texture_instruction*); /** * Called before a block of contiguous, independent texture @@ -115,10 +131,11 @@ struct radeon_pair_handler { */ GLboolean (*BeginTexBlock)(void*); - GLuint MaxHwTemps; + unsigned MaxHwTemps; }; -GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, +void radeonPairProgram( + struct r300_fragment_program_compiler * compiler, const struct radeon_pair_handler*, void *userdata); void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index af535037d0..da5b7ba642 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -54,6 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_mipmap_tree.h" #include "r300_state.h" #include "radeon_reg.h" +#include "radeon_queryobj.h" /** # of dwords reserved for additional instructions that may need to be written * during flushing. 
@@ -73,48 +74,40 @@ static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt) #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) #define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) +int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int cnt; + int extra = 1; + cnt = vpu_count(atom->cmd); + + if (r300->radeon.radeonScreen->kernel_mm) { + extra = 5; + } + + return cnt ? (cnt * 4) + extra : 0; +} + + void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); BATCH_LOCALS(&r300->radeon); drm_r300_cmd_header_t cmd; - uint32_t addr, ndw, i; - - if (!r300->radeon.radeonScreen->kernel_mm) { - uint32_t dwords; - dwords = (*atom->check) (ctx, atom); - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(atom->cmd, dwords); - END_BATCH(); - return; - } + uint32_t addr, ndw; cmd.u = atom->cmd[0]; addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; - ndw = cmd.vpu.count * 4; - if (ndw) { + ndw = atom->check(ctx, atom); - if (r300->vap_flush_needed) { - BEGIN_BATCH_NO_AUTOSTATE(15 + ndw); + BEGIN_BATCH_NO_AUTOSTATE(ndw); - /* flush processing vertices */ - OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0); - OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); - OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); - OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0xffffff); - OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); - r300->vap_flush_needed = GL_FALSE; - } else { - BEGIN_BATCH_NO_AUTOSTATE(5 + ndw); - } - OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); - OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR); - for (i = 0; i < ndw; i++) { - OUT_BATCH(atom->cmd[i+1]); - } - OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); - END_BATCH(); - } + ndw -= 5; + OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); + OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | 
RADEON_ONE_REG_WR); + OUT_BATCH_TABLE(&atom->cmd[1], ndw); + OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); + END_BATCH(); } void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) @@ -122,17 +115,10 @@ void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) r300ContextPtr r300 = R300_CONTEXT(ctx); BATCH_LOCALS(&r300->radeon); drm_r300_cmd_header_t cmd; - uint32_t addr, ndw, i, sz; - int type, clamp, stride; + uint32_t addr, ndw, sz; + int type, clamp; - if (!r300->radeon.radeonScreen->kernel_mm) { - uint32_t dwords; - dwords = (*atom->check) (ctx, atom); - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(atom->cmd, dwords); - END_BATCH(); - return; - } + ndw = atom->check(ctx, atom); cmd.u = atom->cmd[0]; sz = cmd.r500fp.count; @@ -143,20 +129,34 @@ void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) addr |= (type << 16); addr |= (clamp << 17); - stride = type ? 4 : 6; - - ndw = sz * stride; - if (ndw) { + BEGIN_BATCH_NO_AUTOSTATE(ndw); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); + OUT_BATCH(addr); + ndw-=3; + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR); + OUT_BATCH_TABLE(&atom->cmd[1], ndw); + END_BATCH(); +} - BEGIN_BATCH_NO_AUTOSTATE(3 + ndw); - OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); - OUT_BATCH(addr); - OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR); - for (i = 0; i < ndw; i++) { - OUT_BATCH(atom->cmd[i+1]); - } - END_BATCH(); +static int check_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); + int dw = 0, i; + if (atom->cmd[0] == CP_PACKET2) { + return dw; } + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + if (!t && !r300->radeon.radeonScreen->kernel_mm) { + dw += 0; + } else if (t && t->image_override && !t->bo) { + if (!r300->radeon.radeonScreen->kernel_mm) + dw += 2; + } else + dw += 4; + } + 
return dw; } static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) @@ -164,47 +164,46 @@ static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) r300ContextPtr r300 = R300_CONTEXT(ctx); BATCH_LOCALS(&r300->radeon); int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); - int notexture = 0; - - if (numtmus) { - int i; - - for(i = 0; i < numtmus; ++i) { - radeonTexObj *t = r300->hw.textures[i]; - - if (!t) - notexture = 1; - } - - if (r300->radeon.radeonScreen->kernel_mm && notexture) { - return; - } - for(i = 0; i < numtmus; ++i) { - radeonTexObj *t = r300->hw.textures[i]; - if (t && !t->image_override) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - END_BATCH(); - } else if (!t) { - /* Texture unit hasn't a texture bound nothings to do */ - } else { /* override cases */ - if (t->bo) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - END_BATCH(); - } else if (!r300->radeon.radeonScreen->kernel_mm) { - BEGIN_BATCH_NO_AUTOSTATE(2); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH(t->override_offset); - END_BATCH(); - } else { - /* Texture unit hasn't a texture bound nothings to do */ - } - } + int i; + + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + if (t && !t->image_override) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } else if (!t) { + /* Texture unit hasn't a texture bound. + * We assign the current color buffer as a fakery to make + * KIL work on KMS (without it, the CS checker will complain). 
+ */ + if (r300->radeon.radeonScreen->kernel_mm) { + struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&r300->radeon); + if (rrb && rrb->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } + } + } else { /* override cases */ + if (t->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } else if (!r300->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH(t->override_offset); + END_BATCH(); + } else { + /* Texture unit hasn't a texture bound nothings to do */ + } } } } @@ -227,8 +226,8 @@ void r300_emit_scissor(GLcontext *ctx) if (r300->radeon.state.scissor.enabled) { x1 = r300->radeon.state.scissor.rect.x1; y1 = r300->radeon.state.scissor.rect.y1; - x2 = r300->radeon.state.scissor.rect.x2 - 1; - y2 = r300->radeon.state.scissor.rect.y2 - 1; + x2 = r300->radeon.state.scissor.rect.x2; + y2 = r300->radeon.state.scissor.rect.y2; } else { x1 = 0; y1 = 0; @@ -247,6 +246,17 @@ void r300_emit_scissor(GLcontext *ctx) OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT)); END_BATCH(); } +static int check_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t dw = 6 + 3 + 16; + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + dw -= 3 + 16; + } + return dw; +} static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) { @@ -256,7 +266,7 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) uint32_t cbpitch; uint32_t offset = r300->radeon.state.color.draw_offset; uint32_t dw = 6; - int i; + int i; rrb = 
radeon_get_colorbuffer(&r300->radeon); if (!rrb || !rrb->bo) { @@ -264,7 +274,7 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) return; } - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr,"rrb is %p %d %dx%d\n", rrb, offset, rrb->base.Width, rrb->base.Height); cbpitch = (rrb->pitch / rrb->cpp); if (rrb->cpp == 4) @@ -338,13 +348,23 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) } } +static int check_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t dw; + dw = 6; + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + return dw; +} + static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); BATCH_LOCALS(&r300->radeon); struct radeon_renderbuffer *rrb; uint32_t zbpitch; - uint32_t dw; + uint32_t dw = atom->check(ctx, atom); rrb = radeon_get_depthbuffer(&r300->radeon); if (!rrb) @@ -360,9 +380,6 @@ static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) } } - dw = 6; - if (r300->radeon.radeonScreen->kernel_mm) - dw += 2; BEGIN_BATCH_NO_AUTOSTATE(dw); OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -374,46 +391,6 @@ static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) END_BATCH(); } -static void emit_gb_misc(GLcontext *ctx, struct radeon_state_atom * atom) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH(atom->cmd[0]); - OUT_BATCH(atom->cmd[1]); - OUT_BATCH(atom->cmd[2]); - OUT_BATCH(atom->cmd[3]); - END_BATCH(); - } -} - -static void emit_threshold_misc(GLcontext *ctx, struct radeon_state_atom * atom) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - if 
(r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - BEGIN_BATCH_NO_AUTOSTATE(3); - OUT_BATCH(atom->cmd[0]); - OUT_BATCH(atom->cmd[1]); - OUT_BATCH(atom->cmd[2]); - END_BATCH(); - } -} - -static void emit_shade_misc(GLcontext *ctx, struct radeon_state_atom * atom) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - BATCH_LOCALS(&r300->radeon); - - if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { - BEGIN_BATCH_NO_AUTOSTATE(2); - OUT_BATCH(atom->cmd[0]); - OUT_BATCH(atom->cmd[1]); - END_BATCH(); - } -} - static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -431,6 +408,7 @@ static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; } + BEGIN_BATCH_NO_AUTOSTATE(atom->cmd_size); OUT_BATCH(atom->cmd[0]); atom->cmd[1] &= ~0xf; atom->cmd[1] |= format; @@ -438,6 +416,12 @@ static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom OUT_BATCH(atom->cmd[2]); OUT_BATCH(atom->cmd[3]); OUT_BATCH(atom->cmd[4]); + END_BATCH(); +} + +static int check_never(GLcontext *ctx, struct radeon_state_atom *atom) +{ + return 0; } static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) @@ -456,28 +440,29 @@ static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom) return cnt ? cnt + 1 : 0; } -int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) -{ - int cnt; - - cnt = vpu_count(atom->cmd); - return cnt ? (cnt * 4) + 1 : 0; -} - int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; - + r300ContextPtr r300 = R300_CONTEXT(ctx); + int extra = 1; cnt = r500fp_count(atom->cmd); - return cnt ? (cnt * 6) + 1 : 0; + if (r300->radeon.radeonScreen->kernel_mm) + extra = 3; + + return cnt ? 
(cnt * 6) + extra : 0; } int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; + r300ContextPtr r300 = R300_CONTEXT(ctx); + int extra = 1; + cnt = r500fp_count(atom->cmd); + if (r300->radeon.radeonScreen->kernel_mm) + extra = 3; cnt = r500fp_count(atom->cmd); - return cnt ? (cnt * 4) + 1 : 0; + return cnt ? (cnt * 4) + extra : 0; } #define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \ @@ -509,7 +494,7 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->radeon.hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ mtu = r300->radeon.glCtx->Const.MaxTextureUnits; - if (RADEON_DEBUG & DEBUG_TEXTURE) { + if (RADEON_DEBUG & RADEON_TEXTURE) { fprintf(stderr, "Using %d maximum texture units..\n", mtu); } @@ -567,11 +552,14 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(gb_enable, always, 2, 0); r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1); - ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0); + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0); + } else { + ALLOC_STATE(gb_misc, never, R300_GB_MISC_CMDSIZE, 0); + } r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 3); - r300->hw.gb_misc.emit = emit_gb_misc; ALLOC_STATE(gb_misc2, always, R300_GB_MISC2_CMDSIZE, 0); - r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2); + r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2); ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0); r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1); ALLOC_STATE(ga_point_s0, always, 5, 0); @@ -586,9 +574,12 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1); ALLOC_STATE(ga_line_stipple, always, 4, 0); r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 
3); - ALLOC_STATE(shade, always, 2, 0); + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + ALLOC_STATE(shade, always, 2, 0); + } else { + ALLOC_STATE(shade, never, 2, 0); + } r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 1); - r300->hw.shade.emit = emit_shade_misc; ALLOC_STATE(shade2, always, 4, 0); r300->hw.shade2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4278, 3); ALLOC_STATE(polygon_mode, always, 4, 0); @@ -637,11 +628,14 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0); - r300->hw.r500fp.emit = emit_r500fp; + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.r500fp.emit = emit_r500fp; + ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0); r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0); - r300->hw.r500fp_const.emit = emit_r500fp; + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.r500fp_const.emit = emit_r500fp; } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3); @@ -684,25 +678,36 @@ void r300InitCmdBuf(r300ContextPtr r300) } ALLOC_STATE(rop, always, 2, 0); r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1); - ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0); + ALLOC_STATE(cb, cb_offset, R300_CB_CMDSIZE, 0); r300->hw.cb.emit = &emit_cb_offset; ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9); ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1); - ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 
R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); - r300->hw.rb3d_discard_src_pixel_lte_threshold.emit = emit_threshold_misc; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) { + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); + } else { + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, never, 3, 0); + } + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); r300->hw.zs.cmd[R300_ZS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3); + if (is_r500) { + if (r300->radeon.radeonScreen->kernel_mm) + ALLOC_STATE(zsb, always, R300_ZSB_CMDSIZE, 0); + else + ALLOC_STATE(zsb, never, R300_ZSB_CMDSIZE, 0); + r300->hw.zsb.cmd[R300_ZSB_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R500_ZB_STENCILREFMASK_BF, 1); + } ALLOC_STATE(zstencil_format, always, 5, 0); r300->hw.zstencil_format.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4); r300->hw.zstencil_format.emit = emit_zstencil_format; - ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); + ALLOC_STATE(zb, zb_offset, R300_ZB_CMDSIZE, 0); r300->hw.zb.emit = emit_zb_offset; ALLOC_STATE(zb_depthclearvalue, always, 2, 0); r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1); @@ -715,47 +720,72 @@ void r300InitCmdBuf(r300ContextPtr r300) /* VPU only on TCL */ if (has_tcl) { - int i; + int i; + if (r300->radeon.radeonScreen->kernel_mm) { + ALLOC_STATE(vap_flush, always, 10, 0); + /* flush processing vertices */ + r300->hw.vap_flush.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); + r300->hw.vap_flush.cmd[1] = 0; + r300->hw.vap_flush.cmd[2] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DSTCACHE_CTLSTAT, 1); + r300->hw.vap_flush.cmd[3] = R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D; + r300->hw.vap_flush.cmd[4] = cmdpacket0(r300->radeon.radeonScreen, RADEON_WAIT_UNTIL, 
1); + r300->hw.vap_flush.cmd[5] = RADEON_WAIT_3D_IDLECLEAN; + r300->hw.vap_flush.cmd[6] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); + r300->hw.vap_flush.cmd[7] = 0xffffff; + r300->hw.vap_flush.cmd[8] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1); + r300->hw.vap_flush.cmd[9] = 0; + } else { + ALLOC_STATE(vap_flush, never, 10, 0); + } + + ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); r300->hw.vpi.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); - r300->hw.vpi.emit = emit_vpu; + cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpi.emit = emit_vpu; if (is_r500) { - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); - r300->hw.vpp.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); - r300->hw.vpp.emit = emit_vpu; - - ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); - r300->hw.vps.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1); - r300->hw.vps.emit = emit_vpu; + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpp.emit = emit_vpu; + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vps.emit = emit_vpu; for (i = 0; i < 6; i++) { - ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); - r300->hw.vpucp[i].cmd[0] = - cmdvpu(r300->radeon.radeonScreen, - R500_PVS_UCP_START + i, 1); - r300->hw.vpucp[i].emit = emit_vpu; + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[0] = + cmdvpu(r300->radeon.radeonScreen, + R500_PVS_UCP_START + i, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpucp[i].emit = emit_vpu; } } else { - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); - r300->hw.vpp.cmd[0] = - 
cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); - r300->hw.vpp.emit = emit_vpu; - - ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); - r300->hw.vps.cmd[0] = - cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1); - r300->hw.vps.emit = emit_vpu; + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpp.emit = emit_vpu; + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vps.emit = emit_vpu; for (i = 0; i < 6; i++) { ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); r300->hw.vpucp[i].cmd[0] = cmdvpu(r300->radeon.radeonScreen, - R300_PVS_UCP_START + i, 1); - r300->hw.vpucp[i].emit = emit_vpu; + R300_PVS_UCP_START + i, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpucp[i].emit = emit_vpu; } } } @@ -779,7 +809,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(tex.pitch, variable, mtu + 1, 0); r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0); - ALLOC_STATE(tex.offset, variable, 1, 0); + ALLOC_STATE(tex.offset, tex_offsets, 1, 0); r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0); r300->hw.tex.offset.emit = &emit_tex_offsets; @@ -792,6 +822,17 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0); + radeon_init_query_stateobj(&r300->radeon, R300_QUERYOBJ_CMDSIZE); + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) { + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RV530_FG_ZBREG_DEST, 1); + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL; + } else 
{ + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_REG_DEST, 1); + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = R300_RASTER_PIPE_SELECT_ALL; + } + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZPASS_DATA, 1); + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_1] = 0; + r300->radeon.hw.is_dirty = GL_TRUE; r300->radeon.hw.all_dirty = GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h index 53bcc0eeb4..1b703e518a 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h @@ -38,6 +38,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" +#define CACHE_FLUSH_BUFSZ (4*2) +#define PRE_EMIT_STATE_BUFSZ (2+2) +#define AOS_BUFSZ(nr) (3+(nr >>1)*3 + (nr&1)*2 + (nr*2)) +#define FIREAOS_BUFSZ (3) +#define SCISSORS_BUFSZ (3) + extern void r300InitCmdBuf(r300ContextPtr r300); void r300_emit_scissor(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 6f3aab986d..6fcf209af6 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -64,24 +64,27 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r300_ioctl.h" #include "r300_tex.h" #include "r300_emit.h" -#include "r300_render.h" #include "r300_swtcl.h" #include "radeon_bocs_wrapper.h" - +#include "radeon_buffer_objects.h" +#include "radeon_queryobj.h" #include "vblank.h" #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ #define need_GL_VERSION_2_0 +#define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters #define need_GL_ARB_vertex_program #define need_GL_EXT_blend_equation_separate #define need_GL_EXT_blend_func_separate #define need_GL_EXT_blend_minmax +#define need_GL_EXT_framebuffer_blit #define need_GL_EXT_framebuffer_object #define need_GL_EXT_fog_coord #define need_GL_EXT_gpu_program_parameters +#define need_GL_EXT_provoking_vertex #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side #define need_GL_ATI_separate_stencil @@ -94,6 +97,7 @@ const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ {"GL_ARB_depth_texture", NULL}, {"GL_ARB_fragment_program", NULL}, + {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, {"GL_ARB_multitexture", NULL}, {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, {"GL_ARB_shadow", NULL}, @@ -113,6 +117,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_packed_depth_stencil", NULL}, {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, + {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions }, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, {"GL_EXT_shadow_funcs", NULL}, {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions}, @@ -141,6 +146,7 @@ const struct dri_extension card_extensions[] = { const struct dri_extension mm_extensions[] = { + { "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions }, { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, { NULL, NULL } }; @@ -154,7 +160,6 @@ const struct dri_extension 
gl_20_extension[] = { }; static const struct tnl_pipeline_stage *r300_pipeline[] = { - /* Catch any t&l fallbacks */ &_tnl_vertex_transform_stage, @@ -165,21 +170,7 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = { &_tnl_texture_transform_stage, &_tnl_point_attenuation_stage, &_tnl_vertex_program_stage, - - /* Try again to go to tcl? - * - no good for asymmetric-twoside (do with multipass) - * - no good for asymmetric-unfilled (do with multipass) - * - good for material - * - good for texgen - * - need to manipulate a bit of state - * - * - worth it/not worth it? - */ - - /* Else do them here. - */ - &_r300_render_stage, - &_tnl_render_stage, /* FALLBACK */ + &_tnl_render_stage, 0, }; @@ -225,11 +216,8 @@ static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmes static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon) { - r300ContextPtr r300 = (r300ContextPtr)radeon; BATCH_LOCALS(radeon); - r300->vap_flush_needed = GL_TRUE; - cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); BEGIN_BATCH_NO_AUTOSTATE(2); OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH); @@ -246,6 +234,81 @@ static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode) r300->radeon.Fallback &= ~bit; } +static void r300_emit_query_finish(radeonContextPtr radeon) +{ + r300ContextPtr r300 = (r300ContextPtr)radeon; + struct radeon_query_object *query = radeon->query.current; + BATCH_LOCALS(radeon); + + BEGIN_BATCH_NO_AUTOSTATE(3 * 2 *r300->radeon.radeonScreen->num_gb_pipes + 2); + switch (r300->radeon.radeonScreen->num_gb_pipes) { + case 4: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+3*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 3: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_2); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+2*sizeof(uint32_t), 0, 
RADEON_GEM_DOMAIN_GTT, 0); + case 2: + if (r300->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV380) { + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); + } else { + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_1); + } + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+1*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 1: + default: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + break; + } + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); + END_BATCH(); + query->curr_offset += r300->radeon.radeonScreen->num_gb_pipes * sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + +static void rv530_emit_query_finish_single_z(radeonContextPtr radeon) +{ + BATCH_LOCALS(radeon); + struct radeon_query_object *query = radeon->query.current; + + BEGIN_BATCH_NO_AUTOSTATE(8); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_BATCH(); + + query->curr_offset += sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + +static void rv530_emit_query_finish_double_z(radeonContextPtr radeon) +{ + BATCH_LOCALS(radeon); + struct radeon_query_object *query = radeon->query.current; + + BEGIN_BATCH_NO_AUTOSTATE(14); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); + 
OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_BATCH(); + + query->curr_offset += 2 * sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + static void r300_init_vtbl(radeonContextPtr radeon) { radeon->vtbl.get_lock = r300_get_lock; @@ -254,6 +317,13 @@ static void r300_init_vtbl(radeonContextPtr radeon) radeon->vtbl.swtcl_flush = r300_swtcl_flush; radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms; radeon->vtbl.fallback = r300_fallback; + if (radeon->radeonScreen->chip_family == CHIP_FAMILY_RV530) { + if (radeon->radeonScreen->num_z_pipes == 2) + radeon->vtbl.emit_query_finish = rv530_emit_query_finish_double_z; + else + radeon->vtbl.emit_query_finish = rv530_emit_query_finish_single_z; + } else + radeon->vtbl.emit_query_finish = r300_emit_query_finish; } static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) @@ -295,13 +365,10 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) /* currently bogus data */ if (r300->options.hw_tcl_enabled) { - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeAluInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; + ctx->Const.VertexProgram.MaxNativeTemps = 32; ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; } @@ -325,6 +392,7 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) 
ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT; ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; } + } static void r300ParseOptions(r300ContextPtr r300, radeonScreenPtr screen) @@ -361,12 +429,16 @@ static void r300InitGLExtensions(GLcontext *ctx) if (r300->options.stencil_two_side_disabled) _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); - if (ctx->Mesa_DXTn && !r300->options.s3tc_force_enabled) { + if (r300->options.s3tc_force_enabled) { _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); _mesa_enable_extension(ctx, "GL_S3_s3tc"); } else if (r300->options.s3tc_force_disabled) { _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } + + if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) { + _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); + } } /* Create the device specific rendering context. @@ -391,6 +463,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300ParseOptions(r300, screen); + r300->radeon.radeonScreen = screen; r300_init_vtbl(&r300->radeon); _mesa_init_driver_functions(&functions); @@ -398,6 +471,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitStateFuncs(&functions); r300InitTextureFuncs(&functions); r300InitShaderFuncs(&functions); + radeonInitQueryObjFunctions(&functions); + radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r300->radeon, &functions, glVisual, driContextPriv, @@ -451,11 +526,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitState(r300); r300InitShaderFunctions(r300); - if (screen->chip_family == CHIP_FAMILY_RS600 || screen->chip_family == CHIP_FAMILY_RS690 || - screen->chip_family == CHIP_FAMILY_RS740) { - r300->radeon.texture_row_align = 64; - } - r300InitGLExtensions(ctx); return GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index f7af7d4e57..518d5cdbf4 100644 --- 
a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -44,28 +44,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/mtypes.h" #include "shader/prog_instruction.h" +#include "compiler/radeon_code.h" struct r300_context; typedef struct r300_context r300ContextRec; typedef struct r300_context *r300ContextPtr; -/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . - I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble - with other compilers ... GLUE! -*/ -#define WARN_ONCE(a, ...) { \ - static int warn##__LINE__=1; \ - if(warn##__LINE__){ \ - fprintf(stderr, "*********************************WARN_ONCE*********************************\n"); \ - fprintf(stderr, "File %s function %s line %d\n", \ - __FILE__, __FUNCTION__, __LINE__); \ - fprintf(stderr, a, ## __VA_ARGS__);\ - fprintf(stderr, "***************************************************************************\n"); \ - warn##__LINE__=0;\ - } \ - } - #include "r300_vertprog.h" @@ -249,6 +234,10 @@ typedef struct r300_context *r300ContextPtr; #define R300_ZS_CNTL_2 3 #define R300_ZS_CMDSIZE 4 +#define R300_ZSB_CMD_0 0 +#define R300_ZSB_CNTL_0 1 +#define R300_ZSB_CMDSIZE 2 + #define R300_ZB_CMD_0 0 #define R300_ZB_OFFSET 1 #define R300_ZB_PITCH 2 @@ -289,6 +278,12 @@ typedef struct r300_context *r300ContextPtr; #define R300_TEX_CMDSIZE (MAX_TEXTURE_UNITS+1) */ +#define R300_QUERYOBJ_CMD_0 0 +#define R300_QUERYOBJ_DATA_0 1 +#define R300_QUERYOBJ_CMD_1 2 +#define R300_QUERYOBJ_DATA_1 3 +#define R300_QUERYOBJ_CMDSIZE 4 + /** * Cache for hardware register state. */ @@ -352,6 +347,7 @@ struct r300_hw_state { struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */ struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. 
*/ struct radeon_state_atom zs; /* zstencil control (4F00) */ + struct radeon_state_atom zsb; /* zstencil bf */ struct radeon_state_atom zstencil_format; struct radeon_state_atom zb; /* z buffer (4F20) */ struct radeon_state_atom zb_depthclearvalue; /* (4F28) */ @@ -359,6 +355,7 @@ struct r300_hw_state { struct radeon_state_atom zb_hiz_offset; /* (4F44) */ struct radeon_state_atom zb_hiz_pitch; /* (4F54) */ + struct radeon_state_atom vap_flush; struct radeon_state_atom vpi; /* vp instructions */ struct radeon_state_atom vpp; /* vp parameters */ struct radeon_state_atom vps; /* vertex point size (?) */ @@ -379,7 +376,6 @@ struct r300_hw_state { struct radeon_state_atom border_color; } tex; struct radeon_state_atom txe; /* tex enable (4104) */ - radeonTexObj *textures[R300_MAX_TEXTURE_UNITS]; }; @@ -389,46 +385,25 @@ struct r300_hw_state { /* Vertex shader state */ -/* Perhaps more if we store programs in vmem? */ -/* drm_r300_cmd_header_t->vpu->count is unsigned char */ -#define VSF_MAX_FRAGMENT_LENGTH (255*4) - -/* Can be tested with colormat currently. 
*/ -#define VSF_MAX_FRAGMENT_TEMPS (14) - -#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) -#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) - #define COLOR_IS_RGBA #define TAG(x) r300##x #include "tnl_dd/t_dd_vertex.h" #undef TAG +struct r300_vertex_program_key { + GLbitfield FpReads; + GLuint FogAttr; + GLuint WPosAttr; +}; + struct r300_vertex_program { struct gl_vertex_program *Base; struct r300_vertex_program *next; - struct r300_vertex_program_key { - GLuint FpReads; - GLuint FogAttr; - GLuint WPosAttr; - } key; - - struct r300_vertex_shader_hw_code { - int length; - union { - GLuint d[VSF_MAX_FRAGMENT_LENGTH]; - float f[VSF_MAX_FRAGMENT_LENGTH]; - } body; - } hw_code; - - GLboolean translated; - GLboolean error; + struct r300_vertex_program_key key; + struct r300_vertex_program_code code; - int pos_end; - int num_temporaries; /* Number of temp vars used by program */ - int inputs[VERT_ATTRIB_MAX]; - int outputs[VERT_RESULT_MAX]; + GLboolean error; }; struct r300_vertex_program_cont { @@ -441,131 +416,18 @@ struct r300_vertex_program_cont { struct r300_vertex_program *progs; }; -#define R300_PFS_MAX_ALU_INST 64 -#define R300_PFS_MAX_TEX_INST 32 -#define R300_PFS_MAX_TEX_INDIRECT 4 -#define R300_PFS_NUM_TEMP_REGS 32 -#define R300_PFS_NUM_CONST_REGS 32 - -#define R500_PFS_MAX_INST 512 -#define R500_PFS_NUM_TEMP_REGS 128 -#define R500_PFS_NUM_CONST_REGS 256 - -struct r300_pfs_compile_state; -struct r500_pfs_compile_state; - -/** - * Stores state that influences the compilation of a fragment program. - */ -struct r300_fragment_program_external_state { - struct { - /** - * If the sampler is used as a shadow sampler, - * this field is: - * 0 - GL_LUMINANCE - * 1 - GL_INTENSITY - * 2 - GL_ALPHA - * depending on the depth texture mode. - */ - GLuint depth_texture_mode : 2; - - /** - * If the sampler is used as a shadow sampler, - * this field is (texture_compare_func - GL_NEVER). - * [e.g. 
if compare function is GL_LEQUAL, this field is 3] - * - * Otherwise, this field is 0. - */ - GLuint texture_compare_func : 3; - } unit[16]; -}; - - -struct r300_fragment_program_node { - int tex_offset; /**< first tex instruction */ - int tex_end; /**< last tex instruction, relative to tex_offset */ - int alu_offset; /**< first ALU instruction */ - int alu_end; /**< last ALU instruction, relative to alu_offset */ - int flags; -}; - -/** - * Stores an R300 fragment program in its compiled-to-hardware form. - */ -struct r300_fragment_program_code { - struct { - int length; /**< total # of texture instructions used */ - GLuint inst[R300_PFS_MAX_TEX_INST]; - } tex; - - struct { - int length; /**< total # of ALU instructions used */ - struct { - GLuint inst0; - GLuint inst1; - GLuint inst2; - GLuint inst3; - } inst[R300_PFS_MAX_ALU_INST]; - } alu; - - struct r300_fragment_program_node node[4]; - int cur_node; - int first_node_has_tex; - - /** - * Remember which program register a given hardware constant - * belongs to. - */ - struct prog_src_register constant[R300_PFS_NUM_CONST_REGS]; - int const_nr; - - int max_temp_idx; -}; - - -struct r500_fragment_program_code { - struct { - GLuint inst0; - GLuint inst1; - GLuint inst2; - GLuint inst3; - GLuint inst4; - GLuint inst5; - } inst[R500_PFS_MAX_INST]; - - int inst_offset; - int inst_end; - - /** - * Remember which program register a given hardware constant - * belongs to. - */ - struct prog_src_register constant[R500_PFS_NUM_CONST_REGS]; - int const_nr; - - int max_temp_idx; -}; /** * Store everything about a fragment program that is needed * to render with that program. 
*/ struct r300_fragment_program { - struct gl_program *Base; - - GLboolean translated; GLboolean error; - + struct r300_fragment_program *next; struct r300_fragment_program_external_state state; - union rX00_fragment_program_code { - struct r300_fragment_program_code r300; - struct r500_fragment_program_code r500; - } code; - GLboolean writes_depth; - GLuint optimization; - - struct r300_fragment_program *next; + struct rX00_fragment_program_code code; + GLbitfield InputsRead; /* attribute that we are sending the WPOS in */ gl_frag_attrib wpos_attr; @@ -583,12 +445,6 @@ struct r300_fragment_program_cont { struct r300_fragment_program *progs; }; -struct r300_fragment_program_compiler { - r300ContextPtr r300; - struct r300_fragment_program *fp; - union rX00_fragment_program_code *code; - struct gl_program *program; -}; #define R300_MAX_AOS_ARRAYS 16 @@ -610,8 +466,6 @@ struct r300_swtcl_info { struct r300_vtable { void (* SetupRSUnit)(GLcontext *ctx); void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); - GLboolean (* BuildFragmentProgramHwCode)(struct r300_fragment_program_compiler *compiler); - void (* FragmentProgramDump)(union rX00_fragment_program_code *code); void (* SetupPixelShader)(GLcontext *ctx); }; @@ -619,11 +473,12 @@ struct r300_vertex_buffer { struct vertex_attribute { /* generic */ GLubyte element; - GLvoid *data; - GLboolean free_needed; GLuint stride; GLuint dwords; GLubyte size; /* number of components */ + GLboolean is_named_bo; + struct radeon_bo *bo; + GLint bo_offset; /* hw specific */ uint32_t data_type:4; @@ -638,12 +493,14 @@ struct r300_vertex_buffer { }; struct r300_index_buffer { - GLvoid *ptr; + struct radeon_bo *bo; + int bo_offset; + GLboolean is_32bit; - GLboolean free_needed; GLuint count; }; + /** * \brief R300 context structure. 
*/ @@ -669,11 +526,10 @@ struct r300_context { uint32_t s3tc_force_disabled:1; uint32_t stencil_two_side_disabled:1; } options; - + struct r300_swtcl_info swtcl; struct r300_vertex_buffer vbuf; struct r300_index_buffer ind_buf; - GLboolean vap_flush_needed; uint32_t fallback; diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 9769ff5399..e9968f9ffe 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -31,6 +31,7 @@ #include "main/state.h" #include "main/api_validate.h" #include "main/enums.h" +#include "main/simple_list.h" #include "r300_reg.h" #include "r300_context.h" @@ -38,6 +39,10 @@ #include "r300_render.h" #include "r300_state.h" #include "r300_tex.h" +#include "r300_cmdbuf.h" + +#include "radeon_buffer_objects.h" +#include "radeon_common_context.h" #include "tnl/tnl.h" #include "tnl/t_vp_build.h" @@ -45,32 +50,59 @@ #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" -static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf, struct gl_buffer_object **bo, GLuint *nr_bo) + +static int getTypeSize(GLenum type) +{ + switch (type) { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_index_buffer *ind_buf = &r300->ind_buf; GLvoid *src_ptr; + GLuint *out; + int i; + GLboolean mapped_named_bo = GL_FALSE; - if (!mesa_ind_buf) { - ind_buf->ptr = NULL; - return; - } - - ind_buf->count = mesa_ind_buf->count; if 
(mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - bo[*nr_bo] = mesa_ind_buf->obj; - (*nr_bo)++; ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, + "%s: Fixing index buffer format. type %d\n", + __func__, mesa_ind_buf->type); + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); GLubyte *in = (GLubyte *)src_ptr; - GLuint *out = _mesa_malloc(sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1)); - int i; - ind_buf->ptr = out; + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + + assert(r300->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; @@ -80,16 +112,16 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *out++ = in[i]; } - ind_buf->free_needed = GL_TRUE; - ind_buf->is_32bit = GL_FALSE; - } else if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) { #if MESA_BIG_ENDIAN + } else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ GLushort *in = (GLushort *)src_ptr; - GLuint *out = _mesa_malloc(sizeof(GLushort) * - ((mesa_ind_buf->count + 1) & ~1)); - int i; + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, + &r300->ind_buf.bo_offset, size, 4); - ind_buf->ptr = out; + assert(r300->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; @@ -98,42 +130,61 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer if (i < 
mesa_ind_buf->count) { *out++ = in[i]; } - - ind_buf->free_needed = GL_TRUE; -#else - ind_buf->ptr = src_ptr; - ind_buf->free_needed = GL_FALSE; #endif - ind_buf->is_32bit = GL_FALSE; - } else { - ind_buf->ptr = src_ptr; - ind_buf->free_needed = GL_FALSE; - ind_buf->is_32bit = GL_TRUE; + } + + r300->ind_buf.is_32bit = GL_FALSE; + r300->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); } } -static int getTypeSize(GLenum type) + +static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { - switch (type) { - case GL_DOUBLE: - return sizeof(GLdouble); - case GL_FLOAT: - return sizeof(GLfloat); - case GL_INT: - return sizeof(GLint); - case GL_UNSIGNED_INT: - return sizeof(GLuint); - case GL_SHORT: - return sizeof(GLshort); - case GL_UNSIGNED_SHORT: - return sizeof(GLushort); - case GL_BYTE: - return sizeof(GLbyte); - case GL_UNSIGNED_BYTE: - return sizeof(GLubyte); - default: - assert(0); - return 0; + r300ContextPtr r300 = R300_CONTEXT(ctx); + + if (!mesa_ind_buf) { + r300->ind_buf.bo = NULL; + return; + } + radeon_print(RADEON_RENDER, RADEON_TRACE, "%s\n", __func__); + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) { +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) { +#endif + const GLvoid *src_ptr; + GLvoid *dst_ptr; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + + assert(r300->ind_buf.bo->ptr != NULL); + dst_ptr = 
ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); + _mesa_memcpy(dst_ptr, src_ptr, size); + + r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + r300->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } + } else { + r300FixupIndexBuffer(ctx, mesa_ind_buf); } } @@ -161,27 +212,123 @@ static int getTypeSize(GLenum type) } \ } while (0) -static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input, struct gl_buffer_object **bo, GLuint *nr_bo) +/** + * Convert attribute data type to float + * If the attribute uses named buffer object replace the bo with newly allocated bo + */ +static void r300ConvertAttrib(GLcontext *ctx, int count, const struct gl_client_array *input, struct vertex_attribute *attr) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_vertex_buffer *vbuf = &r300->vbuf; - struct vertex_attribute r300_attr; - const void *src_ptr; - GLenum type; + const GLvoid *src_ptr; + GLboolean mapped_named_bo = GL_FALSE; + GLfloat *dst_ptr; GLuint stride; + stride = (input->StrideB == 0) ? 
getTypeSize(input->Type) * input->Size : input->StrideB; + + /* Convert value for first element only */ + if (input->StrideB == 0) + count = 1; + if (input->BufferObj->Name) { if (!input->BufferObj->Pointer) { - bo[*nr_bo] = input->BufferObj; - (*nr_bo)++; ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); - assert(input->BufferObj->Pointer != NULL); + mapped_named_bo = GL_TRUE; } src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); - } else + } else { src_ptr = input->Ptr; + } + + radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, + "%s: Converting vertex attributes, attribute data format %x," + "stride %d, components %d\n" + , __FUNCTION__, input->Type + , stride, input->Size); + + assert(src_ptr != NULL); + + switch (input->Type) { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } +} + +static void r300AlignDataToDword(GLcontext *ctx, const struct gl_client_array *input, int count, struct vertex_attribute *attr) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + const int dst_stride = (input->StrideB + 3) & ~3; + const int size = getTypeSize(input->Type) * input->Size * count; + GLboolean mapped_named_bo = GL_FALSE; + + radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, size, 
32); + + if (!input->BufferObj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, "%s. Vertex alignment doesn't match hw requirements.\n", __func__); + { + GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + int i; + + for (i = 0; i < count; ++i) { + _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + src_ptr += input->StrideB; + dst_ptr += dst_stride; + } + } + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } + + attr->stride = dst_stride; +} + +static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + struct vertex_attribute r300_attr; + GLenum type; + GLuint stride; + + radeon_print(RADEON_RENDER, RADEON_TRACE, "%s\n", __func__); stride = (input->StrideB == 0) ? 
getTypeSize(input->Type) * input->Size : input->StrideB; if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || @@ -189,62 +336,29 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st getTypeSize(input->Type) != 4 || #endif stride < 4) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) { - fprintf(stderr, "%s: Converting vertex attributes, attribute data format %x,", __FUNCTION__, input->Type); - fprintf(stderr, "stride %d, components %d\n", stride, input->Size); - } - - GLfloat *dst_ptr, *tmp; - - /* Convert value for first element only */ - if (input->StrideB == 0) - count = 1; - - tmp = dst_ptr = _mesa_malloc(sizeof(GLfloat) * input->Size * count); - - switch (input->Type) { - case GL_DOUBLE: - CONVERT(GLdouble, (GLfloat)); - break; - case GL_UNSIGNED_INT: - CONVERT(GLuint, UINT_TO_FLOAT); - break; - case GL_INT: - CONVERT(GLint, INT_TO_FLOAT); - break; - case GL_UNSIGNED_SHORT: - CONVERT(GLushort, USHORT_TO_FLOAT); - break; - case GL_SHORT: - CONVERT(GLshort, SHORT_TO_FLOAT); - break; - case GL_UNSIGNED_BYTE: - assert(input->Format != GL_BGRA); - CONVERT(GLubyte, UBYTE_TO_FLOAT); - break; - case GL_BYTE: - CONVERT(GLbyte, BYTE_TO_FLOAT); - break; - default: - assert(0); - break; - } type = GL_FLOAT; - r300_attr.free_needed = GL_TRUE; - r300_attr.data = tmp; + if (input->StrideB == 0) { r300_attr.stride = 0; } else { r300_attr.stride = sizeof(GLfloat) * input->Size; } r300_attr.dwords = input->Size; + r300_attr.is_named_bo = GL_FALSE; } else { type = input->Type; - r300_attr.free_needed = GL_FALSE; - r300_attr.data = (GLvoid *)src_ptr; - r300_attr.stride = input->StrideB; - r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4; + r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4; + if (!input->BufferObj->Name) { + + if (input->StrideB == 0) { + r300_attr.stride = 0; + } else { + r300_attr.stride = (getTypeSize(type) * input->Size + 3) & ~3; + } + + r300_attr.is_named_bo = GL_FALSE; + } } 
r300_attr.size = input->Size; @@ -333,15 +447,15 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st ++vbuf->num_attribs; } -static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count, struct gl_buffer_object **bo, GLuint *nr_bo) +static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_buffer *vbuf = &r300->vbuf; - + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s\n", __func__); { int i, tmp; - tmp = r300->selected_vp->Base->Base.InputsRead; + tmp = r300->selected_vp->code.InputsRead; i = 0; vbuf->num_attribs = 0; while (tmp) { @@ -351,7 +465,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar ++i; } - r300TranslateAttrib(ctx, i, count, arrays[i], bo, nr_bo); + r300TranslateAttrib(ctx, i, count, arrays[i]); tmp >>= 1; ++i; @@ -361,45 +475,139 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS); if (r300->fallback) return; +} - { - int i; +static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *input[], int count) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + GLuint stride; + int ret; + int i, index; + radeon_print(RADEON_RENDER, RADEON_VERBOSE, + "%s: count %d num_attribs %d\n", + __func__, count, vbuf->num_attribs); + + for (index = 0; index < vbuf->num_attribs; index++) { + struct radeon_aos *aos = &r300->radeon.tcl.aos[index]; + i = vbuf->attribs[index].element; + + stride = (input[i]->StrideB == 0) ? 
getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; + + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input[i]->Type) != 4 || +#endif + stride < 4) { - for (i = 0; i < vbuf->num_attribs; i++) { - rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i], - vbuf->attribs[i].data, vbuf->attribs[i].dwords, - vbuf->attribs[i].stride, count); + r300ConvertAttrib(ctx, count, input[i], &vbuf->attribs[index]); + } else { + if (input[i]->BufferObj->Name) { + if (stride % 4 != 0) { + assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); + r300AlignDataToDword(ctx, input[i], count, &vbuf->attribs[index]); + vbuf->attribs[index].is_named_bo = GL_FALSE; + } else { + vbuf->attribs[index].stride = input[i]->StrideB; + vbuf->attribs[index].bo_offset = (intptr_t) input[i]->Ptr; + vbuf->attribs[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + vbuf->attribs[index].is_named_bo = GL_TRUE; + } + } else { + + int size; + int local_count = count; + uint32_t *dst; + + if (input[i]->StrideB == 0) { + size = getTypeSize(input[i]->Type) * input[i]->Size; + local_count = 1; + } else { + size = getTypeSize(input[i]->Type) * input[i]->Size * local_count; + } + + radeonAllocDmaRegion(&r300->radeon, &vbuf->attribs[index].bo, &vbuf->attribs[index].bo_offset, size, 32); + assert(vbuf->attribs[index].bo->ptr != NULL); + dst = (uint32_t *)ADD_POINTERS(vbuf->attribs[index].bo->ptr, vbuf->attribs[index].bo_offset); + switch (vbuf->attribs[index].dwords) { + case 1: radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + case 3: radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + case 4: radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + default: assert(0); break; + } + + } } - r300->radeon.tcl.aos_count = vbuf->num_attribs; + 
aos->count = vbuf->attribs[index].stride == 0 ? 1 : count; + aos->stride = vbuf->attribs[index].stride / sizeof(float); + aos->components = vbuf->attribs[index].dwords; + aos->bo = vbuf->attribs[index].bo; + aos->offset = vbuf->attribs[index].bo_offset; + + if (vbuf->attribs[index].is_named_bo) { + radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[index].bo, RADEON_GEM_DOMAIN_GTT, 0); + } } + + r300->radeon.tcl.aos_count = vbuf->num_attribs; + ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, first_elem(&r300->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); + r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, ret); + } -static void r300FreeData(GLcontext *ctx, struct gl_buffer_object **bo, GLuint nr_bo) +static void r300FreeData(GLcontext *ctx) { + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s\n", __func__); + r300ContextPtr r300 = R300_CONTEXT(ctx); { - struct r300_vertex_buffer *vbuf = &R300_CONTEXT(ctx)->vbuf; int i; - for (i = 0; i < vbuf->num_attribs; i++) { - if (vbuf->attribs[i].free_needed) - _mesa_free(vbuf->attribs[i].data); + for (i = 0; i < r300->vbuf.num_attribs; i++) { + if (!r300->vbuf.attribs[i].is_named_bo) { + radeon_bo_unref(r300->vbuf.attribs[i].bo); + } + r300->radeon.tcl.aos[i].bo = NULL; } } { - struct r300_index_buffer *ind_buf = &R300_CONTEXT(ctx)->ind_buf; - if (ind_buf->free_needed) - _mesa_free(ind_buf->ptr); + if (r300->ind_buf.bo != NULL) { + radeon_bo_unref(r300->ind_buf.bo); + } } +} - { - int i; +static GLuint r300PredictTryDrawPrimsSize(GLcontext *ctx, GLuint nr_prims) +{ + struct r300_context *r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + GLboolean flushed; + GLuint dwords; + GLuint state_size; + + dwords = 2*CACHE_FLUSH_BUFSZ; + dwords += PRE_EMIT_STATE_BUFSZ; + dwords += (AOS_BUFSZ(vbuf->num_attribs) + + 
SCISSORS_BUFSZ*2 + + FIREAOS_BUFSZ )*nr_prims; + + state_size = radeonCountStateEmitSize(&r300->radeon); + flushed = rcommonEnsureCmdBufSpace(&r300->radeon, + dwords + state_size, + __FUNCTION__); + if (flushed) + dwords += radeonCountStateEmitSize(&r300->radeon); + else + dwords += state_size; - for (i = 0; i < nr_bo; ++i) { - ctx->Driver.UnmapBuffer(ctx, 0, bo[i]); - } - } + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords); + return dwords; } static GLboolean r300TryDrawPrims(GLcontext *ctx, @@ -411,8 +619,10 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, GLuint max_index ) { struct r300_context *r300 = R300_CONTEXT(ctx); - struct gl_buffer_object *bo[VERT_ATTRIB_MAX+1]; - GLuint i, nr_bo = 0; + GLuint i; + + radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: %u (%d-%d) cs begin at %d\n", + __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw ); if (ctx->NewState) _mesa_update_state( ctx ); @@ -424,23 +634,27 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx)); - r300FixupIndexBuffer(ctx, ib, bo, &nr_bo); - - /* ensure we have the cmd buf space in advance to cover - * the state + DMA AOS pointers */ - rcommonEnsureCmdBufSpace(&r300->radeon, - r300->radeon.hw.max_state_size + (50*sizeof(int)), - __FUNCTION__); - - r300SetVertexFormat(ctx, arrays, max_index + 1, bo, &nr_bo); + r300SetVertexFormat(ctx, arrays, max_index + 1); if (r300->fallback) return GL_FALSE; - r300SetupVAP(ctx, r300->selected_vp->Base->Base.InputsRead, r300->selected_vp->Base->Base.OutputsWritten); + r300SetupVAP(ctx, r300->selected_vp->code.InputsRead, r300->selected_vp->code.OutputsWritten); r300UpdateShaderStates(r300); + /* ensure we have the cmd buf space in advance to cover + * the state + DMA AOS pointers */ + GLuint emit_end = r300PredictTryDrawPrimsSize(ctx, nr_prims) + + r300->radeon.cmdbuf.cs->cdw; + + 
r300SetupIndexBuffer(ctx, ib); + + r300AllocDmaRegions(ctx, arrays, max_index + 1); + + if (r300->fallback) + return GL_FALSE; + r300EmitCacheFlush(r300); radeonEmitState(&r300->radeon); @@ -450,9 +664,14 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300EmitCacheFlush(r300); - radeonReleaseArrays(ctx, ~0); + r300FreeData(ctx); - r300FreeData(ctx, bo, nr_bo); + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: %u (%d-%d) cs ending at %d\n", + __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw ); + + if (emit_end < r300->radeon.cmdbuf.cs->cdw) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", r300->radeon.cmdbuf.cs->cdw - emit_end); return GL_TRUE; } @@ -462,28 +681,26 @@ static void r300DrawPrims(GLcontext *ctx, const struct _mesa_prim *prim, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index) { - struct split_limits limits; GLboolean retval; - if (ib) - limits.max_verts = 0xffffffff; - else - limits.max_verts = 65535; - - limits.max_indices = 65535; - limits.max_vb_size = 1024*1024; + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } if (min_index) { + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, + "%s: Rebasing primitives. 
%p nr_prims %d min_index %u max_index %u\n", + __func__, prim, nr_prims, min_index, max_index); vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims ); return; } - if ((ib && ib->count > 65535)) { - vbo_split_prims (ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims, &limits); - return; - } /* Make an attempt at drawing */ retval = r300TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index feb3370f37..07e6223087 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -124,41 +124,6 @@ GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes) return ret; } -GLboolean r300EmitArrays(GLcontext * ctx) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_vertex_buffer *vbuf = &r300->vbuf; - GLuint InputsRead, OutputsWritten; - - r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten); - - r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS); - if (r300->fallback & R300_RASTER_FALLBACK_MASK) - return GL_FALSE; - - { - struct vertex_buffer *mesa_vb = &TNL_CONTEXT(ctx)->vb; - GLuint attr, i; - - for (i = 0; i < vbuf->num_attribs; i++) { - attr = vbuf->attribs[i].element; - rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i], mesa_vb->AttribPtr[attr]->data, - mesa_vb->AttribPtr[attr]->size, mesa_vb->AttribPtr[attr]->stride, mesa_vb->Count); - } - - r300->radeon.tcl.aos_count = vbuf->num_attribs; - - /* Fill index buffer info */ - r300->ind_buf.ptr = mesa_vb->Elts; - r300->ind_buf.is_32bit = GL_TRUE; - r300->ind_buf.free_needed = GL_FALSE; - } - - r300SetupVAP(ctx, InputsRead, OutputsWritten); - - return GL_TRUE; -} - void r300EmitCacheFlush(r300ContextPtr rmesa) { BATCH_LOCALS(&rmesa->radeon); diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 3f8c60ffae..8e57e354d1 100644 --- 
a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -104,7 +104,7 @@ static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet) return cmd.u; } -static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn, +static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn, unsigned short count) { drm_r300_cmd_header_t cmd; @@ -216,8 +216,6 @@ void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags) } } -extern GLboolean r300EmitArrays(GLcontext * ctx); - extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c deleted file mode 100644 index 55c1cfe631..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ /dev/null @@ -1,451 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "r300_fragprog.h" - -#include "shader/prog_parameter.h" - -#include "r300_context.h" -#include "r300_fragprog_swizzle.h" - -static void reset_srcreg(struct prog_src_register* reg) -{ - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; -} - -static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) -{ - gl_state_index fail_value_tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 - }; - struct prog_src_register reg = { 0, }; - - fail_value_tokens[2] = tmu; - reg.File = PROGRAM_STATE_VAR; - reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); - reg.Swizzle = SWIZZLE_WWWW; - return reg; -} - -/** - * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - premultiply texture coordinates for RECT - * - extract operand swizzles - * - introduce a temporary register when write masks are needed - * - * \todo If/when r5xx uses the radeon_program architecture, this can probably - * be reused. 
- */ -GLboolean r300_transform_TEX( - struct radeon_transform_context *t, - struct prog_instruction* orig_inst, void* data) -{ - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - struct prog_instruction inst = *orig_inst; - struct prog_instruction* tgt; - GLboolean destredirect = GL_FALSE; - - if (inst.Opcode != OPCODE_TEX && - inst.Opcode != OPCODE_TXB && - inst.Opcode != OPCODE_TXP && - inst.Opcode != OPCODE_KIL) - return GL_FALSE; - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = inst.DstReg; - if (comparefunc == GL_ALWAYS) { - tgt->SrcReg[0].File = PROGRAM_BUILTIN; - tgt->SrcReg[0].Swizzle = SWIZZLE_1111; - } else { - tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); - } - return GL_TRUE; - } - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = radeonFindFreeTemporary(t); - inst.DstReg.WriteMask = WRITEMASK_XYZW; - } - - - /* Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. 
- */ - if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { - gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, - 0 - }; - - int tempreg = radeonFindFreeTemporary(t); - int factor_index; - - tokens[2] = inst.TexSrcUnit; - factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens); - - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MUL; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tempreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - tgt->SrcReg[1].File = PROGRAM_STATE_VAR; - tgt->SrcReg[1].Index = factor_index; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tempreg; - } - - if (inst.Opcode != OPCODE_KIL) { - if (inst.DstReg.File != PROGRAM_TEMPORARY || - inst.DstReg.WriteMask != WRITEMASK_XYZW) { - int tempreg = radeonFindFreeTemporary(t); - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; - inst.DstReg.WriteMask = WRITEMASK_XYZW; - destredirect = GL_TRUE; - } else if (inst.SaturateMode) { - destredirect = GL_TRUE; - } - } - - if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { - int tmpreg = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(t->Program, 1); - tgt->Opcode = OPCODE_MOV; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tmpreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tmpreg; - } - - tgt = radeonAppendInstructions(t->Program, 1); - _mesa_copy_instructions(tgt, &inst, 1); - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; - int rcptemp = radeonFindFreeTemporary(t); - int pass, fail; - - tgt = 
radeonAppendInstructions(t->Program, 3); - - tgt[0].Opcode = OPCODE_RCP; - tgt[0].DstReg.File = PROGRAM_TEMPORARY; - tgt[0].DstReg.Index = rcptemp; - tgt[0].DstReg.WriteMask = WRITEMASK_W; - tgt[0].SrcReg[0] = inst.SrcReg[0]; - tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; - - tgt[1].Opcode = OPCODE_MAD; - tgt[1].DstReg = inst.DstReg; - tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; - tgt[1].SrcReg[0] = inst.SrcReg[0]; - tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[1].Index = rcptemp; - tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; - tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[2].Index = inst.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; - else - tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; - - tgt[2].Opcode = OPCODE_CMP; - tgt[2].DstReg = orig_inst->DstReg; - tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; - tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; - - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } - - tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; - tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; - tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); - } else if (destredirect) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = orig_inst->DstReg; - tgt->SaturateMode = inst.SaturateMode; - tgt->SrcReg[0].File = PROGRAM_TEMPORARY; - tgt->SrcReg[0].Index = inst.DstReg.Index; - } - - return GL_TRUE; -} - -/* just some random things... 
*/ -void r300FragmentProgramDump(union rX00_fragment_program_code *c) -{ - struct r300_fragment_program_code *code = &c->r300; - int n, i, j; - static int pc = 0; - - fprintf(stderr, "pc=%d*************************************\n", pc++); - - fprintf(stderr, "Hardware program\n"); - fprintf(stderr, "----------------\n"); - - for (n = 0; n < (code->cur_node + 1); n++) { - fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " - "alu_end: %d, tex_end: %d, flags: %08x\n", n, - code->node[n].alu_offset, - code->node[n].tex_offset, - code->node[n].alu_end, code->node[n].tex_end, - code->node[n].flags); - - if (n > 0 || code->first_node_has_tex) { - fprintf(stderr, " TEX:\n"); - for (i = code->node[n].tex_offset; - i <= code->node[n].tex_offset + code->node[n].tex_end; - ++i) { - const char *instr; - - switch ((code->tex. - inst[i] >> R300_TEX_INST_SHIFT) & - 15) { - case R300_TEX_OP_LD: - instr = "TEX"; - break; - case R300_TEX_OP_KIL: - instr = "KIL"; - break; - case R300_TEX_OP_TXP: - instr = "TXP"; - break; - case R300_TEX_OP_TXB: - instr = "TXB"; - break; - default: - instr = "UNKNOWN"; - } - - fprintf(stderr, - " %s t%i, %c%i, texture[%i] (%08x)\n", - instr, - (code->tex. - inst[i] >> R300_DST_ADDR_SHIFT) & 31, - 't', - (code->tex. - inst[i] >> R300_SRC_ADDR_SHIFT) & 31, - (code->tex. - inst[i] & R300_TEX_ID_MASK) >> - R300_TEX_ID_SHIFT, - code->tex.inst[i]); - } - } - - for (i = code->node[n].alu_offset; - i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) { - char srcc[3][10], dstc[20]; - char srca[3][10], dsta[20]; - char argc[3][20]; - char arga[3][20]; - char flags[5], tmp[10]; - - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].inst1 >> (j * 6); - int rega = code->alu.inst[i].inst3 >> (j * 6); - - sprintf(srcc[j], "%c%i", - (regc & 32) ? 'c' : 't', regc & 31); - sprintf(srca[j], "%c%i", - (rega & 32) ? 'c' : 't', rega & 31); - } - - dstc[0] = 0; - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_X) ? 
"x" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(dstc, "t%i.%s ", - (code->alu.inst[i]. - inst1 >> R300_ALU_DSTC_SHIFT) & 31, - flags); - } - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(tmp, "o%i.%s", - (code->alu.inst[i]. - inst1 >> R300_ALU_DSTC_SHIFT) & 31, - flags); - strcat(dstc, tmp); - } - - dsta[0] = 0; - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { - sprintf(dsta, "t%i.w ", - (code->alu.inst[i]. - inst3 >> R300_ALU_DSTA_SHIFT) & 31); - } - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { - sprintf(tmp, "o%i.w ", - (code->alu.inst[i]. - inst3 >> R300_ALU_DSTA_SHIFT) & 31); - strcat(dsta, tmp); - } - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { - strcat(dsta, "Z"); - } - - fprintf(stderr, - "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" - " w: %3s %3s %3s -> %-20s (%08x)\n", i, - srcc[0], srcc[1], srcc[2], dstc, - code->alu.inst[i].inst1, srca[0], srca[1], - srca[2], dsta, code->alu.inst[i].inst3); - - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].inst0 >> (j * 7); - int rega = code->alu.inst[i].inst2 >> (j * 7); - int d; - char buf[20]; - - d = regc & 31; - if (d < 12) { - switch (d % 4) { - case R300_ALU_ARGC_SRC0C_XYZ: - sprintf(buf, "%s.xyz", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_XXX: - sprintf(buf, "%s.xxx", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_YYY: - sprintf(buf, "%s.yyy", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_ZZZ: - sprintf(buf, "%s.zzz", - srcc[d / 4]); - break; - } - } else if (d < 15) { - sprintf(buf, "%s.www", srca[d - 12]); - } else if (d == 20) { - sprintf(buf, "0.0"); - } else if (d == 21) { - sprintf(buf, "1.0"); - } else if (d == 22) { - 
sprintf(buf, "0.5"); - } else if (d >= 23 && d < 32) { - d -= 23; - switch (d / 3) { - case 0: - sprintf(buf, "%s.yzx", - srcc[d % 3]); - break; - case 1: - sprintf(buf, "%s.zxy", - srcc[d % 3]); - break; - case 2: - sprintf(buf, "%s.Wzy", - srcc[d % 3]); - break; - } - } else { - sprintf(buf, "%i", d); - } - - sprintf(argc[j], "%s%s%s%s", - (regc & 32) ? "-" : "", - (regc & 64) ? "|" : "", - buf, (regc & 64) ? "|" : ""); - - d = rega & 31; - if (d < 9) { - sprintf(buf, "%s.%c", srcc[d / 3], - 'x' + (char)(d % 3)); - } else if (d < 12) { - sprintf(buf, "%s.w", srca[d - 9]); - } else if (d == 16) { - sprintf(buf, "0.0"); - } else if (d == 17) { - sprintf(buf, "1.0"); - } else if (d == 18) { - sprintf(buf, "0.5"); - } else { - sprintf(buf, "%i", d); - } - - sprintf(arga[j], "%s%s%s%s", - (rega & 32) ? "-" : "", - (rega & 64) ? "|" : "", - buf, (rega & 64) ? "|" : ""); - } - - fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" - " w: %8s %8s %8s op: %08x\n", - argc[0], argc[1], argc[2], - code->alu.inst[i].inst0, arga[0], arga[1], - arga[2], code->alu.inst[i].inst2); - } - } -} diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h deleted file mode 100644 index 5ce6f33cee..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -/* - * Authors: - * Ben Skeggs <darktama@iinet.net.au> - * Jerome Glisse <j.glisse@gmail.com> - */ -#ifndef __R300_FRAGPROG_H_ -#define __R300_FRAGPROG_H_ - -#include "shader/program.h" -#include "shader/prog_instruction.h" - -#include "r300_context.h" -#include "radeon_program.h" - -#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 -#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 - -#if 1 - -/** - * Fragment program helper macros - */ - -/* Produce unshifted source selectors */ -#define FP_TMP(idx) (idx) -#define FP_CONST(idx) ((idx) | (1 << 5)) - -/* Produce source/dest selector dword */ -#define FP_SELC_MASK_NO 0 -#define FP_SELC_MASK_X 1 -#define FP_SELC_MASK_Y 2 -#define FP_SELC_MASK_XY 3 -#define FP_SELC_MASK_Z 4 -#define FP_SELC_MASK_XZ 5 -#define FP_SELC_MASK_YZ 6 -#define FP_SELC_MASK_XYZ 7 - -#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTC_SHIFT) | \ - (FP_SELC_MASK_##regmask << 23) | \ - (FP_SELC_MASK_##outmask << 26) | \ - ((src0) << R300_ALU_SRC0C_SHIFT) | \ - ((src1) << R300_ALU_SRC1C_SHIFT) | \ - ((src2) << R300_ALU_SRC2C_SHIFT)) - -#define FP_SELA_MASK_NO 0 -#define FP_SELA_MASK_W 1 - -#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTA_SHIFT) | \ - (FP_SELA_MASK_##regmask << 23) | \ - (FP_SELA_MASK_##outmask << 24) | \ - ((src0) << R300_ALU_SRC0A_SHIFT) | \ - ((src1) << R300_ALU_SRC1A_SHIFT) | \ - ((src2) << R300_ALU_SRC2A_SHIFT)) - -/* Produce unshifted argument selectors */ -#define FP_ARGC(source) R300_ALU_ARGC_##source -#define FP_ARGA(source) R300_ALU_ARGA_##source -#define FP_ABS(arg) ((arg) | (1 << 6)) -#define FP_NEG(arg) ((arg) ^ (1 << 5)) - -/* Produce instruction dword */ -#define FP_INSTRC(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTC_##opcode | \ - ((arg0) << R300_ALU_ARG0C_SHIFT) | \ - ((arg1) << R300_ALU_ARG1C_SHIFT) | \ - ((arg2) << R300_ALU_ARG2C_SHIFT)) - -#define FP_INSTRA(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTA_##opcode | \ - ((arg0) << 
R300_ALU_ARG0A_SHIFT) | \ - ((arg1) << R300_ALU_ARG1A_SHIFT) | \ - ((arg2) << R300_ALU_ARG2A_SHIFT)) - -#endif - -extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); - -extern void r300FragmentProgramDump(union rX00_fragment_program_code *c); - -extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); - -#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index f5c4c0f4a0..469c278b51 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -42,21 +42,51 @@ #include "shader/prog_parameter.h" #include "shader/prog_print.h" +#include "compiler/radeon_compiler.h" + #include "r300_state.h" -#include "r300_fragprog.h" -#include "r300_fragprog_swizzle.h" -#include "r500_fragprog.h" -#include "radeon_program.h" -#include "radeon_program_alu.h" -static void nqssadce_init(struct nqssadce_state* s) +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; + } +} + +static GLuint build_func(GLuint comparefunc) { - s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; + return comparefunc - GL_NEVER; } /** + * Collect all external state that is relevant for compiling the given + * fragment program. 
+ */ +static void build_state( + r300ContextPtr r300, + struct gl_fragment_program *fp, + struct r300_fragment_program_external_state *state) +{ + int unit; + + _mesa_bzero(state, sizeof(*state)); + + for(unit = 0; unit < 16; ++unit) { + if (fp->Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; + + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); + } + } +} + + +/** * Transform the program to support fragment.position. * * Introduce a small fragment at the start of the program that will be @@ -65,104 +95,26 @@ static void nqssadce_init(struct nqssadce_state* s) * to read from a newly allocated temporary. * */ -static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp) { - GLuint InputsRead = compiler->fp->Base->InputsRead; + int i; - if (!(InputsRead & FRAG_BIT_WPOS)) { - compiler->fp->wpos_attr = FRAG_ATTRIB_MAX; + if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) { + fp->wpos_attr = FRAG_ATTRIB_MAX; return; } - static gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 - }; - struct prog_instruction *fpi; - GLuint window_index; - int i = 0; - for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) { - if (!(InputsRead & (1 << i))) { - InputsRead &= ~(1 << FRAG_ATTRIB_WPOS); - InputsRead |= 1 << i; - compiler->fp->Base->InputsRead = InputsRead; - compiler->fp->wpos_attr = i; + if (!(compiler->Base.Program.InputsRead & (1 << i))) { + fp->wpos_attr = i; break; } } - GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); - - _mesa_insert_instructions(compiler->program, 0, 3); - fpi = compiler->program->Instructions; - i = 0; - - /* perspective divide */ - fpi[i].Opcode = OPCODE_RCP; - - 
fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_W; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; - i++; - - fpi[i].Opcode = OPCODE_MUL; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); - - fpi[i].Opcode = OPCODE_MAD; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[0].Index = tempregi; - fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[1].Index = window_index; - fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[2].Index = window_index; - fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - for (; i < compiler->program->NumInstructions; ++i) { - int reg; - for (reg = 0; reg < 3; reg++) { - if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && - fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { - fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[reg].Index = tempregi; - } - } - } + rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr); } - /** * 
Rewrite fragment.fogcoord to use a texture coordinate slot. * Note that fogcoord is forced into an X001 pattern, and this enforcement @@ -170,205 +122,117 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) * * See also the counterpart rewriting for vertex programs. */ -static void rewriteFog(struct r300_fragment_program_compiler *compiler) +static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp) { - struct r300_fragment_program *fp = compiler->fp; - GLuint InputsRead; + struct prog_src_register src; int i; - InputsRead = fp->Base->InputsRead; - - if (!(InputsRead & FRAG_BIT_FOGC)) { + if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) { fp->fog_attr = FRAG_ATTRIB_MAX; return; } for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) { - if (!(InputsRead & (1 << i))) { - InputsRead &= ~(1 << FRAG_ATTRIB_FOGC); - InputsRead |= 1 << i; - fp->Base->InputsRead = InputsRead; + if (!(compiler->Base.Program.InputsRead & (1 << i))) { fp->fog_attr = i; break; } } - { - struct prog_instruction *inst; - - inst = compiler->program->Instructions; - while (inst->Opcode != OPCODE_END) { - const int src_regs = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < src_regs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) { - inst->SrcReg[i].Index = fp->fog_attr; - inst->SrcReg[i].Swizzle = combine_swizzles( - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE), - inst->SrcReg[i].Swizzle); - } - } - ++inst; - } - } + memset(&src, 0, sizeof(src)); + src.File = PROGRAM_INPUT; + src.Index = fp->fog_attr; + src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); + rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src); } -static GLuint build_dtm(GLuint depthmode) -{ - switch(depthmode) { - default: - case GL_LUMINANCE: return 0; - case GL_INTENSITY: return 1; - case GL_ALPHA: return 2; - } -} - -static GLuint 
build_func(GLuint comparefunc) -{ - return comparefunc - GL_NEVER; -} /** - * Collect all external state that is relevant for compiling the given - * fragment program. + * Reserve hardware temporary registers for the program inputs. + * + * @note This allocation is performed explicitly, because the order of inputs + * is determined by the RS hardware. */ -static void build_state( - r300ContextPtr r300, - struct gl_fragment_program *fp, - struct r300_fragment_program_external_state *state) +static void allocate_hw_inputs( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata) { - int unit; - - _mesa_bzero(state, sizeof(*state)); - - for(unit = 0; unit < 16; ++unit) { - if (fp->Base.ShadowSamplers & (1 << unit)) { - struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - - state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); - state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); - } + GLuint InputsRead = c->Base.Program.InputsRead; + int i; + GLuint hwindex = 0; + + /* Primary colour */ + if (InputsRead & FRAG_BIT_COL0) + allocate(mydata, FRAG_ATTRIB_COL0, hwindex++); + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) + allocate(mydata, FRAG_ATTRIB_COL1, hwindex++); + InputsRead &= ~FRAG_BIT_COL1; + + /* Texcoords */ + for (i = 0; i < 8; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) + allocate(mydata, FRAG_ATTRIB_TEX0+i, hwindex++); } -} + InputsRead &= ~FRAG_BITS_TEX_ANY; -static void rewrite_depth_out(struct gl_program *prog) -{ - struct prog_instruction *inst; + /* Fogcoords treated as a texcoord */ + if (InputsRead & FRAG_BIT_FOGC) + allocate(mydata, FRAG_ATTRIB_FOGC, hwindex++); + InputsRead &= ~FRAG_BIT_FOGC; - for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) { - if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH) - continue; + /* fragment 
position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) + allocate(mydata, FRAG_ATTRIB_WPOS, hwindex++); + InputsRead &= ~FRAG_BIT_WPOS; - if (inst->DstReg.WriteMask & WRITEMASK_Z) { - inst->DstReg.WriteMask = WRITEMASK_W; - } else { - inst->DstReg.WriteMask = 0; - continue; - } - - switch (inst->Opcode) { - case OPCODE_FRC: - case OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - break; - case OPCODE_CMP: - case OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); - break; - default: - // Scalar instructions needn't be reswizzled - break; - } - } + /* Anything else */ + if (InputsRead) + rc_error(&c->Base, "Don't know how to handle inputs 0x%x\n", InputsRead); } -void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp) + +static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_program_cont *cont, struct r300_fragment_program *fp) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_fragment_program_compiler compiler; - compiler.r300 = r300; - compiler.fp = fp; - compiler.code = &fp->code; - compiler.program = fp->Base; + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & RADEON_PIXEL) ? GL_TRUE : GL_FALSE; - if (RADEON_DEBUG & DEBUG_PIXEL) { - fflush(stdout); + compiler.code = &fp->code; + compiler.state = fp->state; + compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? 
GL_TRUE : GL_FALSE; + compiler.OutputDepth = FRAG_RESULT_DEPTH; + compiler.OutputColor = FRAG_RESULT_COLOR; + compiler.AllocateHwInputs = &allocate_hw_inputs; + + if (compiler.Base.Debug) { + fflush(stderr); _mesa_printf("Fragment Program: Initial program:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); + _mesa_print_program(&cont->Base.Base); + fflush(stderr); } - insert_WPOS_trailer(&compiler); - - rewriteFog(&compiler); - - rewrite_depth_out(compiler.program); - - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_program_transformation transformations[] = { - { &r500_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformDeriv, 0 }, - { &radeonTransformTrigScale, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 4, transformations); - } else { - struct radeon_program_transformation transformations[] = { - { &r300_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformTrigSimple, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 3, transformations); - } - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: After native rewrite:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } + rc_mesa_to_rc_program(&compiler.Base, &cont->Base.Base); - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r500FPIsNativeSwizzle, - .BuildSwizzle = &r500FPBuildSwizzle - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } else { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } + insert_WPOS_trailer(&compiler, fp); - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } + 
rewriteFog(&compiler, fp); - if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler)) - fp->error = GL_TRUE; + r3xx_compile_fragment_program(&compiler); + fp->error = compiler.Base.Error; - fp->translated = GL_TRUE; + fp->InputsRead = compiler.Base.Program.InputsRead; - if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300->vtbl.FragmentProgramDump(&fp->code); + rc_destroy(&compiler.Base); } -struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx) +struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_fragment_program_cont *fp_list; @@ -389,11 +253,11 @@ struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx) fp = _mesa_calloc(sizeof(struct r300_fragment_program)); fp->state = state; - fp->translated = GL_FALSE; - fp->Base = _mesa_clone_program(ctx, &ctx->FragmentProgram._Current->Base); fp->next = fp_list->progs; fp_list->progs = fp; + translate_fragment_program(ctx, fp_list, fp); + return r300->selected_fp = fp; } diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h index 5e103ee408..3d64c08cee 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h @@ -32,8 +32,6 @@ #include "r300_context.h" -extern void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp); - -struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx); +struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx); #endif diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index ddabd53992..5cb04e2bb6 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/imports.h" #include "main/macros.h" #include "main/context.h" +#include "main/simple_list.h" #include "swrast/swrast.h" #include "radeon_common.h" @@ -55,7 +56,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_vertprog.h" #include "radeon_reg.h" #include "r300_emit.h" -#include "r300_fragprog.h" #include "r300_context.h" #include "vblank.h" @@ -66,6 +66,66 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define CLEARBUFFER_DEPTH 0x2 #define CLEARBUFFER_STENCIL 0x4 +#if 1 + +/** + * Fragment program helper macros + */ + +/* Produce unshifted source selectors */ +#define FP_TMP(idx) (idx) +#define FP_CONST(idx) ((idx) | (1 << 5)) + +/* Produce source/dest selector dword */ +#define FP_SELC_MASK_NO 0 +#define FP_SELC_MASK_X 1 +#define FP_SELC_MASK_Y 2 +#define FP_SELC_MASK_XY 3 +#define FP_SELC_MASK_Z 4 +#define FP_SELC_MASK_XZ 5 +#define FP_SELC_MASK_YZ 6 +#define FP_SELC_MASK_XYZ 7 + +#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTC_SHIFT) | \ + (FP_SELC_MASK_##regmask << 23) | \ + (FP_SELC_MASK_##outmask << 26) | \ + ((src0) << R300_ALU_SRC0C_SHIFT) | \ + ((src1) << R300_ALU_SRC1C_SHIFT) | \ + ((src2) << R300_ALU_SRC2C_SHIFT)) + +#define FP_SELA_MASK_NO 0 +#define FP_SELA_MASK_W 1 + +#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTA_SHIFT) | \ + (FP_SELA_MASK_##regmask << 23) | \ + (FP_SELA_MASK_##outmask << 24) | \ + ((src0) << R300_ALU_SRC0A_SHIFT) | \ + ((src1) << R300_ALU_SRC1A_SHIFT) | \ + ((src2) << R300_ALU_SRC2A_SHIFT)) + +/* Produce unshifted argument selectors */ +#define FP_ARGC(source) R300_ALU_ARGC_##source +#define FP_ARGA(source) R300_ALU_ARGA_##source +#define FP_ABS(arg) ((arg) | (1 << 6)) +#define FP_NEG(arg) ((arg) ^ (1 << 5)) + +/* Produce instruction dword */ +#define FP_INSTRC(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTC_##opcode | \ + ((arg0) << R300_ALU_ARG0C_SHIFT) | \ + ((arg1) << 
R300_ALU_ARG1C_SHIFT) | \ + ((arg2) << R300_ALU_ARG2C_SHIFT)) + +#define FP_INSTRA(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTA_##opcode | \ + ((arg0) << R300_ALU_ARG0A_SHIFT) | \ + ((arg1) << R300_ALU_ARG1A_SHIFT) | \ + ((arg2) << R300_ALU_ARG2A_SHIFT)) + +#endif + static void r300EmitClearState(GLcontext * ctx); static void r300ClearBuffer(r300ContextPtr r300, int flags, @@ -78,7 +138,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, GLuint cbpitch = 0; r300ContextPtr rmesa = r300; - if (RADEON_DEBUG & DEBUG_IOCTL) + if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n", __FUNCTION__, rrb, dPriv->x, dPriv->y, dPriv->w, dPriv->h); @@ -109,18 +169,21 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, } #if 1 if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) { - assert(rrbd != 0); - cbpitch = (rrbd->pitch / rrbd->cpp); + uint32_t zbpitch = (rrbd->pitch / rrbd->cpp); if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ - cbpitch |= R300_DEPTHMACROTILE_ENABLE; + zbpitch |= R300_DEPTHMACROTILE_ENABLE; } if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ - cbpitch |= R300_DEPTHMICROTILE_TILED; + zbpitch |= R300_DEPTHMICROTILE_TILED; } BEGIN_BATCH_NO_AUTOSTATE(6); OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch); + OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1); + if (!r300->radeon.radeonScreen->kernel_mm) + OUT_BATCH(zbpitch); + else + OUT_BATCH_RELOC(zbpitch, rrbd->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); } #endif @@ -444,7 +507,15 @@ static void r300EmitClearState(GLcontext * ctx) R500_ALU_RGBA_A_SWIZ_0; r500fp.cmd[7] = 0; - emit_r500fp(ctx, &r500fp); + if (r300->radeon.radeonScreen->kernel_mm) { + emit_r500fp(ctx, &r500fp); + } else { + int dwords = r500fp.check(ctx,&r500fp); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(r500fp.cmd, dwords); + END_BATCH(); + } + } BEGIN_BATCH(2); @@ -488,6 
+559,7 @@ static void r300EmitClearState(GLcontext * ctx) struct radeon_state_atom vpu; uint32_t _cmd[10]; R300_STATECHANGE(r300, pvs); + R300_STATECHANGE(r300, vap_flush); R300_STATECHANGE(r300, vpi); BEGIN_BATCH(4); @@ -508,12 +580,12 @@ static void r300EmitClearState(GLcontext * ctx) 0, 0xf, PVS_DST_REG_OUT); vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[4] = 0x0; vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, @@ -521,17 +593,27 @@ static void r300EmitClearState(GLcontext * ctx) vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, - - VSF_FLAG_NONE); + NEGATE_NONE); vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[8] = 0x0; - r300->vap_flush_needed = GL_TRUE; - emit_vpu(ctx, &vpu); + if (r300->radeon.radeonScreen->kernel_mm) { + int dwords = r300->hw.vap_flush.check(ctx,&r300->hw.vap_flush); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(r300->hw.vap_flush.cmd, dwords); + END_BATCH(); + emit_vpu(ctx, &vpu); + } else { + int dwords = vpu.check(ctx,&vpu); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(vpu.cmd, dwords); + END_BATCH(); + } + } } @@ -546,7 +628,7 @@ static int r300KernelClear(GLcontext *ctx, GLuint flags) /* Make sure it fits there. 
*/ radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs); - + if (flags & BUFFER_BIT_COLOR0) { rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0); radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, @@ -623,7 +705,7 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) int i, ret; struct gl_framebuffer *fb = ctx->DrawBuffer; - if (RADEON_DEBUG & DEBUG_IOCTL) + if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "r300Clear\n"); if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { @@ -685,14 +767,13 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) } if (swrast_mask) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, swrast_mask); _swrast_Clear(ctx, swrast_mask); } } - void r300InitIoctlFuncs(struct dd_function_table *functions) { functions->Clear = r300Clear; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 357c600af9..b9ccd098dc 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1022,15 +1022,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \ R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD) /** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */ # define R300_RE_SHADE_MODEL_FLAT ( \ R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \ R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \ - R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD) /* Specifies red & green components of fill color -- S312 format -- Backwards comp. */ #define R300_GA_SOLID_RG 0x427c @@ -1128,6 +1126,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* SU Depth Offset value */ #define R300_SU_DEPTH_OFFSET 0x42c4 +#define R300_SU_REG_DEST 0x42c8 +# define R300_RASTER_PIPE_SELECT_0 (1 << 0) +# define R300_RASTER_PIPE_SELECT_1 (1 << 1) +# define R300_RASTER_PIPE_SELECT_2 (1 << 2) +# define R300_RASTER_PIPE_SELECT_3 (1 << 3) +# define R300_RASTER_PIPE_SELECT_ALL 0xf + /* BEGIN: Rasterization / Interpolators - many guesses */ @@ -2014,6 +2019,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R500_FG_ALPHA_VALUE 0x4be0 # define R500_FG_ALPHA_VALUE_MASK 0x0000ffff +#define RV530_FG_ZBREG_DEST 0x4be8 +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0) + /* gap */ /* Fragment program parameters in 7.16 floating point */ @@ -2303,6 +2313,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_Z_WRITE_ENABLE (1 << 2) # define R300_Z_SIGNED_COMPARE (1 << 3) # define R300_STENCIL_FRONT_BACK (1 << 4) +# define R400_ZSIGNED_MAGNITUDE (1 << 5) +# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6) #define R300_ZB_ZSTENCILCNTL 0x4f04 /* functions */ @@ -2667,6 +2679,24 @@ enum { PVS_SRC_ADDR_MODE_1_SHIFT = 32, }; + +#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ + (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ + | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ + | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ + | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ + | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) + +#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ + (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ + | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ + | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ + | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ + | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ + | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) + /*\}*/ /* BEGIN: Packet 3 commands */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 08d67b73ed..3cd38753b8 100644 --- 
a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -64,6 +64,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "vbo/vbo.h" +#include "vbo/vbo_split.h" #include "tnl/tnl.h" #include "tnl/t_vp_build.h" #include "radeon_reg.h" @@ -172,64 +173,45 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) return num_verts - verts_off; } -static void r300EmitElts(GLcontext * ctx, unsigned long n_elts) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - void *out; - GLuint size; - - size = ((rmesa->ind_buf.is_32bit ? 4 : 2) * n_elts + 3) & ~3; - - radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo, - &rmesa->radeon.tcl.elt_dma_offset, size, 4); - radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); - out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset; - memcpy(out, rmesa->ind_buf.ptr, size); - radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo); -} - -static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) +static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type, int offset) { BATCH_LOCALS(&rmesa->radeon); + int size; + + /* offset is in indices */ + BEGIN_BATCH(10); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); + if (rmesa->ind_buf.is_32bit) { + /* convert to bytes */ + offset *= 4; + size = vertex_count; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + /* convert to bytes */ + offset *= 2; + size = (vertex_count + 1) >> 1; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type); + } - r300_emit_scissor(rmesa->radeon.glCtx); - if (vertex_count > 0) { - int size; - - BEGIN_BATCH(10); - OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); - if (rmesa->ind_buf.is_32bit) { - size = vertex_count; - OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - ((vertex_count + 0) << 16) | type | - 
R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - } else { - size = (vertex_count + 1) >> 1; - OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - ((vertex_count + 0) << 16) | type); - } - - if (!rmesa->radeon.radeonScreen->kernel_mm) { - OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); - OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | - (R300_VAP_PORT_IDX0 >> 2)); - OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset, - rmesa->radeon.tcl.elt_dma_bo, - rmesa->radeon.tcl.elt_dma_offset, - RADEON_GEM_DOMAIN_GTT, 0, 0); - OUT_BATCH(size); - } else { - OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); - OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | - (R300_VAP_PORT_IDX0 >> 2)); - OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset); - OUT_BATCH(size); - radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, - rmesa->radeon.tcl.elt_dma_bo, - RADEON_GEM_DOMAIN_GTT, 0, 0); - } - END_BATCH(); + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset + offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(size); + } else { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH(rmesa->ind_buf.bo_offset + offset); + OUT_BATCH(size); + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0); } + END_BATCH(); } static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) @@ -239,7 +221,7 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; int i; - if (RADEON_DEBUG & DEBUG_VERTS) + if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, offset); @@ -340,7 +322,7 @@ static void 
r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) { BATCH_LOCALS(&rmesa->radeon); - r300_emit_scissor(rmesa->radeon.glCtx); + r300_emit_scissor(rmesa->radeon.glCtx); BEGIN_BATCH(3); OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); @@ -359,14 +341,16 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) if (type < 0 || num_verts <= 0) return; - /* Make space for at least 128 dwords. - * This is supposed to ensure that we can get all rendering - * commands into a single command buffer. - */ - rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__); + if (rmesa->ind_buf.bo) { + GLuint first, incr, offset = 0; + + if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) && + num_verts > 65500) { + WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim); + return; + } + - if (rmesa->ind_buf.ptr) { - r300EmitElts(ctx, num_verts); r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0); if (rmesa->radeon.radeonScreen->kernel_mm) { BEGIN_BATCH_NO_AUTOSTATE(2); @@ -374,45 +358,56 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) OUT_BATCH(rmesa->radeon.tcl.aos[0].count); END_BATCH(); } - r300FireEB(rmesa, num_verts, type); - } else { - r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start); - r300FireAOS(rmesa, num_verts, type); - } - COMMIT_BATCH(); -} -static void r300RunRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - int i; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - - r300UpdateShaders(rmesa); - r300EmitArrays(ctx); + r300_emit_scissor(rmesa->radeon.glCtx); + while (num_verts > 0) { + int nr; + int align; + + nr = MIN2(num_verts, 65535); + nr -= (nr - first) % incr; + + /* get alignment for IB correct */ + if (nr != num_verts) { + do { + align = nr * 
(rmesa->ind_buf.is_32bit ? 4 : 2); + if (align % 4) + nr -= incr; + } while(align % 4); + if (nr <= 0) { + WARN_ONCE("did the impossible happen? we never aligned nr to dword\n"); + return; + } + + } + r300FireEB(rmesa, nr, type, offset); - r300UpdateShaderStates(rmesa); + num_verts -= nr; + offset += nr; + } - r300EmitCacheFlush(rmesa); - radeonEmitState(&rmesa->radeon); + } else { + GLuint first, incr, offset = 0; - for (i = 0; i < vb->PrimitiveCount; i++) { - GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); - GLuint start = vb->Primitive[i].start; - GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; - r300RunRenderPrimitive(ctx, start, end, prim); + if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) && + num_verts > 65535) { + WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim); + return; + } + r300_emit_scissor(rmesa->radeon.glCtx); + while (num_verts > 0) { + int nr; + nr = MIN2(num_verts, 65535); + nr -= (nr - first) % incr; + r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start + offset); + r300FireAOS(rmesa, nr, type); + num_verts -= nr; + offset += nr; + } } - - r300EmitCacheFlush(rmesa); - - radeonReleaseArrays(ctx, ~0); + COMMIT_BATCH(); } - static const char *getFallbackString(uint32_t bit) { switch (bit) { @@ -449,10 +444,10 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) r300ContextPtr rmesa = R300_CONTEXT(ctx); uint32_t old_fallback = rmesa->fallback; static uint32_t fallback_warn = 0; - + if (mode) { if ((fallback_warn & bit) == 0) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) _mesa_fprintf(stderr, "WARNING! 
Falling back to software for %s\n", getFallbackString(bit)); fallback_warn |= bit; } @@ -470,7 +465,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) /* update only if we change from no raster fallbacks to some raster fallbacks */ if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) && ((bit & R300_RASTER_FALLBACK_MASK) > 0)) { - + radeon_firevertices(&rmesa->radeon); rmesa->radeon.swtcl.RenderIndex = ~0; _swsetup_Wakeup( ctx ); @@ -480,7 +475,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) /* update only if we have disabled all tcl fallbacks */ if (rmesa->options.hw_tcl_enabled) { - if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { + if ((old_fallback & R300_TCL_FALLBACK_MASK) == bit) { R300_STATECHANGE(rmesa, vap_cntl_status); rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS; } @@ -489,7 +484,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) /* update only if we have disabled all raster fallbacks */ if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { _swrast_flush( ctx ); - + tnl->Driver.Render.Start = r300RenderStart; tnl->Driver.Render.Finish = r300RenderFinish; tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; @@ -497,38 +492,10 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) tnl->Driver.Render.BuildVertices = _tnl_build_vertices; tnl->Driver.Render.CopyPV = _tnl_copy_pv; tnl->Driver.Render.Interp = _tnl_interp; - + _tnl_invalidate_vertex_state( ctx, ~0 ); _tnl_invalidate_vertices( ctx, ~0 ); } } - -} - -static GLboolean r300RunNonTCLRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (rmesa->fallback & R300_RASTER_FALLBACK_MASK) - return GL_TRUE; - if (rmesa->options.hw_tcl_enabled == GL_FALSE) - return GL_TRUE; - - r300RunRender(ctx, stage); - - return GL_FALSE; } - -const struct 
tnl_pipeline_stage _r300_render_stage = { - "r300 Hardware Rasterization", - NULL, - NULL, - NULL, - NULL, - r300RunNonTCLRender -}; diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 62228a3786..a4f9db13ec 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -38,7 +38,7 @@ static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont while (fp) { tmp = fp->next; - _mesa_reference_program(ctx, &fp->Base, NULL); + rc_constants_destroy(&fp->code.constants); _mesa_free(fp); fp = tmp; } @@ -50,6 +50,7 @@ static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *c while (vp) { tmp = vp->next; + rc_constants_destroy(&vp->code.constants); _mesa_reference_vertprog(ctx, &vp->Base, NULL); _mesa_free(vp); vp = tmp; @@ -122,15 +123,11 @@ static GLboolean r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { if (target == GL_FRAGMENT_PROGRAM_ARB) { - struct r300_fragment_program *fp = r300SelectFragmentShader(ctx); - if (!fp->translated) - r300TranslateFragmentShader(ctx, fp); + struct r300_fragment_program *fp = r300SelectAndTranslateFragmentShader(ctx); return !fp->error; } else { - struct r300_vertex_program *vp = r300SelectVertexShader(ctx); - if (!vp->translated) - r300TranslateVertexShader(vp); + struct r300_vertex_program *vp = r300SelectAndTranslateVertexShader(ctx); return !vp->error; } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 12fbf281d9..3060f49aaf 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -62,8 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r300_emit.h" #include "r300_tex.h" #include "r300_fragprog_common.h" -#include "r300_fragprog.h" -#include "r500_fragprog.h" #include "r300_render.h" #include "r300_vertprog.h" @@ -373,6 +371,7 @@ static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) p = (GLint) plane - (GLint) GL_CLIP_PLANE0; ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + R300_STATECHANGE( rmesa, vap_flush ); R300_STATECHANGE( rmesa, vpucp[p] ); rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; @@ -458,7 +457,7 @@ static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - return ctx->FragmentProgram._Current && r300->selected_fp->writes_depth; + return ctx->FragmentProgram._Current && r300->selected_fp->code.writes_depth; } static void r300SetEarlyZState(GLcontext * ctx) @@ -473,6 +472,8 @@ static void r300SetEarlyZState(GLcontext * ctx) topZ = R300_ZTOP_DISABLE; else if (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->UsesKill) topZ = R300_ZTOP_DISABLE; + else if (r300->radeon.query.current) + topZ = R300_ZTOP_DISABLE; if (topZ != r300->hw.zstencil_format.cmd[2]) { /* Note: This completely reemits the stencil format. 
@@ -589,7 +590,9 @@ static void r300SetDepthState(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, zs); - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE|R300_STENCIL_FRONT_BACK; + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= (R300_STENCIL_ENABLE | + R300_STENCIL_FRONT_BACK | + R500_STENCIL_REFMASK_FRONT_BACK); r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); if (ctx->Depth.Test) { @@ -603,11 +606,16 @@ static void r300SetDepthState(GLcontext * ctx) static void r300CatchStencilFallback(GLcontext *ctx) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); const unsigned back = ctx->Stencil._BackFace; - if (ctx->Stencil._Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] - || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back] - || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) { + if (rmesa->radeon.radeonScreen->kernel_mm && + (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)) { + r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE); + } else if (ctx->Stencil._Enabled && + (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] + || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back] + || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) { r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_TRUE); } else { r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE); @@ -914,11 +922,24 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (flag << R300_S_BACK_FUNC_SHIFT); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R500_STENCIL_REFMASK_FRONT_BACK; + R300_STATECHANGE(rmesa, zsb); + refmask = ((ctx->Stencil.Ref[back] & 0xff) << R300_STENCILREF_SHIFT) + | ((ctx->Stencil.ValueMask[back] & 0xff) << R300_STENCILMASK_SHIFT); + + rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] &= + ~((R300_STENCILREF_MASK << 
R300_STENCILREF_SHIFT) | + (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT)); + rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |= refmask; + } } static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) { r300ContextPtr rmesa = R300_CONTEXT(ctx); + const unsigned back = ctx->Stencil._BackFace; r300CatchStencilFallback(ctx); @@ -930,6 +951,13 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) (ctx->Stencil. WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT; + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + R300_STATECHANGE(rmesa, zsb); + rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |= + (ctx->Stencil. + WriteMask[back] & R300_STENCILREF_MASK) << + R300_STENCILWRITEMASK_SHIFT; + } } static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, @@ -1046,53 +1074,6 @@ void r300UpdateViewportOffset(GLcontext * ctx) radeonUpdateScissor(ctx); } -static void -r300FetchStateParameter(GLcontext * ctx, - const gl_state_index state[STATE_LENGTH], - GLfloat * value) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - - switch (state[0]) { - case STATE_INTERNAL: - switch (state[1]) { - case STATE_R300_WINDOW_DIMENSION: { - __DRIdrawablePrivate * drawable = radeon_get_drawable(&r300->radeon); - value[0] = drawable->w * 0.5f; /* width*0.5 */ - value[1] = drawable->h * 0.5f; /* height*0.5 */ - value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ - value[3] = 1.0F; /* not used */ - break; - } - - case STATE_R300_TEXRECT_FACTOR:{ - struct gl_texture_object *t = - ctx->Texture.Unit[state[2]].CurrentTex[TEXTURE_RECT_INDEX]; - - if (t && t->Image[0][t->BaseLevel]) { - struct gl_texture_image *image = - t->Image[0][t->BaseLevel]; - value[0] = 1.0 / image->Width2; - value[1] = 1.0 / image->Height2; - } else { - value[0] = 1.0; - value[1] = 1.0; - } - value[2] = 1.0; - value[3] = 1.0; - break; - } - - default: - break; - } - break; - - default: - break; - } -} - /** * Update R300's own internal state parameters. 
* For now just STATE_R300_WINDOW_DIMENSION @@ -1101,7 +1082,6 @@ static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) { r300ContextPtr rmesa = R300_CONTEXT(ctx); struct gl_program_parameter_list *paramList; - GLuint i; if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))) return; @@ -1109,21 +1089,12 @@ static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) if (!ctx->FragmentProgram._Current || !rmesa->selected_fp) return; - paramList = rmesa->selected_fp->Base->Parameters; + paramList = ctx->FragmentProgram._Current->Base.Parameters; if (!paramList) return; _mesa_load_state_parameters(ctx, paramList); - - for (i = 0; i < paramList->NumParameters; i++) { - if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { - r300FetchStateParameter(ctx, - paramList->Parameters[i]. - StateIndexes, - paramList->ParameterValues[i]); - } - } } /* ============================================================= @@ -1230,7 +1201,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r300_fragment_program_code *code = &r300->selected_fp->code.r300; + struct r300_fragment_program_code *code = &r300->selected_fp->code.code.r300; R300_STATECHANGE(r300, fpt); @@ -1272,7 +1243,7 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r500_fragment_program_code *code = &r300->selected_fp->code.r500; + struct r500_fragment_program_code *code = &r300->selected_fp->code.code.r500; /* find all the texture instructions and relocate the texture units */ for (i = 0; i < code->inst_end + 1; i++) { @@ -1312,6 +1283,7 @@ static GLuint translate_lod_bias(GLfloat bias) return (((GLuint)b) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK; } + static void r300SetupTextures(GLcontext * ctx) { int i, mtu; @@ -1334,7 +1306,7 @@ static void r300SetupTextures(GLcontext * ctx) 
r300->hw.txe.cmd[R300_TXE_ENABLE] = 0x0; mtu = r300->radeon.glCtx->Const.MaxTextureUnits; - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "mtu=%d\n", mtu); if (mtu > R300_MAX_TEXTURE_UNITS) { @@ -1359,7 +1331,7 @@ static void r300SetupTextures(GLcontext * ctx) t->pp_txformat & 0xff); } - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "Activating texture unit %d\n", i); @@ -1404,6 +1376,28 @@ static void r300SetupTextures(GLcontext * ctx) } } + /* R3xx and R4xx chips require that the texture unit corresponding to + * KIL instructions is really enabled. + * + * We do some fakery here and in the state atom emit logic to enable + * the texture without tripping up the CS checker in the kernel. + */ + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) { + last_hw_tmu++; + + r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; + + r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.size.cmd[R300_TEX_VALUE_0] = 0; /* 1x1 texture */ + r300->hw.tex.format.cmd[R300_TEX_VALUE_0] = 0; /* A8 format */ + r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0] = 0; + } + } + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1); r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = @@ -1421,19 +1415,9 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); - if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) { - // The KILL operation requires the first texture unit - // to be enabled. 
- r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; - r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; - r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = - cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1); - } - } r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings); - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu); } @@ -1460,11 +1444,11 @@ static void r300SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - InputsRead = r300->selected_fp->Base->InputsRead; + InputsRead = r300->selected_fp->InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1554,11 +1538,11 @@ static void r500SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - InputsRead = r300->selected_fp->Base->InputsRead; + InputsRead = r300->selected_fp->InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1703,7 +1687,7 @@ void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr(cap), state ? 
"GL_TRUE" : "GL_FALSE"); @@ -1780,7 +1764,7 @@ static void r300ResetHwState(r300ContextPtr r300) has_tcl = r300->options.hw_tcl_enabled; - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "%s\n", __FUNCTION__); radeon_firevertices(&r300->radeon); @@ -1997,9 +1981,7 @@ void r300UpdateShaders(r300ContextPtr rmesa) { struct r300_fragment_program *fp; - fp = r300SelectFragmentShader(ctx); - if (!fp->translated) - r300TranslateFragmentShader(ctx, fp); + fp = r300SelectAndTranslateFragmentShader(ctx); r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error); } @@ -2024,9 +2006,7 @@ void r300UpdateShaders(r300ContextPtr rmesa) } } - vp = r300SelectVertexShader(ctx); - if (!vp->translated) - r300TranslateVertexShader(vp); + vp = r300SelectAndTranslateVertexShader(ctx); r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error); } @@ -2035,24 +2015,61 @@ void r300UpdateShaders(r300ContextPtr rmesa) rmesa->radeon.NewGLState = 0; } -static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, - struct gl_program *program, struct prog_src_register srcreg) +static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index, GLfloat * buffer) { static const GLfloat dummy[4] = { 0, 0, 0, 0 }; + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct rc_constant * rcc = &rmesa->selected_fp->code.constants.Constants[index]; + + switch(rcc->Type) { + case RC_CONSTANT_EXTERNAL: + return ctx->FragmentProgram._Current->Base.Parameters->ParameterValues[rcc->u.External]; + case RC_CONSTANT_IMMEDIATE: + return rcc->u.Immediate; + case RC_CONSTANT_STATE: + switch(rcc->u.State[0]) { + case RC_STATE_SHADOW_AMBIENT: { + const int unit = (int) rcc->u.State[1]; + const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current; + if (texObj) { + buffer[0] = + buffer[1] = + buffer[2] = + buffer[3] = texObj->CompareFailValue; + } + return buffer; + } - switch(srcreg.File) { - case PROGRAM_LOCAL_PARAM: - return 
program->LocalParams[srcreg.Index]; - case PROGRAM_ENV_PARAM: - return ctx->FragmentProgram.Parameters[srcreg.Index]; - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - return program->Parameters->ParameterValues[srcreg.Index]; - default: - _mesa_problem(ctx, "get_fragmentprogram_constant: Unknown\n"); - return dummy; + case RC_STATE_R300_WINDOW_DIMENSION: { + __DRIdrawablePrivate * drawable = radeon_get_drawable(&rmesa->radeon); + buffer[0] = drawable->w * 0.5f; /* width*0.5 */ + buffer[1] = drawable->h * 0.5f; /* height*0.5 */ + buffer[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ + buffer[3] = 1.0F; /* not used */ + return buffer; + } + + case RC_STATE_R300_TEXRECT_FACTOR: { + struct gl_texture_object *t = + ctx->Texture.Unit[rcc->u.State[1]].CurrentTex[TEXTURE_RECT_INDEX]; + + if (t && t->Image[0][t->BaseLevel]) { + struct gl_texture_image *image = + t->Image[0][t->BaseLevel]; + buffer[0] = 1.0 / image->Width2; + buffer[1] = 1.0 / image->Height2; + } else { + buffer[0] = 1.0; + buffer[1] = 1.0; + } + buffer[2] = 1.0; + buffer[3] = 1.0; + return buffer; + } + } } + + return dummy; } @@ -2061,9 +2078,9 @@ static void r300SetupPixelShader(GLcontext *ctx) r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_fragment_program *fp = rmesa->selected_fp; struct r300_fragment_program_code *code; - int i, k; + int i; - code = &fp->code.r300; + code = &fp->code.code.r300; R300_STATECHANGE(rmesa, fpi[0]); R300_STATECHANGE(rmesa, fpi[1]); @@ -2074,38 +2091,24 @@ static void r300SetupPixelShader(GLcontext *ctx) rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length); rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length); for (i = 0; i < code->alu.length; i++) { - rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0; - rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1; - 
rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst2; - rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst3; + rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_inst; + rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_addr; + rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_inst; + rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_addr; } R300_STATECHANGE(rmesa, fp); - rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->cur_node | (code->first_node_has_tex << 3); - rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->max_temp_idx; - rmesa->hw.fp.cmd[R300_FP_CNTL2] = - (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | - ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | - (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | - ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); - /* I just want to say, the way these nodes are stored.. weird.. */ - for (i = 0, k = (4 - (code->cur_node + 1)); i < 4; i++, k++) { - if (i < (code->cur_node + 1)) { - rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = - (code->node[i].alu_offset << R300_ALU_START_SHIFT) | - (code->node[i].alu_end << R300_ALU_SIZE_SHIFT) | - (code->node[i].tex_offset << R300_TEX_START_SHIFT) | - (code->node[i].tex_end << R300_TEX_SIZE_SHIFT) | - code->node[i].flags; - } else { - rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; - } - } + rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->config; + rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->pixsize; + rmesa->hw.fp.cmd[R300_FP_CNTL2] = code->code_offset; + for (i = 0; i < 4; i++) + rmesa->hw.fp.cmd[R300_FP_NODE0 + i] = code->code_addr[i]; R300_STATECHANGE(rmesa, fpp); - rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4); - for (i = 0; i < code->const_nr; i++) { - const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]); + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, 
R300_PFS_PARAM_0_X, fp->code.constants.Count * 4); + for (i = 0; i < fp->code.constants.Count; i++) { + GLfloat buffer[4]; + const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); @@ -2137,19 +2140,19 @@ static void r500SetupPixelShader(GLcontext *ctx) ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; - code = &fp->code.r500; + code = &fp->code.code.r500; R300_STATECHANGE(rmesa, fp); rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx; rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] = - R500_US_CODE_START_ADDR(code->inst_offset) | + R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end); rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] = - R500_US_CODE_RANGE_ADDR(code->inst_offset) | + R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end); rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] = - R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */ + R500_US_CODE_OFFSET_ADDR(0); R300_STATECHANGE(rmesa, r500fp); /* Emit our shader... 
*/ @@ -2165,14 +2168,15 @@ static void r500SetupPixelShader(GLcontext *ctx) bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6); R300_STATECHANGE(rmesa, r500fp_const); - for (i = 0; i < code->const_nr; i++) { - const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]); + for (i = 0; i < fp->code.constants.Count; i++) { + GLfloat buffer[4]; + const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]); } - bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->code.constants.Count * 4); } void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten) @@ -2273,6 +2277,15 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) _mesa_update_draw_buffer_bounds(ctx); R300_STATECHANGE(r300, cb); + R300_STATECHANGE(r300, zb); + } + + if (new_state & (_NEW_LIGHT)) { + R300_STATECHANGE(r300, shade2); + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) + r300->hw.shade2.cmd[1] |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; + else + r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; } r300->radeon.NewGLState |= new_state; @@ -2345,13 +2358,9 @@ void r300InitShaderFunctions(r300ContextPtr r300) r300->vtbl.SetupRSUnit = r500SetupRSUnit; r300->vtbl.SetupPixelShader = r500SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; - r300->vtbl.BuildFragmentProgramHwCode = r500BuildFragmentProgramHwCode; - r300->vtbl.FragmentProgramDump = r500FragmentProgramDump; } else { r300->vtbl.SetupRSUnit = r300SetupRSUnit; 
r300->vtbl.SetupPixelShader = r300SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; - r300->vtbl.BuildFragmentProgramHwCode = r300BuildFragmentProgramHwCode; - r300->vtbl.FragmentProgramDump = r300FragmentProgramDump; } } diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 56ed519cf4..ee2c71e1a7 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -39,12 +39,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_emit.h" #include "r300_tex.h" #include "r300_render.h" +#include "main/simple_list.h" #define EMIT_ATTR( ATTR, STYLE ) \ do { \ - rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ - rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ - rmesa->radeon.swtcl.vertex_attr_count++; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ } while (0) #define EMIT_PAD( N ) \ @@ -76,12 +77,16 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ GLuint InputsRead = 0; GLuint OutputsWritten = 0; int num_attrs = 0; - GLuint fp_reads = rmesa->selected_fp->Base->InputsRead; + GLuint fp_reads = rmesa->selected_fp->InputsRead; struct vertex_attribute *attrs = rmesa->vbuf.attribs; + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0; rmesa->radeon.swtcl.vertex_attr_count = 0; + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); + /* We always want non Ndc coords format */ VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; @@ -225,6 +230,7 @@ static void r300PrepareVertices(GLcontext *ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint InputsRead, OutputsWritten; + 
radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten); r300SetupVAP(ctx, InputsRead, OutputsWritten); @@ -238,6 +244,33 @@ static void r300PrepareVertices(GLcontext *ctx) rmesa->radeon.swtcl.vertex_size /= 4; } +static void r300_predict_emit_size( r300ContextPtr rmesa ) +{ + if (!rmesa->radeon.swtcl.emit_prediction) { + const int vertex_size = 7; + const int prim_size = 3; + const int cache_flush_size = 4; + const int pre_emit_state = 4; + const int scissor_size = 3; + const int state_size = radeonCountStateEmitSize(&rmesa->radeon); + + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, + state_size + pre_emit_state + scissor_size + + vertex_size + prim_size + cache_flush_size * 2, + __FUNCTION__)) + rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon); + else + rmesa->radeon.swtcl.emit_prediction = state_size; + + rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw + + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state; + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, + "%s, size %d\n", + __func__, rmesa->radeon.cmdbuf.cs->cdw + + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state); + } +} + static GLuint reduced_prim[] = { GL_POINTS, @@ -271,11 +304,21 @@ static void r300RasterPrimitive( GLcontext *ctx, GLuint prim ); #define HAVE_POLYGONS 1 #define HAVE_ELTS 1 +static void* r300_alloc_verts(r300ContextPtr rmesa, GLuint n, GLuint size) +{ + void *rv; + do { + r300_predict_emit_size( rmesa ); + rv = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ); + } while (!rv); + return rv; +} + #undef LOCAL_VARS #undef ALLOC_VERTS #define CTX_ARG r300ContextPtr rmesa #define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size -#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ) +#define ALLOC_VERTS( n, size ) r300_alloc_verts(rmesa, n, size); #define LOCAL_VARS \ 
r300ContextPtr rmesa = R300_CONTEXT(ctx); \ const char *r300verts = (char *)rmesa->radeon.swtcl.verts; @@ -461,6 +504,7 @@ static void r300ChooseRenderState( GLcontext *ctx ) r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint index = 0; GLuint flags = ctx->_TriangleCaps; + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT; @@ -485,9 +529,9 @@ static void r300ChooseRenderState( GLcontext *ctx ) } } - void r300RenderStart(GLcontext *ctx) { + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); r300ContextPtr rmesa = R300_CONTEXT( ctx ); r300ChooseRenderState(ctx); @@ -500,7 +544,6 @@ void r300RenderStart(GLcontext *ctx) r300UpdateShaderStates(rmesa); - r300EmitCacheFlush(rmesa); /* investigate if we can put back flush optimisation if needed */ if (rmesa->radeon.dma.flush != NULL) { @@ -515,6 +558,7 @@ void r300RenderFinish(GLcontext *ctx) static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) { r300ContextPtr rmesa = R300_CONTEXT(ctx); + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); if (rmesa->radeon.swtcl.hw_primitive != hwprim) { R300_NEWPRIM( rmesa ); @@ -527,6 +571,7 @@ void r300RenderPrimitive(GLcontext *ctx, GLenum prim) r300ContextPtr rmesa = R300_CONTEXT(ctx); rmesa->radeon.swtcl.render_primitive = prim; + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) return; @@ -536,6 +581,8 @@ void r300RenderPrimitive(GLcontext *ctx, GLenum prim) void r300ResetLineStipple(GLcontext *ctx) { + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); } void r300InitSwtcl(GLcontext *ctx) @@ -543,11 +590,13 @@ void r300InitSwtcl(GLcontext *ctx) TNLcontext *tnl = TNL_CONTEXT(ctx); r300ContextPtr rmesa = R300_CONTEXT(ctx); static int firsttime = 1; + radeon_print(RADEON_SWRENDER, RADEON_NORMAL, "%s\n", __func__); if (firsttime) { init_rast_tab(); firsttime = 0; } + 
rmesa->radeon.swtcl.emit_prediction = 0; tnl->Driver.Render.Start = r300RenderStart; tnl->Driver.Render.Finish = r300RenderFinish; @@ -580,8 +629,8 @@ static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct r { BATCH_LOCALS(&rmesa->radeon); - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", + radeon_print(RADEON_SWRENDER, RADEON_TRACE, + "%s: vertex_size %d, offset 0x%x \n", __FUNCTION__, vertex_size, offset); BEGIN_BATCH(7); @@ -596,6 +645,8 @@ static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vert { BATCH_LOCALS(&rmesa->radeon); int type, num_verts; + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); type = r300PrimitiveType(rmesa, primitive); num_verts = r300NumVerts(rmesa, vertex_nr, primitive); @@ -608,21 +659,26 @@ static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vert void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) { + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); r300ContextPtr rmesa = R300_CONTEXT(ctx); - rcommonEnsureCmdBufSpace(&rmesa->radeon, - rmesa->radeon.hw.max_state_size + (12*sizeof(int)), - __FUNCTION__); + r300EmitCacheFlush(rmesa); + radeonEmitState(&rmesa->radeon); r300_emit_scissor(ctx); r300EmitVertexAOS(rmesa, rmesa->radeon.swtcl.vertex_size, - rmesa->radeon.dma.current, + first_elem(&rmesa->radeon.dma.reserved)->bo, current_offset); r300EmitVbufPrim(rmesa, rmesa->radeon.swtcl.hw_primitive, rmesa->radeon.swtcl.numverts); r300EmitCacheFlush(rmesa); + if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." 
+ " We might overflow command buffer.\n", + rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction ); + rmesa->radeon.swtcl.emit_prediction = 0; COMMIT_BATCH(); } diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 0af5bb4f46..433e5a87d4 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -137,7 +137,7 @@ static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloa | R300_TX_MIN_FILTER_ANISO | R300_TX_MIN_FILTER_MIP_LINEAR | aniso_filter(anisotropy); - if (RADEON_DEBUG & DEBUG_TEXTURE) + if (RADEON_DEBUG & RADEON_TEXTURE) fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy); return; } @@ -197,7 +197,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, { radeonTexObj* t = radeon_tex_obj(texObj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { fprintf(stderr, "%s( %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr(pname)); } @@ -260,7 +260,7 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) r300ContextPtr rmesa = R300_CONTEXT(ctx); radeonTexObj* t = radeon_tex_obj(texObj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, _mesa_lookup_enum_by_nr(texObj->Target)); @@ -302,7 +302,7 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, t, _mesa_lookup_enum_by_nr(target)); } diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 6f489ace7b..f030451b28 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ 
b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/teximage.h" #include "main/texobj.h" #include "main/enums.h" +#include "main/simple_list.h" #include "r300_context.h" #include "r300_state.h" @@ -323,7 +324,7 @@ GLboolean r300ValidateBuffers(GLcontext * ctx) RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); } - ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); if (ret) return GL_FALSE; return GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index de32013032..2f7b67c143 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -40,927 +40,125 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/prog_statevars.h" #include "tnl/tnl.h" -#include "radeon_nqssadce.h" +#include "compiler/radeon_compiler.h" +#include "compiler/radeon_nqssadce.h" #include "r300_context.h" +#include "r300_fragprog_common.h" #include "r300_state.h" -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ - t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ - (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ - t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ - -/* - * Take an already-setup and valid source then swizzle it appropriately to - * obtain a constant ZERO or ONE source. +/** + * Write parameter array for the given vertex program into dst. + * Return the total number of components written. 
*/ -#define __CONST(x, y) \ - (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_src_class(src[x].File), \ - VSF_FLAG_NONE) | (src[x].RelAddr << 4)) - -#define FREE_TEMPS() \ - do { \ - int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \ - if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \ - WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \ - vp->error = GL_TRUE; \ - } \ - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ - } while (0) - -static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst) +static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst) { - int pi; - float *dst_o = dst; - struct gl_program_parameter_list *paramList; + int i; - if (vp->IsNVProgram) { + if (vp->Base->IsNVProgram) { _mesa_load_tracked_matrices(ctx); - - for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) { - *dst++ = ctx->VertexProgram.Parameters[pi][0]; - *dst++ = ctx->VertexProgram.Parameters[pi][1]; - *dst++ = ctx->VertexProgram.Parameters[pi][2]; - *dst++ = ctx->VertexProgram.Parameters[pi][3]; + } else { + if (vp->Base->Base.Parameters) { + _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters); } - return dst - dst_o; } - if (!vp->Base.Parameters) - return 0; - - _mesa_load_state_parameters(ctx, vp->Base.Parameters); - - if (vp->Base.Parameters->NumParameters * 4 > - VSF_MAX_FRAGMENT_LENGTH) { + if (vp->code.constants.Count * 4 > VSF_MAX_FRAGMENT_LENGTH) { + /* Should have checked this earlier... 
*/ fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); _mesa_exit(-1); } - paramList = vp->Base.Parameters; - for (pi = 0; pi < paramList->NumParameters; pi++) { - switch (paramList->Parameters[pi].Type) { - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); - case PROGRAM_CONSTANT: - *dst++ = paramList->ParameterValues[pi][0]; - *dst++ = paramList->ParameterValues[pi][1]; - *dst++ = paramList->ParameterValues[pi][2]; - *dst++ = paramList->ParameterValues[pi][3]; - break; - default: - _mesa_problem(NULL, "Bad param type in %s", - __FUNCTION__); - } - - } - - return dst - dst_o; -} - -static unsigned long t_dst_mask(GLuint mask) -{ - /* WRITEMASK_* is equivalent to VSF_FLAG_* */ - return mask & VSF_FLAG_ALL; -} - -static unsigned long t_dst_class(gl_register_file file) -{ - - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_DST_REG_TEMPORARY; - case PROGRAM_OUTPUT: - return PVS_DST_REG_OUT; - case PROGRAM_ADDRESS: - return PVS_DST_REG_A0; - /* - case PROGRAM_INPUT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; - } -} + for(i = 0; i < vp->code.constants.Count; ++i) { + const float * src = 0; + const struct rc_constant * constant = &vp->code.constants.Constants[i]; -static unsigned long t_dst_index(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT) - return vp->outputs[dst->Index]; + switch(constant->Type) { + case RC_CONSTANT_EXTERNAL: + if (vp->Base->IsNVProgram) { + src = ctx->VertexProgram.Parameters[constant->u.External]; + } else { + src = vp->Base->Base.Parameters->ParameterValues[constant->u.External]; + } + break; - return dst->Index; -} + case RC_CONSTANT_IMMEDIATE: + src = constant->u.Immediate; + break; + } -static unsigned 
long t_src_class(gl_register_file file) -{ - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_SRC_REG_TEMPORARY; - case PROGRAM_INPUT: - return PVS_SRC_REG_INPUT; - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - case PROGRAM_STATE_VAR: - return PVS_SRC_REG_CONSTANT; - /* - case PROGRAM_OUTPUT: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; + dst[4*i] = src[0]; + dst[4*i + 1] = src[1]; + dst[4*i + 2] = src[2]; + dst[4*i + 3] = src[3]; } -} -static INLINE unsigned long t_swizzle(GLubyte swizzle) -{ -/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ - return swizzle; + return 4 * vp->code.constants.Count; } -#if 0 -static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) +static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads) { + GLbitfield outputs = 0; int i; - if (vp == NULL) { - fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, - caller); - return; - } - - fprintf(stderr, "%s:<", caller); - for (i = 0; i < VERT_ATTRIB_MAX; i++) - fprintf(stderr, "%d ", vp->inputs[i]); - fprintf(stderr, ">\n"); - -} -#endif - -static unsigned long t_src_index(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - if (src->File == PROGRAM_INPUT) { - assert(vp->inputs[src->Index] != -1); - return vp->inputs[src->Index]; - } else { - if (src->Index < 0) { - fprintf(stderr, - "negative offsets for indirect addressing do not work.\n"); - return 0; - } - return src->Index; - } -} - -/* these two functions should probably be merged... */ - -static unsigned long t_src(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->Negate uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. 
- */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 1)), - t_swizzle(GET_SWZ(src->Swizzle, 2)), - t_swizzle(GET_SWZ(src->Swizzle, 3)), - t_src_class(src->File), - src->Negate) | (src->RelAddr << 4); -} +#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if (fpreads & (1 << (fp_attr))) \ + outputs |= (1 << (vp_result)); \ + } while (0) -static unsigned long t_src_scalar(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->Negate uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_src_class(src->File), - src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src->RelAddr << 4); -} + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); -static GLboolean valid_dst(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { - assert(dst->Index == 0); + for (i = 0; i <= 7; ++i) { + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); } - return GL_TRUE; -} - -static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - 
t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = 0; - - return inst; -} - -static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = - PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, - t_src_class(src[1].File), - src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - PVS_SRC_SELECT_FORCE_1, - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - 
PVS_DST_REG_TEMPORARY); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(*u_temp_i, - PVS_SRC_SELECT_X, - PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, - PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, - /* Not 100% sure about this */ - (!src[0]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE - /*VSF_FLAG_ALL */ ); - inst[3] = __CONST(0, SWIZZLE_ZERO); - (*u_temp_i)--; - - return inst; -} - -static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - - inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - /* NOTE: Users swizzling might not work. */ - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - return inst; -} - -static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - GL_FALSE, - GL_TRUE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = t_src(vp, &src[2]); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp, - struct 
prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = t_src_scalar(vp, &src[1]); - - return inst; -} - -static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} +#undef ADD_OUTPUT -static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct 
prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} + if ((fpreads & (1 << FRAG_ATTRIB_COL0)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0))) + outputs |= 1 << VERT_RESULT_BFC0; + if ((fpreads & (1 << FRAG_ATTRIB_COL1)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1))) + outputs |= 1 << VERT_RESULT_BFC1; -static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} + outputs |= 1 << VERT_RESULT_HPOS; + if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + outputs |= 1 << VERT_RESULT_PSIZ; -static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; + return outputs; } -static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - -#if 0 - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - 
GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = 0; -#else - inst[0] = - PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ONE); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); -#endif - - return inst; -} -static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - */ - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - src[1].Negate ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = - PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE); - - (*u_temp_i)--; - - return inst; -} - -static void t_inputs_outputs(struct r300_vertex_program *vp) +static void t_inputs_outputs(struct r300_vertex_program_compiler * c) { int i; int cur_reg; GLuint OutputsWritten, InputsRead; - OutputsWritten = vp->Base->Base.OutputsWritten; - InputsRead = vp->Base->Base.InputsRead; + OutputsWritten = c->Base.Program.OutputsWritten; + InputsRead = c->Base.Program.InputsRead; cur_reg = -1; for (i = 0; i < VERT_ATTRIB_MAX; i++) { if (InputsRead & (1 << i)) - vp->inputs[i] = ++cur_reg; + c->code->inputs[i] = ++cur_reg; else - vp->inputs[i] = -1; + c->code->inputs[i] = -1; } cur_reg = 0; for (i = 0; i < VERT_RESULT_MAX; i++) - vp->outputs[i] = -1; + c->code->outputs[i] = -1; assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { - vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + 
c->code->outputs[VERT_RESULT_HPOS] = cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { - vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++; } /* If we're writing back facing colors we need to send @@ -970,668 +168,130 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) * get written into appropriate output vectors. */ if (OutputsWritten & (1 << VERT_RESULT_COL0)) { - vp->outputs[VERT_RESULT_COL0] = cur_reg++; + c->code->outputs[VERT_RESULT_COL0] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_COL1)) { - vp->outputs[VERT_RESULT_COL1] = cur_reg++; + c->code->outputs[VERT_RESULT_COL1] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { - vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + c->code->outputs[VERT_RESULT_BFC0] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { - vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + c->code->outputs[VERT_RESULT_BFC1] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { cur_reg++; } for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { if (OutputsWritten & (1 << i)) { - vp->outputs[i] = cur_reg++; + c->code->outputs[i] = cur_reg++; } } if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { - vp->outputs[VERT_RESULT_FOGC] = cur_reg++; - } -} - -void r300TranslateVertexShader(struct r300_vertex_program *vp) -{ - struct prog_instruction *vpi = vp->Base->Base.Instructions; - int i; - GLuint *inst; - unsigned long num_operands; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? 
*/ - int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; - struct prog_src_register src[3]; - - vp->pos_end = 0; /* Not supported yet */ - vp->hw_code.length = 0; - vp->translated = GL_TRUE; - vp->error = GL_FALSE; - - t_inputs_outputs(vp); - - for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END; - vpi++, inst += 4) { - - FREE_TEMPS(); - - if (!valid_dst(vp, &vpi->DstReg)) { - /* redirect result to unused temp */ - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = u_temp_i; - } - - num_operands = _mesa_num_inst_src_regs(vpi->Opcode); - - /* copy the sources (src) from mesa into a local variable... is this needed? */ - for (i = 0; i < num_operands; i++) { - src[i] = vpi->SrcReg[i]; - } - - if (num_operands == 3) { /* TODO: scalars */ - if (CMP_SRCS(src[1], src[2]) - || CMP_SRCS(src[0], src[2])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(vp, &src[2]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[2].File), - VSF_FLAG_NONE) | (src[2]. - RelAddr << - 4); - inst[2] = __CONST(2, SWIZZLE_ZERO); - inst[3] = __CONST(2, SWIZZLE_ZERO); - inst += 4; - - src[2].File = PROGRAM_TEMPORARY; - src[2].Index = u_temp_i; - src[2].RelAddr = 0; - u_temp_i--; - } - } - - if (num_operands >= 2) { - if (CMP_SRCS(src[1], src[0])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[0].File), - VSF_FLAG_NONE) | (src[0]. 
- RelAddr << - 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - src[0].File = PROGRAM_TEMPORARY; - src[0].Index = u_temp_i; - src[0].RelAddr = 0; - u_temp_i--; - } - } - - switch (vpi->Opcode) { - case OPCODE_ABS: - inst = r300TranslateOpcodeABS(vp, vpi, inst, src); - break; - case OPCODE_ADD: - inst = r300TranslateOpcodeADD(vp, vpi, inst, src); - break; - case OPCODE_ARL: - inst = r300TranslateOpcodeARL(vp, vpi, inst, src); - break; - case OPCODE_DP3: - inst = r300TranslateOpcodeDP3(vp, vpi, inst, src); - break; - case OPCODE_DP4: - inst = r300TranslateOpcodeDP4(vp, vpi, inst, src); - break; - case OPCODE_DPH: - inst = r300TranslateOpcodeDPH(vp, vpi, inst, src); - break; - case OPCODE_DST: - inst = r300TranslateOpcodeDST(vp, vpi, inst, src); - break; - case OPCODE_EX2: - inst = r300TranslateOpcodeEX2(vp, vpi, inst, src); - break; - case OPCODE_EXP: - inst = r300TranslateOpcodeEXP(vp, vpi, inst, src); - break; - case OPCODE_FLR: - inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - case OPCODE_FRC: - inst = r300TranslateOpcodeFRC(vp, vpi, inst, src); - break; - case OPCODE_LG2: - inst = r300TranslateOpcodeLG2(vp, vpi, inst, src); - break; - case OPCODE_LIT: - inst = r300TranslateOpcodeLIT(vp, vpi, inst, src); - break; - case OPCODE_LOG: - inst = r300TranslateOpcodeLOG(vp, vpi, inst, src); - break; - case OPCODE_MAD: - inst = r300TranslateOpcodeMAD(vp, vpi, inst, src); - break; - case OPCODE_MAX: - inst = r300TranslateOpcodeMAX(vp, vpi, inst, src); - break; - case OPCODE_MIN: - inst = r300TranslateOpcodeMIN(vp, vpi, inst, src); - break; - case OPCODE_MOV: - inst = r300TranslateOpcodeMOV(vp, vpi, inst, src); - break; - case OPCODE_MUL: - inst = r300TranslateOpcodeMUL(vp, vpi, inst, src); - break; - case OPCODE_POW: - inst = r300TranslateOpcodePOW(vp, vpi, inst, src); - break; - case OPCODE_RCP: - inst = r300TranslateOpcodeRCP(vp, vpi, inst, src); - break; - case OPCODE_RSQ: - inst = 
r300TranslateOpcodeRSQ(vp, vpi, inst, src); - break; - case OPCODE_SGE: - inst = r300TranslateOpcodeSGE(vp, vpi, inst, src); - break; - case OPCODE_SLT: - inst = r300TranslateOpcodeSLT(vp, vpi, inst, src); - break; - case OPCODE_SUB: - inst = r300TranslateOpcodeSUB(vp, vpi, inst, src); - break; - case OPCODE_SWZ: - inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src); - break; - case OPCODE_XPD: - inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - default: - vp->error = GL_TRUE; - break; - } - } - - vp->hw_code.length = (inst - vp->hw_code.body.d); - if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) { - vp->error = GL_TRUE; + c->code->outputs[VERT_RESULT_FOGC] = cur_reg++; } } -static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) -{ - struct prog_instruction *vpi; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); - - vpi = &prog->Instructions[prog->NumInstructions - 3]; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_HPOS; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_END; -} - -static void pos_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - GLuint tempregi = prog->NumTemporaries; - - prog->NumTemporaries++; - - for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { - vpi->DstReg.File = 
PROGRAM_TEMPORARY; - vpi->DstReg.Index = tempregi; - } - } - - insert_wpos(prog, tempregi, tex_id); - - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - /** - * The fogcoord attribute is special in that only the first component - * is relevant, and the remaining components are always fixed (when read - * from by the fragment program) to yield an X001 pattern. + * The NV_vertex_program spec mandates that all registers be + * initialized to zero. We do this here unconditionally. * - * We need to enforce this either in the vertex program or in the fragment - * program, and this code chooses not to enforce it in the vertex program. - * This is slightly cheaper, as long as the fragment program does not use - * weird swizzles. - * - * And it seems that usually, weird swizzles are not used, so... - * - * See also the counterpart rewriting for fragment programs. + * \note We rely on dead-code elimination in the compiler. */ -static void fog_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - - vpi = prog->Instructions; - while (vpi->Opcode != OPCODE_END) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_X; - } - - ++vpi; - } - - prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - -static int translateABS(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_MAX; - inst->SrcReg[1] = inst->SrcReg[0]; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateDP3(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - return 0; -} - -static int 
translateDPH(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); - - return 0; -} - -static int translateFLR(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - struct prog_dst_register dst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - dst = inst->DstReg; - - inst->Opcode = OPCODE_FRC; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - ++inst; - - inst->Opcode = OPCODE_ADD; - inst->DstReg = dst; - inst->SrcReg[0] = (inst-1)->SrcReg[0]; - inst->SrcReg[1].File = PROGRAM_TEMPORARY; - inst->SrcReg[1].Index = tmp_idx; - inst->SrcReg[1].Negate = NEGATE_XYZW; - - return 1; -} - -static int translateSUB(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_ADD; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateSWZ(struct gl_program *prog, int pos) -{ - prog->Instructions[pos].Opcode = OPCODE_MOV; - - return 0; -} - -static int translateXPD(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - - *(inst+1) = *inst; - - inst->Opcode = OPCODE_MUL; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - ++inst; - - inst->Opcode = OPCODE_MAD; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, 
SWIZZLE_Y, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - inst->SrcReg[2].File = PROGRAM_TEMPORARY; - inst->SrcReg[2].Index = tmp_idx; - - return 1; -} - -static void translateInsts(struct gl_program *prog) +static void initialize_NV_registers(struct radeon_compiler * compiler) { - struct prog_instruction *inst; - int i; + unsigned int reg; + struct rc_instruction * inst; - for (i = 0; i < prog->NumInstructions; ++i) { - inst = &prog->Instructions[i]; - - switch (inst->Opcode) { - case OPCODE_ABS: - i += translateABS(prog, i); - break; - case OPCODE_DP3: - i += translateDP3(prog, i); - break; - case OPCODE_DPH: - i += translateDPH(prog, i); - break; - case OPCODE_FLR: - i += translateFLR(prog, i); - break; - case OPCODE_SUB: - i += translateSUB(prog, i); - break; - case OPCODE_SWZ: - i += translateSWZ(prog, i); - break; - case OPCODE_XPD: - i += translateXPD(prog, i); - break; - default: - break; - } + for(reg = 0; reg < 12; ++reg) { + inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = reg; + inst->I.SrcReg[0].File = PROGRAM_BUILTIN; + inst->I.SrcReg[0].Swizzle = SWIZZLE_0000; } -} - -#define ADD_OUTPUT(fp_attr, vp_result) \ - do { \ - if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \ - OutputsAdded |= 1 << (vp_result); \ - count++; \ - } \ - } while (0) - -static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - GLuint OutputsAdded, FpReads; - int i, count; - - OutputsAdded = 0; - count = 0; - FpReads = r300->selected_fp->Base->InputsRead; - - ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); - ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); - - for (i = 0; i < 7; ++i) { - ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 
+ i); - } - - /* Some outputs may be artificially added, to match the inputs of the fragment program. - * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by - * vertex program are undefined, so just use MOV [vertex_result], CONST[0] - */ - if (count > 0) { - struct prog_instruction *inst; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, count); - inst = &prog->Instructions[prog->NumInstructions - 1 - count]; - for (i = 0; i < VERT_RESULT_MAX; ++i) { - if (OutputsAdded & (1 << i)) { - inst->Opcode = OPCODE_MOV; - - inst->DstReg.File = PROGRAM_OUTPUT; - inst->DstReg.Index = i; - inst->DstReg.WriteMask = WRITEMASK_XYZW; - inst->DstReg.CondMask = COND_TR; - - inst->SrcReg[0].File = PROGRAM_CONSTANT; - inst->SrcReg[0].Index = 0; - inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++inst; - } - } - - prog->OutputsWritten |= OutputsAdded; - } -} - -#undef ADD_OUTPUT - -static void nqssadceInit(struct nqssadce_state* s) -{ - r300ContextPtr r300 = R300_CONTEXT(s->Ctx); - GLuint fp_reads; - - fp_reads = r300->selected_fp->Base->InputsRead; - { - if (fp_reads & FRAG_BIT_COL0) { - s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; - } - - if (fp_reads & FRAG_BIT_COL1) { - s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; - } - } - - { - int i; - for (i = 0; i < 8; ++i) { - if (fp_reads & FRAG_BIT_TEX(i)) { - s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; - } - } - } - - s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; - if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) - s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; -} - -static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) -{ - (void) opcode; - (void) reg; - - return GL_TRUE; + inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); + inst->I.Opcode = OPCODE_ARL; + inst->I.DstReg.File = 
PROGRAM_ADDRESS; + inst->I.DstReg.Index = 0; + inst->I.DstReg.WriteMask = WRITEMASK_X; + inst->I.SrcReg[0].File = PROGRAM_BUILTIN; + inst->I.SrcReg[0].Swizzle = SWIZZLE_0000; } static struct r300_vertex_program *build_program(GLcontext *ctx, struct r300_vertex_program_key *wanted_key, const struct gl_vertex_program *mesa_vp) { - r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_program *vp; - struct gl_program *prog; + struct r300_vertex_program_compiler compiler; vp = _mesa_calloc(sizeof(*vp)); vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - prog = &vp->Base->Base; + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE; - if (RADEON_DEBUG & DEBUG_VERTS) { + compiler.code = &vp->code; + compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads); + compiler.SetHwInputOutput = &t_inputs_outputs; + + if (compiler.Base.Debug) { fprintf(stderr, "Initial vertex program:\n"); - _mesa_print_program(prog); - fflush(stdout); + _mesa_print_program(&vp->Base->Base); + fflush(stderr); } - if (vp->Base->IsPositionInvariant) { + if (mesa_vp->IsPositionInvariant) { _mesa_insert_mvp_code(ctx, vp->Base); } - if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { - pos_as_texcoord(&vp->Base->Base, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0); - } + rc_mesa_to_rc_program(&compiler.Base, &vp->Base->Base); - if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { - fog_as_texcoord(&vp->Base->Base, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0); - } + if (mesa_vp->IsNVProgram) + initialize_NV_registers(&compiler.Base); - addArtificialOutputs(ctx, prog); + rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X); - translateInsts(prog); - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after native rewrite:\n"); - _mesa_print_program(prog); - fflush(stdout); + if 
(vp->key.WPosAttr != FRAG_ATTRIB_MAX) { + rc_copy_output(&compiler.Base, + VERT_RESULT_HPOS, + vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0); } - { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadceInit, - .IsNativeSwizzle = &swizzleIsNative, - .BuildSwizzle = NULL - }; - radeonNqssaDce(ctx, prog, &nqssadce); - - /* We need this step for reusing temporary registers */ - _mesa_optimize_program(ctx, prog); - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after NQSSADCE:\n"); - _mesa_print_program(prog); - fflush(stdout); - } + if (vp->key.FogAttr != FRAG_ATTRIB_MAX) { + rc_move_output(&compiler.Base, + VERT_RESULT_FOGC, + vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X); } - assert(prog->NumInstructions); - { - struct prog_instruction *inst; - int max, i, tmp; - - inst = prog->Instructions; - max = -1; - while (inst->Opcode != OPCODE_END) { - tmp = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < tmp; ++i) { - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { - if ((int) inst->SrcReg[i].Index > max) { - max = inst->SrcReg[i].Index; - } - } - } + r3xx_compile_vertex_program(&compiler); + vp->error = compiler.Base.Error; - if (_mesa_num_inst_dst_regs(inst->Opcode)) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - if ((int) inst->DstReg.Index > max) { - max = inst->DstReg.Index; - } - } - } - ++inst; - } + vp->Base->Base.InputsRead = vp->code.InputsRead; + vp->Base->Base.OutputsWritten = vp->code.OutputsWritten; - /* We actually want highest index of used temporary register, - * not the number of temporaries used. - * These values aren't always the same. 
- */ - vp->num_temporaries = max + 1; - } + rc_destroy(&compiler.Base); return vp; } -struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) +struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_program_key wanted_key = { 0 }; @@ -1639,7 +299,21 @@ struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) struct r300_vertex_program *vp; vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; - wanted_key.FpReads = r300->selected_fp->Base->InputsRead; + + if (!r300->selected_fp) { + /* This can happen when GetProgramiv is called to check + * whether the program runs natively. + * + * To be honest, this is not a very good solution, + * but solving the problem of reporting good values + * for those queries is tough anyway considering that + * we recompile vertex programs based on the precise + * fragment program that is in use. + */ + r300SelectAndTranslateFragmentShader(ctx); + } + + wanted_key.FpReads = r300->selected_fp->InputsRead; wanted_key.FogAttr = r300->selected_fp->fog_attr; wanted_key.WPosAttr = r300->selected_fp->wpos_attr; @@ -1664,12 +338,14 @@ struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ } while(0) -static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code) +static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code) { int i; assert((code->length > 0) && (code->length % 4 == 0)); + R300_STATECHANGE( r300, vap_flush ); + switch ((dest >> 8) & 0xf) { case 0: R300_STATECHANGE(r300, vpi); @@ -1707,16 +383,17 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + R300_STATECHANGE(rmesa, vap_flush); R300_STATECHANGE(rmesa, vpp); - param_count = 
r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code)); - inst_count = (prog->hw_code.length / 4) - 1; + r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code)); + inst_count = (prog->code.length / 4) - 1; - r300VapCntl(rmesa, _mesa_bitcount(prog->Base->Base.InputsRead), - _mesa_bitcount(prog->Base->Base.OutputsWritten), prog->num_temporaries); + r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead), + _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries); R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h index 2dab11c337..ccec896be4 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.h +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -3,39 +3,9 @@ #include "r300_reg.h" -#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ - (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ - | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ - | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ - | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ - | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ - | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) - -#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ - (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ - | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ - | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ - | 
((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ - | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ - | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ - | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) - -#if 1 - -#define VSF_FLAG_X 1 -#define VSF_FLAG_Y 2 -#define VSF_FLAG_Z 4 -#define VSF_FLAG_W 8 -#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z) -#define VSF_FLAG_ALL 0xf -#define VSF_FLAG_NONE 0 - -#endif void r300SetupVertexProgram(r300ContextPtr rmesa); -struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx); - -void r300TranslateVertexShader(struct r300_vertex_program *vp); +struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx); #endif diff --git a/src/mesa/drivers/dri/r300/radeon_buffer_objects.c b/src/mesa/drivers/dri/r300/radeon_buffer_objects.c new file mode 120000 index 0000000000..f6a5f66470 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_buffer_objects.c @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_buffer_objects.h b/src/mesa/drivers/dri/r300/radeon_buffer_objects.h new file mode 120000 index 0000000000..2f134fd17b --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_buffer_objects.h @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h index 250570f6b8..da4812d323 100644 --- a/src/mesa/drivers/dri/r300/radeon_context.h +++ b/src/mesa/drivers/dri/r300/radeon_context.h @@ -51,26 +51,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_screen.h" -#if R200_MERGED -extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode); - -#define FALLBACK( radeon, bit, mode ) do { \ - if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ - __FUNCTION__, bit, mode ); \ - radeonFallback( (radeon)->glCtx, bit, mode ); \ -} while (0) -#else #define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%s\n", __LINE__, __FILE__); -#endif /* TCL fallbacks */ extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode); -#if R200_MERGED -#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode ) -#else #define TCL_FALLBACK( ctx, bit, mode ) ; -#endif #endif /* __RADEON_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r300/radeon_debug.c b/src/mesa/drivers/dri/r300/radeon_debug.c new file mode 120000 index 0000000000..c98c2e074c --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_debug.c @@ -0,0 +1 @@ +../radeon/radeon_debug.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_debug.h b/src/mesa/drivers/dri/r300/radeon_debug.h new file mode 120000 index 0000000000..bd8aa28e89 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_debug.h @@ -0,0 +1 @@ +../radeon/radeon_debug.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c deleted file mode 100644 index da5e7aefce..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_program.h" - -#include "shader/prog_print.h" - - -/** - * Transform the given clause in the following way: - * 1. Replace it with an empty clause - * 2. For every instruction in the original clause, try the given - * transformations in order. - * 3. If one of the transformations returns GL_TRUE, assume that it - * has emitted the appropriate instruction(s) into the new clause; - * otherwise, copy the instruction verbatim. 
- * - * \note The transformation is currently not recursive; in other words, - * instructions emitted by transformations are not transformed. - * - * \note The transform is called 'local' because it can only look at - * one instruction at a time. - */ -void radeonLocalTransform( - GLcontext *Ctx, - struct gl_program *program, - int num_transformations, - struct radeon_program_transformation* transformations) -{ - struct radeon_transform_context ctx; - int ip; - - ctx.Ctx = Ctx; - ctx.Program = program; - ctx.OldInstructions = program->Instructions; - ctx.OldNumInstructions = program->NumInstructions; - - program->Instructions = 0; - program->NumInstructions = 0; - - for(ip = 0; ip < ctx.OldNumInstructions; ++ip) { - struct prog_instruction *instr = ctx.OldInstructions + ip; - int i; - - for(i = 0; i < num_transformations; ++i) { - struct radeon_program_transformation* t = transformations + i; - - if (t->function(&ctx, instr, t->userData)) - break; - } - - if (i >= num_transformations) { - struct prog_instruction* dest = radeonAppendInstructions(program, 1); - _mesa_copy_instructions(dest, instr, 1); - } - } - - _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions); -} - - -static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count) -{ - GLuint i; - for (i = 0; i < count; i++) { - const struct prog_instruction *inst = insts + i; - const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); - GLuint k; - - for (k = 0; k < n; k++) { - if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) - used[inst->SrcReg[k].Index] = GL_TRUE; - } - } -} - -GLint radeonFindFreeTemporary(struct radeon_transform_context *t) -{ - GLboolean used[MAX_PROGRAM_TEMPS]; - GLuint i; - - _mesa_memset(used, 0, sizeof(used)); - scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions); - scan_instructions(used, t->OldInstructions, t->OldNumInstructions); - - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { - if (!used[i]) - return i; - } - 
- return -1; -} - - -/** - * Append the given number of instructions to the program and return a - * pointer to the first new instruction. - */ -struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count) -{ - int oldnum = program->NumInstructions; - _mesa_insert_instructions(program, oldnum, count); - return program->Instructions + oldnum; -} diff --git a/src/mesa/drivers/dri/r300/radeon_queryobj.c b/src/mesa/drivers/dri/r300/radeon_queryobj.c new file mode 120000 index 0000000000..1d6ebc1c48 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_queryobj.c @@ -0,0 +1 @@ +../radeon/radeon_queryobj.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_queryobj.h b/src/mesa/drivers/dri/r300/radeon_queryobj.h new file mode 120000 index 0000000000..8f6f842b0a --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_queryobj.h @@ -0,0 +1 @@ +../radeon/radeon_queryobj.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 5bdc1afbf0..7d5a7b1ab6 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -27,16 +27,19 @@ COMMON_SOURCES = \ ../common/dri_util.c RADEON_COMMON_SOURCES = \ - radeon_texture.c \ + radeon_bo_legacy.c \ radeon_common_context.c \ + radeon_buffer_objects.c \ radeon_common.c \ + radeon_cs_legacy.c \ radeon_dma.c \ + radeon_debug.c \ + radeon_fbo.c \ radeon_lock.c \ - radeon_bo_legacy.c \ - radeon_cs_legacy.c \ radeon_mipmap_tree.c \ radeon_span.c \ - radeon_fbo.c + radeon_texture.c \ + radeon_queryobj.c DRIVER_SOURCES = \ radeon_screen.c \ @@ -63,8 +66,7 @@ DRIVER_SOURCES = \ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) -DRIVER_DEFINES = -DCOMPILE_R600 -DR200_MERGED=0 \ - -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R600 \ +DRIVER_DEFINES = -DRADEON_R600 \ # -DRADEON_BO_TRACK \ -Wall diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index 74fec02584..3cfe03a45f 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -74,12 +74,11 @@ static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm, return cs; } -int r600_cs_write_reloc(struct radeon_cs *cs, - struct radeon_bo *bo, - uint32_t read_domain, - uint32_t write_domain, - uint32_t flags, - offset_modifiers* poffset_mod) +static int r600_cs_write_reloc(struct radeon_cs *cs, + struct radeon_bo *bo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t flags) { struct r600_cs_reloc_legacy *relocs; int i; @@ -130,14 +129,10 @@ int r600_cs_write_reloc(struct radeon_cs *cs, } relocs[i].indices = indices; relocs[i].reloc_indices = reloc_indices; - relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1; - relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->section_cdw; - cs->section_ndw += 2; + relocs[i].indices[relocs[i].cindices - 1] = cs->cdw; + 
relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->cdw; cs->section_cdw += 2; - - relocs[i].offset_mod.shift = poffset_mod->shift; - relocs[i].offset_mod.shiftbits = poffset_mod->shiftbits; - relocs[i].offset_mod.mask = poffset_mod->mask; + cs->cdw += 2; return 0; } @@ -160,14 +155,11 @@ int r600_cs_write_reloc(struct radeon_cs *cs, { return -ENOMEM; } - relocs[cs->crelocs].offset_mod.shift = poffset_mod->shift; - relocs[cs->crelocs].offset_mod.shiftbits = poffset_mod->shiftbits; - relocs[cs->crelocs].offset_mod.mask = poffset_mod->mask; - relocs[cs->crelocs].indices[0] = cs->cdw - 1; - relocs[cs->crelocs].reloc_indices[0] = cs->section_cdw; - cs->section_ndw += 2; + relocs[cs->crelocs].indices[0] = cs->cdw; + relocs[cs->crelocs].reloc_indices[0] = cs->cdw; cs->section_cdw += 2; + cs->cdw += 2; relocs[cs->crelocs].cindices = 1; cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo); cs->crelocs++; @@ -191,11 +183,18 @@ static int r600_cs_begin(struct radeon_cs *cs, return -EPIPE; } - if (cs->cdw + ndw + 32 > cs->ndw) { /* Left 32 DWORD (8 offset+pitch) spare room for reloc indices */ + cs->section = 1; + cs->section_ndw = ndw; + cs->section_cdw = 0; + cs->section_file = file; + cs->section_func = func; + cs->section_line = line; + + if (cs->cdw + ndw > cs->ndw) { uint32_t tmp, *ptr; - int num = (ndw > 0x3FF) ? ndw : 0x3FF; + int num = (ndw > 0x400) ? ndw : 0x400; - tmp = (cs->cdw + 1 + num) & (~num); + tmp = (cs->cdw + num + 0x3FF) & (~0x3FF); ptr = (uint32_t*)realloc(cs->packets, 4 * tmp); if (ptr == NULL) { return -ENOMEM; @@ -204,13 +203,6 @@ static int r600_cs_begin(struct radeon_cs *cs, cs->ndw = tmp; } - cs->section = 1; - cs->section_ndw = 0; - cs->section_cdw = cs->cdw + ndw; /* start of reloc indices. 
*/ - cs->section_file = file; - cs->section_func = func; - cs->section_line = line; - return 0; } @@ -227,8 +219,7 @@ static int r600_cs_end(struct radeon_cs *cs, } cs->section = 0; - if ( (cs->section_ndw + cs->cdw) != cs->section_cdw ) - { + if ( cs->section_ndw != cs->section_cdw ) { fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n", cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw); fprintf(stderr, "cs->section_ndw = %d, cs->cdw = %d, cs->section_cdw = %d \n", @@ -238,7 +229,14 @@ static int r600_cs_end(struct radeon_cs *cs, return -EPIPE; } - cs->cdw = cs->section_cdw; + if (cs->cdw > cs->ndw) { + fprintf(stderr, "CS section overflow at (%s,%s,%d) cdw %d ndw %d\n", + cs->section_file, cs->section_func, cs->section_line,cs->cdw,cs->ndw); + fprintf(stderr, "CS section end at (%s,%s,%d)\n", + file, func, line); + assert(0); + } + return 0; } @@ -255,65 +253,44 @@ static int r600_cs_process_relocs(struct radeon_cs *cs, csm = (struct r600_cs_manager_legacy*)cs->csm; relocs = (struct r600_cs_reloc_legacy *)cs->relocs; restart: - for (i = 0; i < cs->crelocs; i++) - { - for (j = 0; j < relocs[i].cindices; j++) - { + for (i = 0; i < cs->crelocs; i++) { uint32_t soffset, eoffset, asicoffset; r = radeon_bo_legacy_validate(relocs[i].base.bo, - &soffset, &eoffset); - if (r == -EAGAIN) - { - goto restart; + &soffset, &eoffset); + if (r == -EAGAIN) { + goto restart; } - if (r) - { - fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", - relocs[i].base.bo, soffset, eoffset); - return r; + if (r) { + fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", + relocs[i].base.bo, soffset, eoffset); + return r; } asicoffset = soffset; - if (asicoffset >= eoffset) - { - /* radeon_bo_debug(relocs[i].base.bo, 12); */ - fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", - relocs[i].base.bo, soffset, eoffset); - fprintf(stderr, "above end: %p 0x%08X 0x%08X\n", - relocs[i].base.bo, - cs->packets[relocs[i].indices[j]], - eoffset); - 
exit(0); - return -EINVAL; - } - /* apply offset operator */ - switch (relocs[i].offset_mod.shift) - { - case NO_SHIFT: - asicoffset = asicoffset & relocs[i].offset_mod.mask; - break; - case LEFT_SHIFT: - asicoffset = (asicoffset << relocs[i].offset_mod.shiftbits) & relocs[i].offset_mod.mask; - break; - case RIGHT_SHIFT: - asicoffset = (asicoffset >> relocs[i].offset_mod.shiftbits) & relocs[i].offset_mod.mask; - break; - default: - break; - }; - - /* pkt3 nop header in ib chunk */ - cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000; - - /* reloc index in ib chunk */ - cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw; - - /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */ - reloc_chunk[offset_dw] = asicoffset; - reloc_chunk[offset_dw + 3] = 0; - - offset_dw += 4; - } + + for (j = 0; j < relocs[i].cindices; j++) { + if (asicoffset >= eoffset) { + /* radeon_bo_debug(relocs[i].base.bo, 12); */ + fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", + relocs[i].base.bo, soffset, eoffset); + fprintf(stderr, "above end: %p 0x%08X 0x%08X\n", + relocs[i].base.bo, + cs->packets[relocs[i].indices[j]], + eoffset); + exit(0); + return -EINVAL; + } + /* pkt3 nop header in ib chunk */ + cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000; + /* reloc index in ib chunk */ + cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw; + } + + /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */ + reloc_chunk[offset_dw] = asicoffset; + reloc_chunk[offset_dw + 3] = 0; + + offset_dw += 4; } *length_dw_reloc_chunk = offset_dw; @@ -335,6 +312,7 @@ static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */ return 0; } +#if 0 static void dump_cmdbuf(struct radeon_cs *cs) { int i; @@ -345,61 +323,32 @@ static void dump_cmdbuf(struct radeon_cs *cs) fprintf(stderr,"--end--\n"); } +#endif static int r600_cs_emit(struct radeon_cs *cs) { struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm; struct drm_radeon_cs cs_cmd; 
struct drm_radeon_cs_chunk cs_chunk[2]; - drm_radeon_cmd_buffer_t cmd; - /* drm_r300_cmd_header_t age; */ uint32_t length_dw_reloc_chunk; - uint64_t ull; uint64_t chunk_ptrs[2]; - uint32_t reloc_chunk[128]; + uint32_t *reloc_chunk; int r; int retry = 0; /* TODO : put chip level things here if need. */ /* csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); */ - BATCH_LOCALS(csm->ctx); - drm_radeon_getparam_t gp; - uint32_t current_scratchx_age; - - gp.param = RADEON_PARAM_LAST_CLEAR; - gp.value = (int *)&current_scratchx_age; - r = drmCommandWriteRead(cs->csm->fd, - DRM_RADEON_GETPARAM, - &gp, - sizeof(gp)); - if (r) - { - fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, r); - exit(1); - } - - csm->pending_age = 0; csm->pending_count = 1; - current_scratchx_age++; - csm->pending_age = current_scratchx_age; - - BEGIN_BATCH_NO_AUTOSTATE(3); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); - R600_OUT_BATCH((SCRATCH_REG2 - R600_SET_CONFIG_REG_OFFSET) >> 2); - R600_OUT_BATCH(current_scratchx_age); - END_BATCH(); - COMMIT_BATCH(); + reloc_chunk = (uint32_t*)calloc(1, cs->crelocs * 4 * 4); - //TODO ioctl to get back cs id assigned in drm - //csm->pending_age = cs_id_back; - - r = r600_cs_process_relocs(cs, &(reloc_chunk[0]), &length_dw_reloc_chunk); + r = r600_cs_process_relocs(cs, reloc_chunk, &length_dw_reloc_chunk); if (r) { + free(reloc_chunk); return 0; } - + /* raw ib chunk */ cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB; cs_chunk[0].length_dw = cs->cdw; @@ -408,7 +357,7 @@ static int r600_cs_emit(struct radeon_cs *cs) /* reloc chaunk */ cs_chunk[1].chunk_id = RADEON_CHUNK_ID_RELOCS; cs_chunk[1].length_dw = length_dw_reloc_chunk; - cs_chunk[1].chunk_data = (unsigned long)&(reloc_chunk[0]); + cs_chunk[1].chunk_data = (unsigned long)reloc_chunk; chunk_ptrs[0] = (uint64_t)(unsigned long)&(cs_chunk[0]); chunk_ptrs[1] = (uint64_t)(unsigned long)&(cs_chunk[1]); @@ -426,15 +375,20 @@ static int r600_cs_emit(struct radeon_cs *cs) } while (r == -EAGAIN && retry < 1000); 
if (r) { + free(reloc_chunk); return r; } + csm->pending_age = cs_cmd.cs_id; + r600_cs_set_age(cs); cs->csm->read_used = 0; cs->csm->vram_write_used = 0; cs->csm->gart_write_used = 0; + free(reloc_chunk); + return 0; } @@ -513,9 +467,11 @@ struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_cont void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */ { - radeonContextPtr rmesa = &r600->radeon; - - GLuint size; + radeonContextPtr rmesa = &r600->radeon; + GLuint size; + + r600InitAtoms(r600); + /* Initialize command buffer */ size = 256 * driQueryOptioni(&rmesa->optionCache, "command_buffer_size"); @@ -538,7 +494,10 @@ void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */ rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size); assert(rmesa->cmdbuf.cs != NULL); rmesa->cmdbuf.size = size; - + + radeon_cs_space_set_flush(rmesa->cmdbuf.cs, + (void (*)(void *))rmesa->glCtx->Driver.Flush, rmesa->glCtx); + if (!rmesa->radeonScreen->kernel_mm) { radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]); radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size); diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h index bd1ed7fdff..eba43d37b6 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h @@ -132,54 +132,37 @@ struct r600_cs_reloc_legacy { uint32_t cindices; uint32_t *indices; uint32_t *reloc_indices; - struct offset_modifiers offset_mod; }; -extern int r600_cs_write_reloc(struct radeon_cs *cs, - struct radeon_bo *bo, - uint32_t read_domain, - uint32_t write_domain, - uint32_t flags, - offset_modifiers* poffset_mod); - -static inline void r600_cs_write_dword(struct radeon_cs *cs, uint32_t dword) -{ - cs->packets[cs->cdw++] = dword; -} - struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx); /** * Write one dword to the 
command buffer. */ -#define R600_OUT_BATCH(data) \ - do { \ - r600_cs_write_dword(b_l_rmesa->cmdbuf.cs, data);\ - } while(0) +#define R600_OUT_BATCH(data) \ +do { \ + radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, data); \ +} while(0) /** * Write n dwords from ptr to the command buffer. */ -#define R600_OUT_BATCH_TABLE(ptr,n) \ - do { \ - int _i; \ - for (_i=0; _i < n; _i++) {\ - r600_cs_write_dword(b_l_rmesa->cmdbuf.cs, ptr[_i]);\ - }\ - } while(0) +#define R600_OUT_BATCH_TABLE(ptr,n) \ +do { \ + radeon_cs_write_table(b_l_rmesa->cmdbuf.cs, ptr, n); \ +} while(0) /** * Write a relocated dword to the command buffer. */ -#define R600_OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags, offset_mod) \ +#define R600_OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) \ do { \ if (0 && offset) { \ fprintf(stderr, "(%s:%s:%d) offset : %d\n", \ __FILE__, __FUNCTION__, __LINE__, offset); \ } \ - r600_cs_write_dword(b_l_rmesa->cmdbuf.cs, offset); \ - r600_cs_write_reloc(b_l_rmesa->cmdbuf.cs, \ - bo, rd, wd, flags, offset_mod); \ + radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, \ + bo, rd, wd, flags); \ } while(0) /* R600/R700 */ diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index eacf811282..969144ba12 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -56,8 +56,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "drivers/common/driverfuncs.h" +#include "radeon_debug.h" #include "r600_context.h" #include "radeon_common_context.h" +#include "radeon_buffer_objects.h" #include "radeon_span.h" #include "r600_cmdbuf.h" #include "r600_emit.h" @@ -84,6 +86,7 @@ int hw_tcl_on = 1; #define need_GL_EXT_framebuffer_object #define need_GL_EXT_fog_coord #define need_GL_EXT_gpu_program_parameters +#define need_GL_EXT_provoking_vertex #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side #define need_GL_ATI_separate_stencil @@ -116,6 +119,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_packed_depth_stencil", NULL}, {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, + {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions }, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, {"GL_EXT_shadow_funcs", NULL}, {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions}, @@ -127,6 +131,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_texture_lod_bias", NULL}, {"GL_EXT_texture_mirror_clamp", NULL}, {"GL_EXT_texture_rectangle", NULL}, + {"GL_EXT_texture_sRGB", NULL}, {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions}, {"GL_ATI_texture_env_combine3", NULL}, {"GL_ATI_texture_mirror_once", NULL}, @@ -185,7 +190,7 @@ static void r600_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmes static void r600_vtbl_pre_emit_atoms(radeonContextPtr radeon) { - /* to be enabled */ + r700Start3D((context_t *)radeon); } static void r600_fallback(GLcontext *ctx, GLuint bit, GLboolean mode) @@ -225,8 +230,10 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, /* Allocate the R600 context */ r600 = (context_t*) CALLOC(sizeof(*r600)); - if (!r600) + if (!r600) { + radeon_error("Failed to allocate memory for context.\n"); return GL_FALSE; + } if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) hw_tcl_on = future_hw_tcl_on = 0; @@ 
-247,16 +254,16 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, */ _mesa_init_driver_functions(&functions); - r700InitChipObject(r600); /* let the eag... */ - r700InitStateFuncs(&functions); r600InitTextureFuncs(&functions); r700InitShaderFuncs(&functions); r700InitIoctlFuncs(&functions); + radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r600->radeon, &functions, glVisual, driContextPriv, sharedContextPrivate)) { + radeon_error("Initializing context failed.\n"); FREE(r600); return GL_FALSE; } @@ -279,8 +286,8 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; - ctx->Const.MaxTextureLevels = 13; - ctx->Const.MaxTextureRectSize = 4096; + ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ + ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ ctx->Const.MinPointSize = 0x0001 / 8.0; ctx->Const.MinPointSizeAA = 0x0001 / 8.0; @@ -325,30 +332,33 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_allow_pixel_fog(ctx, GL_FALSE); _tnl_allow_vertex_fog(ctx, GL_TRUE); - /* currently bogus data */ - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; - - ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; - ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... 
*/ - ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS; - ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST; - ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeInstructions = - PFS_MAX_ALU_INST + PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeTexIndirections = - PFS_MAX_TEX_INDIRECT; + /* 256 for reg-based consts, inline consts also supported */ + ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ + ctx->Const.VertexProgram.MaxNativeInstructions = 8192; + ctx->Const.VertexProgram.MaxNativeAttribs = 160; + ctx->Const.VertexProgram.MaxTemps = 128; + ctx->Const.VertexProgram.MaxNativeTemps = 128; + ctx->Const.VertexProgram.MaxNativeParameters = 256; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ + + ctx->Const.FragmentProgram.MaxNativeTemps = 128; + ctx->Const.FragmentProgram.MaxNativeAttribs = 32; + ctx->Const.FragmentProgram.MaxNativeParameters = 256; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; + /* 8 per clause on r6xx, 16 on rv670/r7xx */ + if ((screen->chip_family == CHIP_FAMILY_RV670) || + (screen->chip_family >= CHIP_FAMILY_RV770)) + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16; + else + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8; + ctx->Const.FragmentProgram.MaxNativeInstructions = 8192; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? 
*/ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + radeon_init_debug(); + driInitExtensions(ctx, card_extensions, GL_TRUE); if (r600->radeon.radeonScreen->kernel_mm) driInitExtensions(ctx, mm_extensions, GL_FALSE); @@ -367,6 +377,8 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } + r700InitDraw(ctx); + radeon_fbo_init(&r600->radeon); radeonInitSpanFuncs( ctx ); @@ -377,25 +389,10 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, TNL_CONTEXT(ctx)->Driver.RunPipeline = r600RunPipeline; if (driQueryOptionb(&r600->radeon.optionCache, "no_rast")) { - fprintf(stderr, "disabling 3D acceleration\n"); -#if R200_MERGED - FALLBACK(&r600->radeon, RADEON_FALLBACK_DISABLE, 1); -#endif + radeon_warning("disabling 3D acceleration\n"); } return GL_TRUE; } -/* Clean our own things only, radeonDestroyContext will do every thing else. */ -void -r600DestroyContext (__DRIcontextPrivate * driContextPriv) -{ - GET_CURRENT_CONTEXT (ctx); - context_t *context = ctx ? R700_CONTEXT(ctx) : NULL; - - if (context) - FREE(context->hw.pStateList); -} - - diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index bcb33e1386..a296ea23fa 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -51,32 +51,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r700_chip.h" #include "r600_tex.h" #include "r700_oglprog.h" +#include "r700_vertprog.h" struct r600_context; typedef struct r600_context context_t; -GLboolean r700SendPSState(context_t *context); -GLboolean r700SendVSState(context_t *context); -GLboolean r700SendSQConfig(context_t *context); - #include "main/mm.h" -/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . 
- I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble - with other compilers ... GLUE! -*/ -#define WARN_ONCE(a, ...) { \ - static int warn##__LINE__=1; \ - if(warn##__LINE__){ \ - fprintf(stderr, "*********************************WARN_ONCE*********************************\n"); \ - fprintf(stderr, "File %s function %s line %d\n", \ - __FILE__, __FUNCTION__, __LINE__); \ - fprintf(stderr, a, ## __VA_ARGS__);\ - fprintf(stderr, "***************************************************************************\n"); \ - warn##__LINE__=0;\ - } \ - } - /************ DMA BUFFERS **************/ /* The blit width for texture uploads @@ -105,35 +86,73 @@ extern int hw_tcl_on; #include "tnl_dd/t_dd_vertex.h" #undef TAG -#define PFS_MAX_ALU_INST 64 -#define PFS_MAX_TEX_INST 64 -#define PFS_MAX_TEX_INDIRECT 4 -#define PFS_NUM_TEMP_REGS 32 -#define PFS_NUM_CONST_REGS 16 - -#define R600_MAX_AOS_ARRAYS 16 - -#define REG_COORDS 0 -#define REG_COLOR0 1 -#define REG_TEX0 2 - #define R600_FALLBACK_NONE 0 #define R600_FALLBACK_TCL 1 #define R600_FALLBACK_RAST 2 -enum -{ - NO_SHIFT = 0, - LEFT_SHIFT = 1, - RIGHT_SHIFT = 2, +struct r600_hw_state { + struct radeon_state_atom sq; + struct radeon_state_atom db; + struct radeon_state_atom stencil; + struct radeon_state_atom db_target; + struct radeon_state_atom sc; + struct radeon_state_atom scissor; + struct radeon_state_atom aa; + struct radeon_state_atom cl; + struct radeon_state_atom gb; + struct radeon_state_atom ucp; + struct radeon_state_atom su; + struct radeon_state_atom poly; + struct radeon_state_atom cb; + struct radeon_state_atom clrcmp; + struct radeon_state_atom blnd; + struct radeon_state_atom blnd_clr; + struct radeon_state_atom cb_target; + struct radeon_state_atom sx; + struct radeon_state_atom vgt; + struct radeon_state_atom spi; + struct radeon_state_atom vpt; + + struct radeon_state_atom fs; + struct radeon_state_atom vs; + struct radeon_state_atom ps; + + struct 
radeon_state_atom vs_consts; + struct radeon_state_atom ps_consts; + + struct radeon_state_atom vtx; + struct radeon_state_atom tx; + struct radeon_state_atom tx_smplr; + struct radeon_state_atom tx_brdr_clr; }; -typedef struct offset_modifiers +typedef struct StreamDesc { - GLuint shift; - GLuint shiftbits; - GLuint mask; -} offset_modifiers; + GLint size; //number of data element + GLenum type; //data element type + GLsizei stride; + + struct radeon_bo *bo; + GLint bo_offset; + + GLuint dwords; + GLuint dst_loc; + GLuint _signed; + GLboolean normalize; + GLboolean is_named_bo; + GLubyte element; +} StreamDesc; + +typedef struct r700_index_buffer +{ + struct radeon_bo *bo; + int bo_offset; + + GLboolean is_32bit; + GLuint count; + + GLboolean bHostIb; +} r700_index_buffer; /** * \brief R600 context structure. @@ -144,35 +163,52 @@ struct r600_context { /* ------ */ R700_CHIP_CONTEXT hw; + struct r600_hw_state atoms; + + struct r700_vertex_program *selected_vp; + /* Vertex buffers */ GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; + GLint nNumActiveAos; + StreamDesc stream_desc[VERT_ATTRIB_MAX]; + struct r700_index_buffer ind_buf; }; #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) #define GL_CONTEXT(context) ((GLcontext *)(context->radeon.glCtx)) -extern void r600DestroyContext(__DRIcontextPrivate * driContextPriv); extern GLboolean r600CreateContext(const __GLcontextModes * glVisual, __DRIcontextPrivate * driContextPriv, void *sharedContextPrivate); #define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)(&context->hw)) -extern GLboolean r700InitChipObject(context_t *context); -extern GLboolean r700SendContextStates(context_t *context); -extern GLboolean r700SendViewportState(context_t *context, int id); -extern GLboolean r700SendRenderTargetState(context_t *context, int id); - -extern int r700SetupStreams(GLcontext * ctx); -extern void r700SetupVTXConstants(GLcontext * ctx, - unsigned int nStreamID, - void * pAos, 
- unsigned int size, /* number of elements in vector */ - unsigned int stride, - unsigned int Count); /* number of vectors in stream */ +#define R600_NEWPRIM( rmesa ) \ +do { \ + if ( rmesa->radeon.dma.flush ) \ + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \ +} while (0) + +#define R600_STATECHANGE(r600, ATOM) \ +do { \ + R600_NEWPRIM(r600); \ + r600->atoms.ATOM.dirty = GL_TRUE; \ + r600->radeon.hw.is_dirty = GL_TRUE; \ +} while(0) + +extern GLboolean r700SyncSurf(context_t *context, + struct radeon_bo *pbo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t sync_type); + +extern void r700SetupStreams(GLcontext * ctx); +extern void r700Start3D(context_t *context); +extern void r600InitAtoms(context_t *context); +extern void r700InitDraw(GLcontext *ctx); #define RADEON_D_CAPTURE 0 #define RADEON_D_PLAYBACK 1 diff --git a/src/mesa/drivers/dri/r600/r600_emit.c b/src/mesa/drivers/dri/r600/r600_emit.c index 685f7fe473..5c250c2418 100644 --- a/src/mesa/drivers/dri/r600/r600_emit.c +++ b/src/mesa/drivers/dri/r600/r600_emit.c @@ -49,72 +49,67 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
void r600EmitCacheFlush(context_t *rmesa) { - BATCH_LOCALS(&rmesa->radeon); } -GLboolean r600EmitShader(GLcontext * ctx, +GLboolean r600EmitShader(GLcontext * ctx, void ** shaderbo, - GLvoid * data, + GLvoid * data, int sizeinDWORD, - char * szShaderUsage) + char * szShaderUsage) { - radeonContextPtr radeonctx = RADEON_CONTEXT(ctx); + radeonContextPtr radeonctx = RADEON_CONTEXT(ctx); + struct radeon_bo * pbo; + uint32_t *out; +shader_again_alloc: + pbo = radeon_bo_open(radeonctx->radeonScreen->bom, + 0, + sizeinDWORD * 4, + 256, + RADEON_GEM_DOMAIN_GTT, + 0); - struct radeon_bo * pbo; - uint32_t *out; + radeon_print(RADEON_SHADER, RADEON_NORMAL, "%s %p size %d: %s\n", __func__, pbo, sizeinDWORD, szShaderUsage); -shader_again_alloc: -#ifdef RADEON_DEBUG_BO - pbo = radeon_bo_open(radeonctx->radeonScreen->bom, - 0, - sizeinDWORD * 4, - 256, - RADEON_GEM_DOMAIN_GTT, - 0, - szShaderUsage); -#else - pbo = radeon_bo_open(radeonctx->radeonScreen->bom, - 0, - sizeinDWORD * 4, - 256, - RADEON_GEM_DOMAIN_GTT, - 0); -#endif /* RADEON_DEBUG_BO */ - - if (!pbo) - { + if (!pbo) { + radeon_print(RADEON_MEMORY | RADEON_CS, RADEON_IMPORTANT, "No memory for buffer object. 
Flushing command buffer.\n"); rcommonFlushCmdBuf(radeonctx, __FUNCTION__); goto shader_again_alloc; } - if (radeon_cs_space_check_with_bo(radeonctx->cmdbuf.cs, - pbo, - RADEON_GEM_DOMAIN_GTT, 0)) - fprintf(stderr,"failure to revalidate BOs - badness\n"); + radeon_cs_space_add_persistent_bo(radeonctx->cmdbuf.cs, + pbo, + RADEON_GEM_DOMAIN_GTT, 0); + if (radeon_cs_space_check_with_bo(radeonctx->cmdbuf.cs, + pbo, + RADEON_GEM_DOMAIN_GTT, 0)) { + radeon_error("failure to revalidate BOs - badness\n"); + return GL_FALSE; + } radeon_bo_map(pbo, 1); - radeon_bo_ref(pbo); + out = (uint32_t*)(pbo->ptr); - out = (uint32_t*)(pbo->ptr); + memcpy(out, data, sizeinDWORD * 4); - memcpy(out, data, sizeinDWORD * 4); + radeon_bo_unmap(pbo); - radeon_bo_unmap(pbo); + *shaderbo = (void*)pbo; - *shaderbo = (void*)pbo; - - return GL_TRUE; + return GL_TRUE; } -GLboolean r600DeleteShader(GLcontext * ctx, - void * shaderbo) +GLboolean r600DeleteShader(GLcontext * ctx, + void * shaderbo) { struct radeon_bo * pbo = (struct radeon_bo *)shaderbo; + radeon_print(RADEON_SHADER, RADEON_NORMAL, "%s: %p\n", __func__, pbo); + if (pbo) { - radeon_bo_unmap(pbo); + if (pbo->ptr) + radeon_bo_unmap(pbo); radeon_bo_unref(pbo); /* when bo->cref <= 0, bo will be bo_free */ } diff --git a/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h index 9d5aa3c7e4..edd85b0fac 100644 --- a/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h +++ b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h @@ -1366,6 +1366,7 @@ enum { DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3, DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15, DB_DEPTH_INFO__ARRAY_MODE_shift = 15, + ARRAY_1D_TILED_THIN1 = 0x02, ARRAY_2D_TILED_THIN1 = 0x04, TILE_SURFACE_ENABLE_bit = 1 << 25, TILE_COMPACT_bit = 1 << 26, @@ -1449,6 +1450,7 @@ enum { CB_COLOR0_INFO__ARRAY_MODE_shift = 8, ARRAY_LINEAR_GENERAL = 0x00, ARRAY_LINEAR_ALIGNED = 0x01, +/* ARRAY_1D_TILED_THIN1 = 0x02, */ /* ARRAY_2D_TILED_THIN1 = 0x04, */ NUMBER_TYPE_mask = 0x07 << 
12, NUMBER_TYPE_shift = 12, diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index 43d9f641af..d105b90cd1 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -62,12 +62,12 @@ static unsigned int translate_wrap_mode(GLenum wrapmode) case GL_CLAMP: return SQ_TEX_CLAMP_HALF_BORDER; case GL_CLAMP_TO_EDGE: return SQ_TEX_CLAMP_LAST_TEXEL; case GL_CLAMP_TO_BORDER: return SQ_TEX_CLAMP_BORDER; - case GL_MIRRORED_REPEAT: return SQ_TEX_MIRROR_ONCE_HALF_BORDER; - case GL_MIRROR_CLAMP_EXT: return SQ_TEX_MIRROR; - case GL_MIRROR_CLAMP_TO_EDGE_EXT: return SQ_TEX_MIRROR_ONCE_BORDER; - case GL_MIRROR_CLAMP_TO_BORDER_EXT: return SQ_TEX_MIRROR_ONCE_LAST_TEXEL; + case GL_MIRRORED_REPEAT: return SQ_TEX_MIRROR; + case GL_MIRROR_CLAMP_EXT: return SQ_TEX_MIRROR_ONCE_HALF_BORDER; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: return SQ_TEX_MIRROR_ONCE_LAST_TEXEL; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: return SQ_TEX_MIRROR_ONCE_BORDER; default: - _mesa_problem(NULL, "bad wrap mode in %s", __FUNCTION__); + radeon_error("bad wrap mode in %s", __FUNCTION__); return 0; } } @@ -127,10 +127,18 @@ static void r600SetTexDefaultState(radeonTexObjPtr t) SETfield(t->SQ_TEX_RESOURCE4, SQ_ENDIAN_NONE, SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift, SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask); SETfield(t->SQ_TEX_RESOURCE4, 1, REQUEST_SIZE_shift, REQUEST_SIZE_mask); - t->SQ_TEX_RESOURCE4 |= SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift - |SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift - |SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift - |SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift; + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask); /* mip-maps */ t->SQ_TEX_RESOURCE5 = 0; @@ -141,26 +149,27 @@ static void r600SetTexDefaultState(radeonTexObjPtr t) /* Initialize sampler registers */ t->SQ_TEX_SAMPLER0 = 0; - t->SQ_TEX_SAMPLER0 |= - SQ_TEX_WRAP << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift - |SQ_TEX_WRAP << CLAMP_Y_shift - |SQ_TEX_WRAP << CLAMP_Z_shift - |SQ_TEX_XY_FILTER_POINT << XY_MAG_FILTER_shift - |SQ_TEX_XY_FILTER_POINT << XY_MIN_FILTER_shift - |SQ_TEX_Z_FILTER_NONE << Z_FILTER_shift - |SQ_TEX_Z_FILTER_NONE << MIP_FILTER_shift - |SQ_TEX_BORDER_COLOR_TRANS_BLACK << BORDER_COLOR_TYPE_shift; - - t->SQ_TEX_SAMPLER1 = 0x7FF << MAX_LOD_shift; + SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP, SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift, + SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask); + SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP, CLAMP_Y_shift, CLAMP_Y_mask); + SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP, CLAMP_Z_shift, CLAMP_Z_mask); + SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_XY_FILTER_POINT, XY_MAG_FILTER_shift, XY_MAG_FILTER_mask); + SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_XY_FILTER_POINT, XY_MIN_FILTER_shift, XY_MIN_FILTER_mask); + SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_Z_FILTER_NONE, Z_FILTER_shift, Z_FILTER_mask); + SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_Z_FILTER_NONE, MIP_FILTER_shift, MIP_FILTER_mask); + SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_TRANS_BLACK, BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask); + + t->SQ_TEX_SAMPLER1 = 0; + SETfield(t->SQ_TEX_SAMPLER1, 0x3ff, MAX_LOD_shift, MAX_LOD_mask); t->SQ_TEX_SAMPLER2 = 0; SETbit(t->SQ_TEX_SAMPLER2, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit); } +#if 0 static GLuint aniso_filter(GLfloat anisotropy) { -#if 0 if (anisotropy >= 16.0) { return R300_TX_MAX_ANISO_16_TO_1; } else if (anisotropy >= 8.0) { @@ 
-172,9 +181,9 @@ static GLuint aniso_filter(GLfloat anisotropy) } else { return R300_TX_MAX_ANISO_1_TO_1; } -#endif return 0; } +#endif /** * Set the texture magnification and minification modes. @@ -199,8 +208,7 @@ static void r600SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloa | R300_TX_MIN_FILTER_ANISO | R300_TX_MIN_FILTER_MIP_LINEAR | aniso_filter(anisotropy);*/ - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy); + radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "Using maximum anisotropy of %f\n", anisotropy); return; } @@ -260,14 +268,12 @@ static void r600SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloa static void r600SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4]) { -#if 0 - GLubyte c[4]; - CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); - CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); - CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); - CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); - t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); -#endif + t->TD_PS_SAMPLER0_BORDER_ALPHA = *((uint32_t*)&(color[3])); + t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[2])); + t->TD_PS_SAMPLER0_BORDER_GREEN = *((uint32_t*)&(color[1])); + t->TD_PS_SAMPLER0_BORDER_BLUE = *((uint32_t*)&(color[0])); + SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_REGISTER, + BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask); } /** @@ -281,10 +287,9 @@ static void r600TexParameter(GLcontext * ctx, GLenum target, { radeonTexObj* t = radeon_tex_obj(texObj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { - fprintf(stderr, "%s( %s )\n", __FUNCTION__, + radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_VERBOSE, + "%s( %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr(pname)); - } switch (pname) { case GL_TEXTURE_MIN_FILTER: @@ -344,11 +349,10 @@ static void r600DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) context_t* rmesa = R700_CONTEXT(ctx); radeonTexObj* t = radeon_tex_obj(texObj); - if 
(RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { - fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL, + "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, _mesa_lookup_enum_by_nr(texObj->Target)); - } if (rmesa) { int i; @@ -386,10 +390,9 @@ static struct gl_texture_object *r600NewTextureObject(GLcontext * ctx, radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { - fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL, + "%s( %p (target = %s) )\n", __FUNCTION__, t, _mesa_lookup_enum_by_nr(target)); - } _mesa_initialize_texture_object(&t->base, name, target); t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 2466aa9595..7d7e77d355 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -43,11 +43,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "main/teximage.h" #include "main/texobj.h" #include "main/enums.h" +#include "main/simple_list.h" #include "r600_context.h" #include "r700_state.h" #include "radeon_mipmap_tree.h" #include "r600_tex.h" +#include "r700_fragprog.h" +#include "r700_vertprog.h" void r600UpdateTextureState(GLcontext * ctx); @@ -59,10 +62,14 @@ void r600UpdateTextureState(GLcontext * ctx) struct radeon_tex_obj *t; GLuint unit; + R600_STATECHANGE(context, tx); + R600_STATECHANGE(context, tx_smplr); + R600_STATECHANGE(context, tx_brdr_clr); + for (unit = 0; unit < R700_MAX_TEXTURE_UNITS; unit++) { texUnit = &ctx->Texture.Unit[unit]; t = radeon_tex_obj(ctx->Texture.Unit[unit]._Current); - + r700->textures[unit] = NULL; if (texUnit->_ReallyEnabled) { if (!t) continue; @@ -75,10 +82,10 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo { radeonTexObj *t = radeon_tex_obj(tObj); - t->SQ_TEX_RESOURCE4 &= ~( SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask - |SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask - |SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask - |SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask ); + CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); switch (mesa_format) /* This is mesa format. 
*/ { @@ -86,163 +93,211 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_RGBA8888_REV: SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_ARGB8888: 
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_ARGB8888_REV: SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_RGB888: SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); 
- t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_RGB565: SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_RGB565_REV: SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_ARGB4444: SETfield(t->SQ_TEX_RESOURCE1, FMT_4_4_4_4, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_ARGB4444_REV: SETfield(t->SQ_TEX_RESOURCE1, FMT_4_4_4_4, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_W << 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_ARGB1555: SETfield(t->SQ_TEX_RESOURCE1, FMT_1_5_5_5, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_ARGB1555_REV: SETfield(t->SQ_TEX_RESOURCE1, FMT_1_5_5_5, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_AL88: case MESA_FORMAT_AL88_REV: /* TODO : Check this. */ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_RGB332: SETfield(t->SQ_TEX_RESOURCE1, FMT_3_3_2, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + 
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */ SETfield(t->SQ_TEX_RESOURCE1, FMT_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_L8: /* X, X, X, ONE */ SETfield(t->SQ_TEX_RESOURCE1, FMT_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_I8: /* X, X, X, X */ case MESA_FORMAT_CI8: SETfield(t->SQ_TEX_RESOURCE1, FMT_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; /* YUV422 TODO conversion */ /* X, Y, Z, ONE, G8R8_G8B8 */ /* @@ -272,121 +327,157 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_32_32_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, 
SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_RGBA_FLOAT16: SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_16_16_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_RGB_FLOAT32: /* X, Y, Z, ONE */ SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_32_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_RGB_FLOAT16: SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_16_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */ SETfield(t->SQ_TEX_RESOURCE1, FMT_32_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */ SETfield(t->SQ_TEX_RESOURCE1, FMT_16_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */ SETfield(t->SQ_TEX_RESOURCE1, FMT_32_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */ SETfield(t->SQ_TEX_RESOURCE1, FMT_16_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, 
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */ SETfield(t->SQ_TEX_RESOURCE1, FMT_32_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); 
break; case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */ SETfield(t->SQ_TEX_RESOURCE1, FMT_16_FLOAT, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_Z16: case MESA_FORMAT_Z24_S8: @@ -407,30 +498,82 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo }; switch (tObj->DepthMode) { case GL_LUMINANCE: /* X, X, X, ONE */ - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case GL_INTENSITY: /* X, X, X, X */ - 
t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case GL_ALPHA: /* ZERO, ZERO, ZERO, X */ - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_0 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; default: return GL_FALSE; } break; + /* EXT_texture_sRGB */ + case MESA_FORMAT_SRGBA8: + SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + case MESA_FORMAT_SLA8: + SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + case MESA_FORMAT_SL8: /* X, X, X, ONE */ + SETfield(t->SQ_TEX_RESOURCE1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; default: /* Not supported format */ return 
GL_FALSE; @@ -441,7 +584,6 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo void r600SetDepthTexMode(struct gl_texture_object *tObj) { - const GLuint *format; radeonTexObjPtr t; if (!tObj) @@ -464,21 +606,23 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex radeonTexObj *t = radeon_tex_obj(texObj); const struct gl_texture_image *firstImage; int firstlevel = t->mt ? t->mt->firstLevel : 0; - GLuint uTexelPitch; + GLuint uTexelPitch, row_align; + + if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled && + t->image_override && + t->bo) + return; firstImage = t->base.Image[0][firstlevel]; if (!t->image_override) { if (!r600GetTexFormat(texObj, firstImage->TexFormat->MesaFormat)) { - _mesa_problem(NULL, "unexpected texture format in %s", + radeon_error("unexpected texture format in %s\n", __FUNCTION__); return; } } - if (t->image_override && t->bo) - return; - switch (texObj->Target) { case GL_TEXTURE_1D: SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_1D, DIM_shift, DIM_mask); @@ -499,19 +643,30 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex SETfield(t->SQ_TEX_RESOURCE1, 0, TEX_DEPTH_shift, TEX_DEPTH_mask); break; default: - _mesa_problem(NULL, "unexpected texture target type in %s", __FUNCTION__); + radeon_error("unexpected texture target type in %s\n", __FUNCTION__); return; } - uTexelPitch = (firstImage->Width + R700_TEXEL_PITCH_ALIGNMENT_MASK) + row_align = rmesa->radeon.texture_row_align - 1; + uTexelPitch = ((firstImage->Width * t->mt->bpp + row_align) & ~row_align) / t->mt->bpp; + uTexelPitch = (uTexelPitch + R700_TEXEL_PITCH_ALIGNMENT_MASK) & ~R700_TEXEL_PITCH_ALIGNMENT_MASK; + /* min pitch is 8 */ + if (uTexelPitch < 8) + uTexelPitch = 8; + SETfield(t->SQ_TEX_RESOURCE0, (uTexelPitch/8)-1, PITCH_shift, PITCH_mask); SETfield(t->SQ_TEX_RESOURCE0, firstImage->Width - 1, TEX_WIDTH_shift, TEX_WIDTH_mask); SETfield(t->SQ_TEX_RESOURCE1, firstImage->Height - 1, 
TEX_HEIGHT_shift, TEX_HEIGHT_mask); + if ((t->mt->lastLevel - t->mt->firstLevel) > 0) { + t->SQ_TEX_RESOURCE3 = t->mt->levels[0].size / 256; + SETfield(t->SQ_TEX_RESOURCE4, t->mt->firstLevel, BASE_LEVEL_shift, BASE_LEVEL_mask); + SETfield(t->SQ_TEX_RESOURCE5, t->mt->lastLevel, LAST_LEVEL_shift, LAST_LEVEL_mask); + } } /** @@ -542,6 +697,7 @@ GLboolean r600ValidateBuffers(GLcontext * ctx) { context_t *rmesa = R700_CONTEXT(ctx); struct radeon_renderbuffer *rrb; + struct radeon_bo *pbo; int i; int ret; @@ -570,9 +726,7 @@ GLboolean r600ValidateBuffers(GLcontext * ctx) continue; if (!r600_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) { - _mesa_warning(ctx, - "failed to validate texture for unit %d.\n", - i); + radeon_warning("failed to validate texture for unit %d.\n", i); } t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); if (t->image_override && t->bo) @@ -585,7 +739,19 @@ GLboolean r600ValidateBuffers(GLcontext * ctx) RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); } - ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); + pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(ctx); + if (pbo) { + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo, + RADEON_GEM_DOMAIN_GTT, 0); + } + + pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(ctx); + if (pbo) { + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo, + RADEON_GEM_DOMAIN_GTT, 0); + } + + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); if (ret) return GL_FALSE; return GL_TRUE; @@ -598,7 +764,7 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, struct gl_texture_object *tObj = _mesa_lookup_texture(rmesa->radeon.glCtx, texname); radeonTexObjPtr t = radeon_tex_obj(tObj); - uint32_t pitch_val; + uint32_t pitch_val, size; if (!tObj) return; @@ -608,7 +774,12 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, if 
(!offset) return; - t->bo = NULL; + size = pitch;//h * w * (depth / 8); + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + t->bo = radeon_legacy_bo_alloc_fake(rmesa->radeon.radeonScreen->bom, size, offset); t->override_offset = offset; pitch_val = pitch; switch (depth) { @@ -616,11 +787,14 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); pitch_val /= 4; break; case 24: @@ -628,28 +802,39 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); pitch_val /= 4; break; case 16: SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); pitch_val /= 2; break; } pitch_val = (pitch_val + R700_TEXEL_PITCH_ALIGNMENT_MASK) & ~R700_TEXEL_PITCH_ALIGNMENT_MASK; + + /* min pitch is 8 */ + if (pitch_val < 8) + pitch_val = 8; + SETfield(t->SQ_TEX_RESOURCE0, (pitch_val/8)-1, PITCH_shift, PITCH_mask); } @@ -739,20 +924,26 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + 
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); } else { SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); } pitch_val /= 4; break; @@ -762,22 +953,28 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_W << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); pitch_val /= 4; break; case 2: SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - t->SQ_TEX_RESOURCE4 |= - (SQ_SEL_Z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) - |(SQ_SEL_Y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) - |(SQ_SEL_X << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) - |(SQ_SEL_1 << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); pitch_val /= 2; break; } @@ -785,6 +982,10 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo pitch_val = (pitch_val + R700_TEXEL_PITCH_ALIGNMENT_MASK) & ~R700_TEXEL_PITCH_ALIGNMENT_MASK; + /* min pitch is 8 */ + if (pitch_val < 8) + pitch_val = 8; + SETfield(t->SQ_TEX_RESOURCE0, (pitch_val/8)-1, PITCH_shift, PITCH_mask); SETfield(t->SQ_TEX_RESOURCE0, rb->base.Width - 1, TEX_WIDTH_shift, TEX_WIDTH_mask); diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 1d41c5cf78..81269350e4 100644 --- 
a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -33,8 +33,8 @@ #include "main/mtypes.h" #include "main/imports.h" +#include "radeon_debug.h" #include "r600_context.h" -#include "r700_debug.h" #include "r700_assembler.h" @@ -366,8 +366,8 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_COS: return 1; - default: r700_error(TODO_ASM_NEEDIMPINST, - "Need instruction operand number. \n");; + default: radeon_error( + "Need instruction operand number for %x.\n", pAsm->D.dst.opcode); }; return 3; @@ -531,7 +531,7 @@ int check_current_clause(r700_AssemblerBase* pAsm, case CF_EMPTY_CLAUSE: break; default: - r700_error(ERROR_ASM_VTX_CLAUSE, + radeon_error( "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type); return GL_FALSE; } @@ -565,7 +565,7 @@ int check_current_clause(r700_AssemblerBase* pAsm, } else { - r700_error(ERROR_ASM_ALLOCEXPORTCF, + radeon_error( "Error allocating new EXPORT CF instruction in check_current_clause. \n"); return GL_FALSE; } @@ -578,7 +578,7 @@ int check_current_clause(r700_AssemblerBase* pAsm, pAsm->cf_current_clause_type = CF_OTHER_CLAUSE; break; default: - r700_error(ERROR_ASM_UNKOWNCLAUSE, + radeon_error( "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. 
\n", (int) new_clause_type); return GL_FALSE; } @@ -611,7 +611,7 @@ GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, } else { - r700_error(ERROR_ASM_ALLOCVTXCF, "Could not allocate a new VFetch CF instruction."); + radeon_error("Could not allocate a new VFetch CF instruction.\n"); return GL_FALSE; } @@ -661,7 +661,7 @@ GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, } else { - r700_error(ERROR_ASM_ALLOCTEXCF, "Could not allocate a new TEX CF instruction."); + radeon_error("Could not allocate a new TEX CF instruction.\n"); return GL_FALSE; } @@ -786,6 +786,133 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, return GL_TRUE; } +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + VTX_FETCH_METHOD * pFetchMethod) +{ + GLuint client_size_inbyte; + GLuint data_format; + GLuint mega_fetch_count; + GLuint is_mega_fetch_flag; + + R700VertexGenericFetch* vfetch_instruction_ptr; + R700VertexGenericFetch* assembled_vfetch_instruction_ptr + = pAsm->vfetch_instruction_ptr_array[element]; + + if (assembled_vfetch_instruction_ptr == NULL) + { + vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch); + if (vfetch_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700VertexGenericFetch(vfetch_instruction_ptr); + } + else + { + vfetch_instruction_ptr = assembled_vfetch_instruction_ptr; + } + + data_format = GetSurfaceFormat(type, size, &client_size_inbyte); + + if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here + { + //TODO : mini fetch + } + else + { + mega_fetch_count = MEGA_FETCH_BYTES - 1; + is_mega_fetch_flag = 0x1; + pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte; + } + + vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH; + vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA; + 
vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + + vfetch_instruction_ptr->m_Word0.f.buffer_id = element; + vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0; + vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; + vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; + + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + + vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; + vfetch_instruction_ptr->m_Word1.f.data_format = data_format; + vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE; + + if(1 == _signed) + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED; + } + else + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED; + } + + if(GL_TRUE == normalize) + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM; + } + else + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT; + } + + // Destination register + vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; + vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE; + + vfetch_instruction_ptr->m_Word2.f.offset = 0; + vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0; + + vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag; + + if (assembled_vfetch_instruction_ptr == NULL) + { + if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) + { + return GL_FALSE; + } + + if (pAsm->vfetch_instruction_ptr_array[element] != NULL) + { + return GL_FALSE; + } + else + { + pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr; + } + } + + return GL_TRUE; 
+} + +GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm) +{ + GLint i; + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + pAsm->cf_current_vtx_clause_ptr = NULL; + + for (i=0; i<VERT_ATTRIB_MAX; i++) + { + pAsm->vfetch_instruction_ptr_array[ i ] = NULL; + } + + cleanup_vfetch_shaderinst(pAsm->pR700Shader); + + return GL_TRUE; +} + GLuint gethelpr(r700_AssemblerBase* pAsm) { GLuint r = pAsm->uHelpReg; @@ -1047,7 +1174,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, } break; default: - r700_error(ERROR_ASM_SRCARGUMENT, "Invalid source argument type"); + radeon_error("Invalid source argument type\n"); return GL_FALSE; } } @@ -1094,7 +1221,7 @@ GLboolean assemble_dst(r700_AssemblerBase *pAsm) } break; default: - r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type"); + radeon_error("Invalid destination output argument type\n"); return GL_FALSE; } @@ -1134,7 +1261,7 @@ GLboolean tex_dst(r700_AssemblerBase *pAsm) } else { - r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type"); + radeon_error("Invalid destination output argument type\n"); return GL_FALSE; } @@ -1149,46 +1276,54 @@ GLboolean tex_dst(r700_AssemblerBase *pAsm) GLboolean tex_src(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - + GLboolean bValidTexCoord = GL_FALSE; - switch (pILInst->SrcReg[0].File) - { + switch (pILInst->SrcReg[0].File) { + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + bValidTexCoord = GL_TRUE; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->aArgSubst[1]; + break; case PROGRAM_TEMPORARY: - bValidTexCoord = GL_TRUE; - - pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + pAsm->starting_temp_register_number; - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - - break; + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + + 
pAsm->starting_temp_register_number; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + break; case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) - { - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; - - pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; - } - break; + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + break; } if(GL_TRUE == bValidTexCoord) - { + { setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); } else { - r700_error(ERROR_ASM_BADTEXSRC, "Invalid source texcoord for TEX instruction"); + radeon_error("Invalid source texcoord for TEX instruction\n"); return GL_FALSE; } @@ -1201,11 +1336,11 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1; pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1; pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1; - + return GL_TRUE; } -GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm) +GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized) { PVSSRC * texture_coordinate_source; PVSSRC * texture_unit_source; @@ -1227,10 +1362,18 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm) tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; tex_instruction_ptr->m_Word1.f.lod_bias = 0x0; - 
tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED; - tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED; - tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED; - tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED; + if (normalized) { + tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED; + } else { + /* XXX: UNNORMALIZED tex coords have limited wrap modes */ + tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED; + } tex_instruction_ptr->m_Word2.f.offset_x = 0x0; tex_instruction_ptr->m_Word2.f.offset_y = 0x0; @@ -1261,7 +1404,7 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm) } else { - r700_error(ERROR_ASM_TEXDSTBADTYPE, "Only temp destination registers supported for TEX dest regs."); + radeon_error("Only temp destination registers supported for TEX dest regs.\n"); return GL_FALSE; } @@ -1354,7 +1497,7 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, } else { - r700_error(ERROR_ASM_ALUSRCBADTYPE, "Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.", + radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n", source_index, pSource->rtype); return GL_FALSE; } @@ -1389,7 +1532,7 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, src_chan = SQ_CHAN_X; break; default: - r700_error(ERROR_ASM_ALUSRCSELECT, "Unknown source select value (%d) in assemble_alu_src()."); + radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle); return GL_FALSE; break; } @@ -1424,7 
+1567,7 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg; break; default: - r700_error(ERROR_ASM_ALUSRCNUMBER, "Only three sources allowed in ALU opcodes."); + radeon_error("Only three sources allowed in ALU opcodes.\n"); return GL_FALSE; break; } @@ -1459,7 +1602,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, } else { - r700_error(ERROR_ASM_ALLOCALUCF, "Could not allocate a new ALU CF instruction."); + radeon_error("Could not allocate a new ALU CF instruction.\n"); return GL_FALSE; } @@ -1627,7 +1770,7 @@ GLboolean reserve_cfile(r700_AssemblerBase* pAsm, } else { - r700_error(ERROR_ASM_CONSTCHANNEL, "All cfile read ports are used, cannot reference C$sel, channel $chan."); + radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n"); return GL_FALSE; } return GL_TRUE; @@ -1641,7 +1784,7 @@ GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint } else if(pAsm->hw_gpr[cycle][chan] != (int)sel) { - r700_error(ERROR_ASM_BADGPRRESERVE, "Another scalar operation has already used GPR read port for given channel"); + radeon_error("Another scalar operation has already used GPR read port for given channel\n"); return GL_FALSE; } @@ -1681,7 +1824,7 @@ GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* p } break; default: - r700_error(ERROR_ASM_BADSCALARBZ, "Bad Scalar bank swizzle value"); + radeon_error("Bad Scalar bank swizzle value\n"); break; } @@ -1729,7 +1872,7 @@ GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* p } break; default: - r700_error(ERROR_ASM_BADVECTORBZ, "Bad Vec bank swizzle value"); + radeon_error("Bad Vec bank swizzle value\n"); return GL_FALSE; break; } @@ -2008,7 +2151,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) return GL_FALSE; } - if (pAsm->D.dst.math == 0) + if (uNumSrc > 1) { // Process source 1 current_source_index = 1; @@ 
-2048,7 +2191,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) } else { - r700_error(ERROR_ASM_ALUDSTBADTYPE, "Only temp destination registers supported for ALU dest regs."); + radeon_error("Only temp destination registers supported for ALU dest regs.\n"); return GL_FALSE; } @@ -2124,7 +2267,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; break; default: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = SQ_SEL_MASK; + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK; break; } alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; @@ -2153,7 +2296,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; break; default: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = SQ_SEL_MASK; + alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK; break; } alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; @@ -2194,19 +2337,29 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - if( GL_TRUE == IsTex(pILInst->Opcode) ) - { - if( GL_FALSE == assemble_tex_instruction(pAsm) ) - { - r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction"); - return GL_FALSE; - } + if( GL_TRUE == IsTex(pILInst->Opcode) && + /* handle const moves to temp register */ + !(pAsm->D.dst.opcode == SQ_OP2_INST_MOV) ) + { + if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { + if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) + { + radeon_error("Error assembling TEX instruction\n"); + return GL_FALSE; + } + } else { + if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) ) + { + radeon_error("Error assembling TEX instruction\n"); + return GL_FALSE; + } + } } else { //ALU if( GL_FALSE == assemble_alu_instruction(pAsm) ) { - r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling ALU instruction"); 
+ radeon_error("Error assembling ALU instruction\n"); return GL_FALSE; } } @@ -2351,7 +2504,7 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm) GLboolean assemble_BAD(char *opcode_str) { - r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction (%s)", opcode_str); + radeon_error("Not yet implemented instruction (%s)\n", opcode_str); return GL_FALSE; } @@ -2854,6 +3007,11 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + /* dst.y = max(src.x, 0.0) */ pAsm->D.dst.opcode = SQ_OP2_INST_MAX; pAsm->D.dst.rtype = dstType; @@ -2865,11 +3023,6 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_X; - pAsm->S[0].src.swizzley = SQ_SEL_X; - pAsm->S[0].src.swizzlez = SQ_SEL_X; - pAsm->S[0].src.swizzlew = SQ_SEL_X; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = tmp; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); @@ -2883,34 +3036,47 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* before: dst.w = log(src.y) - * after : dst.x = log(src.y) - * why change dest register is that dst.w has been initialized as 1 before - */ + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y); + + /* dst.z = log(src.y) */ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; pAsm->D.dst.math = 1; pAsm->D.dst.rtype = dstType; pAsm->D.dst.reg = dstReg; - pAsm->D.dst.writex = 1; + pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; - pAsm->D.dst.writez = 0; + pAsm->D.dst.writez = 1; pAsm->D.dst.writew = 0; pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = 
SQ_SEL_Y; - pAsm->S[0].src.swizzley = SQ_SEL_Y; - pAsm->S[0].src.swizzlez = SQ_SEL_Y; - pAsm->S[0].src.swizzlew = SQ_SEL_Y; if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } - /* before: tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */ - /* after : tmp.x = amd MUL_LIT(src.w, dst.x, src.x ) */ + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 2) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + + swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); + + /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; + pAsm->D.dst.math = 1; pAsm->D.dst.op3 = 1; pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; @@ -2922,29 +3088,19 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_W; - pAsm->S[0].src.swizzley = SQ_SEL_W; - pAsm->S[0].src.swizzlez = SQ_SEL_W; - pAsm->S[0].src.swizzlew = SQ_SEL_W; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = dstReg; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); noneg_PVSSRC(&(pAsm->S[1].src)); - pAsm->S[1].src.swizzlex = SQ_SEL_X; - pAsm->S[1].src.swizzley = SQ_SEL_X; - pAsm->S[1].src.swizzlez = SQ_SEL_X; - pAsm->S[1].src.swizzlew = SQ_SEL_X; + pAsm->S[1].src.swizzlex = SQ_SEL_Z; + pAsm->S[1].src.swizzley = SQ_SEL_Z; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_Z; pAsm->S[2].src.rtype = srcType; pAsm->S[2].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[2].src)); - pAsm->S[2].src.swizzlex = SQ_SEL_X; - pAsm->S[2].src.swizzley = SQ_SEL_X; - pAsm->S[2].src.swizzlez = SQ_SEL_X; - pAsm->S[2].src.swizzlew = SQ_SEL_X; if( GL_FALSE == next_ins(pAsm) ) { @@ -3358,32 
+3514,35 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: src_const = GL_TRUE; + break; case PROGRAM_TEMPORARY: case PROGRAM_INPUT: + default: src_const = GL_FALSE; + break; } - if (GL_TRUE == src_const) + if (GL_TRUE == src_const) { - r700_error(TODO_ASM_CONSTTEXADDR, "TODO: Texture coordinates from a constant register not supported."); - return GL_FALSE; + if ( GL_FALSE == mov_temp(pAsm, 0) ) + return GL_FALSE; } - switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) + switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) { case OPCODE_TEX: - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; - case OPCODE_TXB: - r700_error(TODO_ASM_TXB, "do not support TXB yet"); + case OPCODE_TXB: + radeon_error("do not support TXB yet\n"); return GL_FALSE; break; - case OPCODE_TXP: - /* TODO : tex proj version : divid first 3 components by 4th */ + case OPCODE_TXP: + /* TODO : tex proj version : divid first 3 components by 4th */ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; default: - r700_error(ERROR_ASM_BADTEXINST, "Internal error: bad texture op (not TEX)"); + radeon_error("Internal error: bad texture op (not TEX)\n"); return GL_FALSE; break; } @@ -3402,13 +3561,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { return GL_FALSE; } - + if( GL_FALSE == tex_src(pAsm) ) { return GL_FALSE; } - if ( GL_FALSE == next_ins(pAsm) ) + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } @@ -3565,12 +3724,12 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, break; case OPCODE_ARL: - r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARL "); + radeon_error("Not yet implemented instruction OPCODE_ARL \n"); //if ( GL_FALSE == assemble_BAD("ARL") ) return GL_FALSE; break; case OPCODE_ARR: - r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARR "); + radeon_error("Not yet implemented instruction OPCODE_ARR \n"); //if ( GL_FALSE == assemble_BAD("ARR") ) return 
GL_FALSE; break; @@ -3601,7 +3760,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_EXP: - r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_EXP "); + radeon_error("Not yet implemented instruction OPCODE_EXP \n"); //if ( GL_FALSE == assemble_BAD("EXP") ) return GL_FALSE; break; // approx of EX2 @@ -3637,7 +3796,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_LOG: - r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_LOG "); + radeon_error("Not yet implemented instruction OPCODE_LOG \n"); //if ( GL_FALSE == assemble_BAD("LOG") ) return GL_FALSE; break; // approx of LG2 @@ -3736,7 +3895,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_ELSE : - r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ELSE "); + radeon_error("Not yet implemented instruction OPCODE_ELSE \n"); //if ( GL_FALSE == assemble_BAD("ELSE") ) return GL_FALSE; break; @@ -3758,7 +3917,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_TRUE; default: - r700_error(ERROR_ASM_UNKNOWNILINST, "internal: unknown instruction"); + radeon_error("internal: unknown instruction\n"); return GL_FALSE; } } @@ -3802,7 +3961,7 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm, break; default: - r700_error(ERROR_ASM_BADEXPORTTYPE, "Unknown export type: %d", type); + radeon_error("Unknown export type: %d\n", type); return GL_FALSE; break; } @@ -3823,6 +3982,9 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm, if (export_count == 1) { ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number]; + /* exports Z as a float into Red channel */ + if (GL_TRUE == is_depth_export) + ucWriteMask = 0x1; if( (ucWriteMask & 0x1) != 0) { @@ -4014,6 +4176,22 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, export_starting_index++; } + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & 
unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + for(i=0; i<8; i++) { unBit = 1 << (VERT_RESULT_TEX0 + i); diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index e9b21b802e..4e6e20011a 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -411,6 +411,14 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, GLuint number_of_elements, GLenum dataElementType, VTX_FETCH_METHOD* pFetchMethod); +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + VTX_FETCH_METHOD * pFetchMethod); GLuint gethelpr(r700_AssemblerBase* pAsm); void resethelpr(r700_AssemblerBase* pAsm); void checkop_init(r700_AssemblerBase* pAsm); @@ -424,7 +432,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, GLboolean assemble_dst(r700_AssemblerBase *pAsm); GLboolean tex_dst(r700_AssemblerBase *pAsm); GLboolean tex_src(r700_AssemblerBase *pAsm); -GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm); +GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized); void initialize(r700_AssemblerBase *pAsm); GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, int source_index, diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 635dd58e72..783427a94c 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -27,6 +27,7 @@ #include "main/imports.h" #include "main/glheader.h" +#include "main/simple_list.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -38,256 +39,119 @@ #include "r700_vertprog.h" #include "r700_ioctl.h" -#define 
LINK_STATES(reg) \ -do \ -{ \ - pStateListWork->puiValue = (unsigned int*)&(r700->reg); \ - pStateListWork->unOffset = mm##reg - ASIC_CONTEXT_BASE_INDEX; \ - pStateListWork->pNext = pStateListWork + 1; \ - pStateListWork++; \ -}while(0) - -GLboolean r700InitChipObject(context_t *context) -{ - ContextState * pStateListWork; - - R700_CHIP_CONTEXT *r700 = &context->hw; - - /* init state list */ - r700->pStateList = (ContextState*) MALLOC (sizeof(ContextState)*sizeof(R700_CHIP_CONTEXT)/sizeof(unsigned int)); - pStateListWork = r700->pStateList; - - // misc - LINK_STATES(TA_CNTL_AUX); - LINK_STATES(VC_ENHANCE); - LINK_STATES(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ); - LINK_STATES(DB_DEBUG); - LINK_STATES(DB_WATERMARKS); - - // SC - LINK_STATES(PA_SC_SCREEN_SCISSOR_TL); - LINK_STATES(PA_SC_SCREEN_SCISSOR_BR); - LINK_STATES(PA_SC_WINDOW_OFFSET); - LINK_STATES(PA_SC_WINDOW_SCISSOR_TL); - LINK_STATES(PA_SC_WINDOW_SCISSOR_BR); - LINK_STATES(PA_SC_CLIPRECT_RULE); - LINK_STATES(PA_SC_CLIPRECT_0_TL); - LINK_STATES(PA_SC_CLIPRECT_0_BR); - LINK_STATES(PA_SC_CLIPRECT_1_TL); - LINK_STATES(PA_SC_CLIPRECT_1_BR); - LINK_STATES(PA_SC_CLIPRECT_2_TL); - LINK_STATES(PA_SC_CLIPRECT_2_BR); - LINK_STATES(PA_SC_CLIPRECT_3_TL); - LINK_STATES(PA_SC_CLIPRECT_3_BR); - LINK_STATES(PA_SC_EDGERULE); - LINK_STATES(PA_SC_GENERIC_SCISSOR_TL); - LINK_STATES(PA_SC_GENERIC_SCISSOR_BR); - LINK_STATES(PA_SC_LINE_STIPPLE); - LINK_STATES(PA_SC_MPASS_PS_CNTL); - LINK_STATES(PA_SC_MODE_CNTL); - LINK_STATES(PA_SC_LINE_CNTL); - LINK_STATES(PA_SC_AA_CONFIG); - LINK_STATES(PA_SC_AA_SAMPLE_LOCS_MCTX); - LINK_STATES(PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX); - LINK_STATES(PA_SC_AA_MASK); - - // SU - LINK_STATES(PA_SU_POINT_SIZE); - LINK_STATES(PA_SU_POINT_MINMAX); - LINK_STATES(PA_SU_LINE_CNTL); - LINK_STATES(PA_SU_SC_MODE_CNTL); - LINK_STATES(PA_SU_VTX_CNTL); - LINK_STATES(PA_SU_POLY_OFFSET_DB_FMT_CNTL); - LINK_STATES(PA_SU_POLY_OFFSET_CLAMP); - LINK_STATES(PA_SU_POLY_OFFSET_FRONT_SCALE); - LINK_STATES(PA_SU_POLY_OFFSET_FRONT_OFFSET); 
- LINK_STATES(PA_SU_POLY_OFFSET_BACK_SCALE); - LINK_STATES(PA_SU_POLY_OFFSET_BACK_OFFSET); - - // CL - LINK_STATES(PA_CL_CLIP_CNTL); - LINK_STATES(PA_CL_VTE_CNTL); - LINK_STATES(PA_CL_VS_OUT_CNTL); - LINK_STATES(PA_CL_NANINF_CNTL); - LINK_STATES(PA_CL_GB_VERT_CLIP_ADJ); - LINK_STATES(PA_CL_GB_VERT_DISC_ADJ); - LINK_STATES(PA_CL_GB_HORZ_CLIP_ADJ); - LINK_STATES(PA_CL_GB_HORZ_DISC_ADJ); - - // CB - LINK_STATES(CB_CLEAR_RED_R6XX); - LINK_STATES(CB_CLEAR_GREEN_R6XX); - LINK_STATES(CB_CLEAR_BLUE_R6XX); - LINK_STATES(CB_CLEAR_ALPHA_R6XX); - LINK_STATES(CB_TARGET_MASK); - LINK_STATES(CB_SHADER_MASK); - LINK_STATES(CB_BLEND_RED); - LINK_STATES(CB_BLEND_GREEN); - LINK_STATES(CB_BLEND_BLUE); - LINK_STATES(CB_BLEND_ALPHA); - LINK_STATES(CB_FOG_RED_R6XX); - LINK_STATES(CB_FOG_GREEN_R6XX); - LINK_STATES(CB_FOG_BLUE_R6XX); - LINK_STATES(CB_SHADER_CONTROL); - LINK_STATES(CB_COLOR_CONTROL); - LINK_STATES(CB_CLRCMP_CONTROL); - LINK_STATES(CB_CLRCMP_SRC); - LINK_STATES(CB_CLRCMP_DST); - LINK_STATES(CB_CLRCMP_MSK); - LINK_STATES(CB_BLEND_CONTROL); - - // SX - LINK_STATES(SX_MISC); - LINK_STATES(SX_ALPHA_TEST_CONTROL); - LINK_STATES(SX_ALPHA_REF); - - // VGT - LINK_STATES(VGT_MAX_VTX_INDX); - LINK_STATES(VGT_MIN_VTX_INDX); - LINK_STATES(VGT_INDX_OFFSET); - LINK_STATES(VGT_MULTI_PRIM_IB_RESET_INDX); - LINK_STATES(VGT_OUTPUT_PATH_CNTL); - LINK_STATES(VGT_HOS_CNTL); - LINK_STATES(VGT_HOS_MAX_TESS_LEVEL); - LINK_STATES(VGT_HOS_MIN_TESS_LEVEL); - LINK_STATES(VGT_HOS_REUSE_DEPTH); - LINK_STATES(VGT_GROUP_PRIM_TYPE); - LINK_STATES(VGT_GROUP_FIRST_DECR); - LINK_STATES(VGT_GROUP_DECR); - LINK_STATES(VGT_GROUP_VECT_0_CNTL); - LINK_STATES(VGT_GROUP_VECT_1_CNTL); - LINK_STATES(VGT_GROUP_VECT_0_FMT_CNTL); - LINK_STATES(VGT_GROUP_VECT_1_FMT_CNTL); - LINK_STATES(VGT_GS_MODE); - LINK_STATES(VGT_PRIMITIVEID_EN); - LINK_STATES(VGT_DMA_NUM_INSTANCES); - LINK_STATES(VGT_MULTI_PRIM_IB_RESET_EN); - LINK_STATES(VGT_INSTANCE_STEP_RATE_0); - LINK_STATES(VGT_INSTANCE_STEP_RATE_1); - 
LINK_STATES(VGT_STRMOUT_EN); - LINK_STATES(VGT_REUSE_OFF); - LINK_STATES(VGT_VTX_CNT_EN); - LINK_STATES(VGT_STRMOUT_BUFFER_EN); - - LINK_STATES(SQ_VTX_SEMANTIC_0); - LINK_STATES(SQ_VTX_SEMANTIC_1); - LINK_STATES(SQ_VTX_SEMANTIC_2); - LINK_STATES(SQ_VTX_SEMANTIC_3); - LINK_STATES(SQ_VTX_SEMANTIC_4); - LINK_STATES(SQ_VTX_SEMANTIC_5); - LINK_STATES(SQ_VTX_SEMANTIC_6); - LINK_STATES(SQ_VTX_SEMANTIC_7); - LINK_STATES(SQ_VTX_SEMANTIC_8); - LINK_STATES(SQ_VTX_SEMANTIC_9); - LINK_STATES(SQ_VTX_SEMANTIC_10); - LINK_STATES(SQ_VTX_SEMANTIC_11); - LINK_STATES(SQ_VTX_SEMANTIC_12); - LINK_STATES(SQ_VTX_SEMANTIC_13); - LINK_STATES(SQ_VTX_SEMANTIC_14); - LINK_STATES(SQ_VTX_SEMANTIC_15); - LINK_STATES(SQ_VTX_SEMANTIC_16); - LINK_STATES(SQ_VTX_SEMANTIC_17); - LINK_STATES(SQ_VTX_SEMANTIC_18); - LINK_STATES(SQ_VTX_SEMANTIC_19); - LINK_STATES(SQ_VTX_SEMANTIC_20); - LINK_STATES(SQ_VTX_SEMANTIC_21); - LINK_STATES(SQ_VTX_SEMANTIC_22); - LINK_STATES(SQ_VTX_SEMANTIC_23); - LINK_STATES(SQ_VTX_SEMANTIC_24); - LINK_STATES(SQ_VTX_SEMANTIC_25); - LINK_STATES(SQ_VTX_SEMANTIC_26); - LINK_STATES(SQ_VTX_SEMANTIC_27); - LINK_STATES(SQ_VTX_SEMANTIC_28); - LINK_STATES(SQ_VTX_SEMANTIC_29); - LINK_STATES(SQ_VTX_SEMANTIC_30); - LINK_STATES(SQ_VTX_SEMANTIC_31); - - // SPI - LINK_STATES(SPI_VS_OUT_ID_0); - LINK_STATES(SPI_VS_OUT_ID_1); - LINK_STATES(SPI_VS_OUT_ID_2); - LINK_STATES(SPI_VS_OUT_ID_3); - LINK_STATES(SPI_VS_OUT_ID_4); - LINK_STATES(SPI_VS_OUT_ID_5); - LINK_STATES(SPI_VS_OUT_ID_6); - LINK_STATES(SPI_VS_OUT_ID_7); - LINK_STATES(SPI_VS_OUT_ID_8); - LINK_STATES(SPI_VS_OUT_ID_9); - - LINK_STATES(SPI_PS_INPUT_CNTL_0); - LINK_STATES(SPI_PS_INPUT_CNTL_1); - LINK_STATES(SPI_PS_INPUT_CNTL_2); - LINK_STATES(SPI_PS_INPUT_CNTL_3); - LINK_STATES(SPI_PS_INPUT_CNTL_4); - LINK_STATES(SPI_PS_INPUT_CNTL_5); - LINK_STATES(SPI_PS_INPUT_CNTL_6); - LINK_STATES(SPI_PS_INPUT_CNTL_7); - LINK_STATES(SPI_PS_INPUT_CNTL_8); - LINK_STATES(SPI_PS_INPUT_CNTL_9); - LINK_STATES(SPI_PS_INPUT_CNTL_10); - 
LINK_STATES(SPI_PS_INPUT_CNTL_11); - LINK_STATES(SPI_PS_INPUT_CNTL_12); - LINK_STATES(SPI_PS_INPUT_CNTL_13); - LINK_STATES(SPI_PS_INPUT_CNTL_14); - LINK_STATES(SPI_PS_INPUT_CNTL_15); - LINK_STATES(SPI_PS_INPUT_CNTL_16); - LINK_STATES(SPI_PS_INPUT_CNTL_17); - LINK_STATES(SPI_PS_INPUT_CNTL_18); - LINK_STATES(SPI_PS_INPUT_CNTL_19); - LINK_STATES(SPI_PS_INPUT_CNTL_20); - LINK_STATES(SPI_PS_INPUT_CNTL_21); - LINK_STATES(SPI_PS_INPUT_CNTL_22); - LINK_STATES(SPI_PS_INPUT_CNTL_23); - LINK_STATES(SPI_PS_INPUT_CNTL_24); - LINK_STATES(SPI_PS_INPUT_CNTL_25); - LINK_STATES(SPI_PS_INPUT_CNTL_26); - LINK_STATES(SPI_PS_INPUT_CNTL_27); - LINK_STATES(SPI_PS_INPUT_CNTL_28); - LINK_STATES(SPI_PS_INPUT_CNTL_29); - LINK_STATES(SPI_PS_INPUT_CNTL_30); - LINK_STATES(SPI_PS_INPUT_CNTL_31); - - LINK_STATES(SPI_VS_OUT_CONFIG); - LINK_STATES(SPI_THREAD_GROUPING); - LINK_STATES(SPI_PS_IN_CONTROL_0); - LINK_STATES(SPI_PS_IN_CONTROL_1); - LINK_STATES(SPI_INTERP_CONTROL_0); - LINK_STATES(SPI_INPUT_Z); - LINK_STATES(SPI_FOG_CNTL); - LINK_STATES(SPI_FOG_FUNC_SCALE); - LINK_STATES(SPI_FOG_FUNC_BIAS); - - // SQ - LINK_STATES(SQ_ESGS_RING_ITEMSIZE); - LINK_STATES(SQ_GSVS_RING_ITEMSIZE); - LINK_STATES(SQ_ESTMP_RING_ITEMSIZE); - LINK_STATES(SQ_GSTMP_RING_ITEMSIZE); - LINK_STATES(SQ_VSTMP_RING_ITEMSIZE); - LINK_STATES(SQ_PSTMP_RING_ITEMSIZE); - LINK_STATES(SQ_FBUF_RING_ITEMSIZE); - LINK_STATES(SQ_REDUC_RING_ITEMSIZE); - //LINK_STATES(SQ_GS_VERT_ITEMSIZE); - - pStateListWork->puiValue = (unsigned int*)&(r700->SQ_GS_VERT_ITEMSIZE); - pStateListWork->unOffset = mmSQ_GS_VERT_ITEMSIZE - ASIC_CONTEXT_BASE_INDEX; - pStateListWork->pNext = NULL; /* END OF STATE LIST */ - - return GL_TRUE; -} - -void r700SetupVTXConstants(GLcontext * ctx, - unsigned int nStreamID, - void * pAos, - unsigned int size, /* number of elements in vector */ - unsigned int stride, - unsigned int count) /* number of vectors in stream */ +#include "radeon_mipmap_tree.h" + +static void r700SendTexState(GLcontext *ctx, struct 
radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + struct radeon_bo *bo = NULL; + unsigned int i; + BATCH_LOCALS(&context->radeon); + + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = r700->textures[i]; + if (t) { + if (!t->image_override) + bo = t->mt->bo; + else + bo = t->bo; + if (bo) { + + r700SyncSurf(context, bo, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, + 0, TC_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(9 + 4); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + R600_OUT_BATCH(i * 7); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); + R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, + bo, + 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3, + bo, + r700->textures[i]->SQ_TEX_RESOURCE3, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + COMMIT_BATCH(); + } + } + } + } +} + +static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + unsigned int i; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = r700->textures[i]; + if (t) { + BEGIN_BATCH_NO_AUTOSTATE(5); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); + 
R600_OUT_BATCH(i * 3); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); + END_BATCH(); + COMMIT_BATCH(); + } + } + } +} + +static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + unsigned int i; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = r700->textures[i]; + if (t) { + BEGIN_BATCH_NO_AUTOSTATE(2 + 4); + R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA); + END_BATCH(); + COMMIT_BATCH(); + } + } + } +} + +static void r700SetupVTXConstants(GLcontext * ctx, + unsigned int nStreamID, + void * pAos, + unsigned int size, /* number of elements in vector */ + unsigned int stride, + unsigned int count) /* number of vectors in stream */ { context_t *context = R700_CONTEXT(ctx); - uint32_t *dest; struct radeon_aos * paos = (struct radeon_aos *)pAos; - offset_modifiers offset_mod = {NO_SHIFT, 0, 0xFFFFFFFF}; - BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); unsigned int uSQ_VTX_CONSTANT_WORD0_0; unsigned int uSQ_VTX_CONSTANT_WORD1_0; @@ -295,9 +159,13 @@ void r700SetupVTXConstants(GLcontext * ctx, unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0; unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0; + if (!paos->bo) + return; + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) || (context->radeon.radeonScreen->chip_family == 
CHIP_FAMILY_RV620) || (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) || (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710)) r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit); else @@ -306,55 +174,153 @@ void r700SetupVTXConstants(GLcontext * ctx, uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; uSQ_VTX_CONSTANT_WORD1_0 = count * (size * 4) - 1; - uSQ_VTX_CONSTANT_WORD2_0 |= 0 << BASE_ADDRESS_HI_shift /* TODO */ - |stride << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift - |GetSurfaceFormat(GL_FLOAT, size, NULL) << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift /* TODO : trace back api for initial data type, not only GL_FLOAT */ - |SQ_NUM_FORMAT_SCALED << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift - |SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; - - uSQ_VTX_CONSTANT_WORD3_0 |= 1 << MEM_REQUEST_SIZE_shift; + SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */ + SETfield(uSQ_VTX_CONSTANT_WORD2_0, stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask); + SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, size, NULL), + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */ + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); - uSQ_VTX_CONSTANT_WORD6_0 |= SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift; + SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask); + SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER, + SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask); - BEGIN_BATCH_NO_AUTOSTATE(9); + 
BEGIN_BATCH_NO_AUTOSTATE(9 + 2); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); - + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD6_0); R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0, paos->bo, uSQ_VTX_CONSTANT_WORD0_0, - RADEON_GEM_DOMAIN_GTT, 0, 0, &offset_mod); + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + COMMIT_BATCH(); + +} + +extern int getTypeSize(GLenum type); +static void r700SetupVTXConstants2(GLcontext * ctx, + void * pAos, + StreamDesc * pStreamDesc) +{ + context_t *context = R700_CONTEXT(ctx); + struct radeon_aos * paos = (struct radeon_aos *)pAos; + unsigned int nVBsize; + BATCH_LOCALS(&context->radeon); + + unsigned int uSQ_VTX_CONSTANT_WORD0_0; + unsigned int uSQ_VTX_CONSTANT_WORD1_0; + unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0; + + if (!paos->bo) + return; + + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710)) + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit); + else + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); + + if(0 == pStreamDesc->stride) + { + nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type); + } + else + { + nVBsize = paos->count * pStreamDesc->stride; + } + + uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; + uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1; + + 
SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */ + SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask); + SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL), + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */ + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); + + SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask); + SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER, + SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask); + + BEGIN_BATCH_NO_AUTOSTATE(9 + 2); + + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0); R600_OUT_BATCH(0); R600_OUT_BATCH(0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD6_0); - + R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0, + paos->bo, + uSQ_VTX_CONSTANT_WORD0_0, + RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); COMMIT_BATCH(); } -int r700SetupStreams(GLcontext * ctx) +void r700SetupStreams(GLcontext *ctx) { context_t *context = R700_CONTEXT(ctx); - - BATCH_LOCALS(&context->radeon); - - struct r700_vertex_program *vpc - = (struct r700_vertex_program *)ctx->VertexProgram._Current; - + struct r700_vertex_program *vp = context->selected_vp; TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; + struct vertex_buffer *vb = &tnl->vb; + unsigned int i, j 
= 0; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + R600_STATECHANGE(context, vtx); + + for(i=0; i<VERT_ATTRIB_MAX; i++) { + if(vp->mesa_program->Base.InputsRead & (1 << i)) { + rcommon_emit_vector(ctx, + &context->radeon.tcl.aos[j], + vb->AttribPtr[i]->data, + vb->AttribPtr[i]->size, + vb->AttribPtr[i]->stride, + vb->Count); + j++; + } + } + context->radeon.tcl.aos_count = j; +} - unsigned int unBit; +static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vp = context->selected_vp; unsigned int i, j = 0; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + if (context->radeon.tcl.aos_count == 0) + return; BEGIN_BATCH_NO_AUTOSTATE(6); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); - R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); R600_OUT_BATCH(0); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); @@ -363,168 +329,188 @@ int r700SetupStreams(GLcontext * ctx) END_BATCH(); COMMIT_BATCH(); - context->radeon.tcl.aos_count = 0; - for(i=0; i<VERT_ATTRIB_MAX; i++) - { - unBit = 1 << i; - if(vpc->mesa_program.Base.InputsRead & unBit) - { - rcommon_emit_vector(ctx, - &context->radeon.tcl.aos[j], - vb->AttribPtr[i]->data, - vb->AttribPtr[i]->size, - vb->AttribPtr[i]->stride, - vb->Count); - - /* currently aos are packed */ - r700SetupVTXConstants(ctx, + for(i=0; i<VERT_ATTRIB_MAX; i++) { + if(vp->mesa_program->Base.InputsRead & (1 << i)) + { + if(1 == context->selected_vp->uiVersion) + { + /* currently aos are packed */ + r700SetupVTXConstants(ctx, i, (void*)(&context->radeon.tcl.aos[j]), (unsigned int)context->radeon.tcl.aos[j].components, (unsigned int)context->radeon.tcl.aos[j].stride * 4, (unsigned int)context->radeon.tcl.aos[j].count); - j++; - context->radeon.tcl.aos_count++; - } - } + } + else + { /* 
context->selected_vp->uiVersion == 2 : aos not always packed */ + r700SetupVTXConstants2(ctx, + (void*)(&context->radeon.tcl.aos[j]), + &(context->stream_desc[j])); + } + j++; + } + } +} - return R600_FALLBACK_NONE; +static void r700SetRenderTarget(context_t *context, int id) +{ + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + + struct radeon_renderbuffer *rrb; + unsigned int nPitchInPixel; + + rrb = radeon_get_colorbuffer(&context->radeon); + if (!rrb || !rrb->bo) { + return; + } + + R600_STATECHANGE(context, cb_target); + + /* color buffer */ + r700->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset; + + nPitchInPixel = rrb->pitch/rrb->cpp; + SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, (nPitchInPixel/8)-1, + PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); + SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1, + SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); + r700->render_target[id].CB_COLOR0_BASE.u32All = 0; + SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask); + SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL, + CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); + if(4 == rrb->cpp) + { + SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, COLOR_8_8_8_8, + CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask); + SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, SWAP_ALT, COMP_SWAP_shift, COMP_SWAP_mask); + } + else + { + SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, COLOR_5_6_5, + CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask); + SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, SWAP_ALT_REV, + COMP_SWAP_shift, COMP_SWAP_mask); + } + SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit); + SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_CLAMP_bit); + 
SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + + r700->render_target[id].enabled = GL_TRUE; } -GLboolean r700SendContextStates(context_t *context) +static void r700SetDepthTarget(context_t *context) { - BATCH_LOCALS(&context->radeon); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + struct radeon_renderbuffer *rrb; + unsigned int nPitchInPixel; - ContextState * pState = r700->pStateList; - ContextState * pInit; - unsigned int toSend; - unsigned int ui; + rrb = radeon_get_depthbuffer(&context->radeon); + if (!rrb) + return; - while(NULL != pState) - { - toSend = 1; - - pInit = pState; - - while(NULL != pState->pNext) - { - if ((pState->pNext->unOffset - pState->unOffset) > 1) - { - break; - } - else - { - pState = pState->pNext; - toSend++; - } - } + R600_STATECHANGE(context, db_target); - pState = pState->pNext; + /* depth buf */ + r700->DB_DEPTH_SIZE.u32All = 0; + r700->DB_DEPTH_BASE.u32All = 0; + r700->DB_DEPTH_INFO.u32All = 0; + r700->DB_DEPTH_VIEW.u32All = 0; - BEGIN_BATCH_NO_AUTOSTATE(toSend + 2); - R600_OUT_BATCH_REGSEQ(((pInit->unOffset + ASIC_CONTEXT_BASE_INDEX)<<2), toSend); - for(ui=0; ui<toSend; ui++) - { - R600_OUT_BATCH(*(pInit->puiValue)); - pInit = pInit->pNext; - }; - END_BATCH(); - }; - COMMIT_BATCH(); + nPitchInPixel = rrb->pitch/rrb->cpp; - return GL_TRUE; -} + SETfield(r700->DB_DEPTH_SIZE.u32All, (nPitchInPixel/8)-1, + PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); + SETfield(r700->DB_DEPTH_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1, + SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); /* size in pixel / 64 - 1 */ + if(4 == rrb->cpp) + { + SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_8_24, + DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask); + } + else + { + SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16, + DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask); + } + 
SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_1D_TILED_THIN1, + DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask); + /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* z buffer sie may much bigger than what need, so use actual used h. */ +} -GLboolean r700SendDepthTargetState(context_t *context, int id) +static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); struct radeon_renderbuffer *rrb; - offset_modifiers offset_mod; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); rrb = radeon_get_depthbuffer(&context->radeon); if (!rrb || !rrb->bo) { fprintf(stderr, "no rrb\n"); - return GL_FALSE; + return; } - offset_mod.shift = NO_SHIFT; - offset_mod.shiftbits = 0; - offset_mod.mask = 0xFFFFFFFF; + r700SetDepthTarget(context); - BEGIN_BATCH_NO_AUTOSTATE(9); + BEGIN_BATCH_NO_AUTOSTATE(8 + 2); R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2); R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All); R600_OUT_BATCH(r700->DB_DEPTH_VIEW.u32All); - R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 3); + R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 2); + R600_OUT_BATCH(r700->DB_DEPTH_BASE.u32All); + R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All); R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All, rrb->bo, r700->DB_DEPTH_BASE.u32All, - 0, RADEON_GEM_DOMAIN_VRAM, 0, &offset_mod); - R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All); - R600_OUT_BATCH(r700->DB_HTILE_DATA_BASE.u32All); + 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(24); - R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2); - R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All); - R600_OUT_BATCH(r700->DB_DEPTH_CLEAR.u32All); - - R600_OUT_BATCH_REGSEQ(DB_STENCILREFMASK, 2); - R600_OUT_BATCH(r700->DB_STENCILREFMASK.u32All); - R600_OUT_BATCH(r700->DB_STENCILREFMASK_BF.u32All); - - R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 
r700->DB_DEPTH_CONTROL.u32All); - R600_OUT_BATCH_REGVAL(DB_SHADER_CONTROL, r700->DB_SHADER_CONTROL.u32All); - - R600_OUT_BATCH_REGSEQ(DB_RENDER_CONTROL, 2); - R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All); - R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All); - - R600_OUT_BATCH_REGVAL(DB_HTILE_SURFACE, r700->DB_HTILE_SURFACE.u32All); - R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All); - END_BATCH(); + if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) && + (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) { + BEGIN_BATCH_NO_AUTOSTATE(2); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0)); + R600_OUT_BATCH(1 << 0); + END_BATCH(); + } COMMIT_BATCH(); - r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, - DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); - - return GL_TRUE; } -GLboolean r700SendRenderTargetState(context_t *context, int id) +static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); struct radeon_renderbuffer *rrb; - offset_modifiers offset_mod; BATCH_LOCALS(&context->radeon); + int id = 0; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); rrb = radeon_get_colorbuffer(&context->radeon); if (!rrb || !rrb->bo) { fprintf(stderr, "no rrb\n"); - return GL_FALSE; + return; } + r700SetRenderTarget(context, 0); + if (id > R700_MAX_RENDER_TARGETS) - return GL_FALSE; + return; if (!r700->render_target[id].enabled) - return GL_FALSE; - - offset_mod.shift = NO_SHIFT; - offset_mod.shiftbits = 0; - offset_mod.mask = 0xFFFFFFFF; + return; - BEGIN_BATCH_NO_AUTOSTATE(3); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1); + R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_BASE.u32All); R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, rrb->bo, r700->render_target[id].CB_COLOR0_BASE.u32All, - 0, RADEON_GEM_DOMAIN_VRAM, 
0, &offset_mod); + 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) && @@ -544,42 +530,32 @@ GLboolean r700SendRenderTargetState(context_t *context, int id) R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All); END_BATCH(); - if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) { - BEGIN_BATCH_NO_AUTOSTATE(3); - R600_OUT_BATCH_REGVAL(CB_BLEND0_CONTROL + (4 * id), r700->render_target[id].CB_BLEND0_CONTROL.u32All); - END_BATCH(); - } - COMMIT_BATCH(); - r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, - CB_ACTION_ENA_bit | (1 << (id + 6))); - - return GL_TRUE; } -GLboolean r700SendPSState(context_t *context) +static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_renderbuffer *rrb; struct radeon_bo * pbo; - offset_modifiers offset_mod; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); - offset_mod.shift = NO_SHIFT; - offset_mod.shiftbits = 0; - offset_mod.mask = 0xFFFFFFFF; + if (!pbo) + return; r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1); + R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All); R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All, pbo, r700->ps.SQ_PGM_START_PS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0, &offset_mod); + RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(9); @@ -590,31 +566,30 @@ GLboolean r700SendPSState(context_t *context) COMMIT_BATCH(); - return GL_TRUE; } -GLboolean r700SendVSState(context_t *context) +static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = 
R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_renderbuffer *rrb; struct radeon_bo * pbo; - offset_modifiers offset_mod; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); - offset_mod.shift = NO_SHIFT; - offset_mod.shiftbits = 0; - offset_mod.mask = 0xFFFFFFFF; + if (!pbo) + return; r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1); + R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All); R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All, pbo, r700->vs.SQ_PGM_START_VS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0, &offset_mod); + RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(6); @@ -623,17 +598,15 @@ GLboolean r700SendVSState(context_t *context) END_BATCH(); COMMIT_BATCH(); - - return GL_TRUE; } -GLboolean r700SendFSState(context_t *context) +static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_renderbuffer *rrb; struct radeon_bo * pbo; - offset_modifiers offset_mod; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); /* XXX fixme * R6xx chips require a FS be emitted, even if it's not used. 
@@ -646,18 +619,18 @@ GLboolean r700SendFSState(context_t *context) r700->fs.SQ_PGM_CF_OFFSET_FS.u32All = 0; /* XXX */ - offset_mod.shift = NO_SHIFT; - offset_mod.shiftbits = 0; - offset_mod.mask = 0xFFFFFFFF; + if (!pbo) + return; r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(SQ_PGM_START_FS, 1); + R600_OUT_BATCH(r700->fs.SQ_PGM_START_FS.u32All); R600_OUT_BATCH_RELOC(r700->fs.SQ_PGM_START_FS.u32All, pbo, r700->fs.SQ_PGM_START_FS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0, &offset_mod); + RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(6); @@ -667,21 +640,21 @@ GLboolean r700SendFSState(context_t *context) COMMIT_BATCH(); - return GL_TRUE; } -GLboolean r700SendViewportState(context_t *context, int id) +static void r700SendViewportState(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_renderbuffer *rrb; - offset_modifiers offset_mod; BATCH_LOCALS(&context->radeon); + int id = 0; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); if (id > R700_MAX_VIEWPORTS) - return GL_FALSE; + return; if (!r700->viewport[id].enabled) - return GL_FALSE; + return; BEGIN_BATCH_NO_AUTOSTATE(16); R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_SCISSOR_0_TL + (8 * id), 2); @@ -701,15 +674,16 @@ GLboolean r700SendViewportState(context_t *context, int id) COMMIT_BATCH(); - return GL_TRUE; } -GLboolean r700SendSQConfig(context_t *context) +static void r700SendSQConfig(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - BEGIN_BATCH_NO_AUTOSTATE(8); + BEGIN_BATCH_NO_AUTOSTATE(34); R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6); 
R600_OUT_BATCH(r700->sq_config.SQ_CONFIG.u32All); R600_OUT_BATCH(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All); @@ -717,17 +691,35 @@ GLboolean r700SendSQConfig(context_t *context) R600_OUT_BATCH(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All); R600_OUT_BATCH(r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All); R600_OUT_BATCH(r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All); + + R600_OUT_BATCH_REGVAL(TA_CNTL_AUX, r700->TA_CNTL_AUX.u32All); + R600_OUT_BATCH_REGVAL(VC_ENHANCE, r700->VC_ENHANCE.u32All); + R600_OUT_BATCH_REGVAL(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, r700->SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All); + R600_OUT_BATCH_REGVAL(DB_DEBUG, r700->DB_DEBUG.u32All); + R600_OUT_BATCH_REGVAL(DB_WATERMARKS, r700->DB_WATERMARKS.u32All); + + R600_OUT_BATCH_REGSEQ(SQ_ESGS_RING_ITEMSIZE, 9); + R600_OUT_BATCH(r700->SQ_ESGS_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_GSVS_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_ESTMP_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_GSTMP_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_VSTMP_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_PSTMP_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_FBUF_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_REDUC_RING_ITEMSIZE.u32All); + R600_OUT_BATCH(r700->SQ_GS_VERT_ITEMSIZE.u32All); END_BATCH(); - COMMIT_BATCH(); - return GL_TRUE; + COMMIT_BATCH(); } -GLboolean r700SendUCPState(context_t *context) +static void r700SendUCPState(GLcontext *ctx, struct radeon_state_atom *atom) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); int i; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_MAX_UCP; i++) { if (r700->ucp[i].enabled) { @@ -741,7 +733,626 @@ GLboolean r700SendUCPState(context_t *context) COMMIT_BATCH(); } } +} + +static void r700SendSPIState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = 
R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + unsigned int ui; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + BEGIN_BATCH_NO_AUTOSTATE(59 + R700_MAX_SHADER_EXPORTS); + + R600_OUT_BATCH_REGSEQ(SQ_VTX_SEMANTIC_0, 32); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_0.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_1.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_2.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_3.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_4.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_5.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_6.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_7.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_8.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_9.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_10.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_11.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_12.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_13.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_14.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_15.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_16.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_17.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_18.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_19.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_20.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_21.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_22.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_23.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_24.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_25.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_26.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_27.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_28.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_29.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_30.u32All); + R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_31.u32All); + + R600_OUT_BATCH_REGSEQ(SPI_VS_OUT_ID_0, 10); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_0.u32All); + 
R600_OUT_BATCH(r700->SPI_VS_OUT_ID_1.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_2.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_3.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_4.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_5.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_6.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_7.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_8.u32All); + R600_OUT_BATCH(r700->SPI_VS_OUT_ID_9.u32All); + + R600_OUT_BATCH_REGSEQ(SPI_VS_OUT_CONFIG, 9); + R600_OUT_BATCH(r700->SPI_VS_OUT_CONFIG.u32All); + R600_OUT_BATCH(r700->SPI_THREAD_GROUPING.u32All); + R600_OUT_BATCH(r700->SPI_PS_IN_CONTROL_0.u32All); + R600_OUT_BATCH(r700->SPI_PS_IN_CONTROL_1.u32All); + R600_OUT_BATCH(r700->SPI_INTERP_CONTROL_0.u32All); + R600_OUT_BATCH(r700->SPI_INPUT_Z.u32All); + R600_OUT_BATCH(r700->SPI_FOG_CNTL.u32All); + R600_OUT_BATCH(r700->SPI_FOG_FUNC_SCALE.u32All); + R600_OUT_BATCH(r700->SPI_FOG_FUNC_BIAS.u32All); + + R600_OUT_BATCH_REGSEQ(SPI_PS_INPUT_CNTL_0, R700_MAX_SHADER_EXPORTS); + for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++) + R600_OUT_BATCH(r700->SPI_PS_INPUT_CNTL[ui].u32All); + + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendVGTState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + BEGIN_BATCH_NO_AUTOSTATE(41); + + R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4); + R600_OUT_BATCH(r700->VGT_MAX_VTX_INDX.u32All); + R600_OUT_BATCH(r700->VGT_MIN_VTX_INDX.u32All); + R600_OUT_BATCH(r700->VGT_INDX_OFFSET.u32All); + R600_OUT_BATCH(r700->VGT_MULTI_PRIM_IB_RESET_INDX.u32All); + + R600_OUT_BATCH_REGSEQ(VGT_OUTPUT_PATH_CNTL, 13); + R600_OUT_BATCH(r700->VGT_OUTPUT_PATH_CNTL.u32All); + R600_OUT_BATCH(r700->VGT_HOS_CNTL.u32All); + R600_OUT_BATCH(r700->VGT_HOS_MAX_TESS_LEVEL.u32All); + R600_OUT_BATCH(r700->VGT_HOS_MIN_TESS_LEVEL.u32All); + 
R600_OUT_BATCH(r700->VGT_HOS_REUSE_DEPTH.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_PRIM_TYPE.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_FIRST_DECR.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_DECR.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_VECT_0_CNTL.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_VECT_1_CNTL.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_VECT_0_FMT_CNTL.u32All); + R600_OUT_BATCH(r700->VGT_GROUP_VECT_1_FMT_CNTL.u32All); + R600_OUT_BATCH(r700->VGT_GS_MODE.u32All); + + R600_OUT_BATCH_REGVAL(VGT_PRIMITIVEID_EN, r700->VGT_PRIMITIVEID_EN.u32All); + R600_OUT_BATCH_REGVAL(VGT_MULTI_PRIM_IB_RESET_EN, r700->VGT_MULTI_PRIM_IB_RESET_EN.u32All); + R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_0, r700->VGT_INSTANCE_STEP_RATE_0.u32All); + R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_1, r700->VGT_INSTANCE_STEP_RATE_1.u32All); + + R600_OUT_BATCH_REGSEQ(VGT_STRMOUT_EN, 3); + R600_OUT_BATCH(r700->VGT_STRMOUT_EN.u32All); + R600_OUT_BATCH(r700->VGT_REUSE_OFF.u32All); + R600_OUT_BATCH(r700->VGT_VTX_CNT_EN.u32All); + + R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, r700->VGT_STRMOUT_BUFFER_EN.u32All); + + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendSXState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + BEGIN_BATCH_NO_AUTOSTATE(9); + R600_OUT_BATCH_REGVAL(SX_MISC, r700->SX_MISC.u32All); + R600_OUT_BATCH_REGVAL(SX_ALPHA_TEST_CONTROL, r700->SX_ALPHA_TEST_CONTROL.u32All); + R600_OUT_BATCH_REGVAL(SX_ALPHA_REF, r700->SX_ALPHA_REF.u32All); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + 
BEGIN_BATCH_NO_AUTOSTATE(23); + R600_OUT_BATCH_REGVAL(DB_HTILE_DATA_BASE, r700->DB_HTILE_DATA_BASE.u32All); + + R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2); + R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All); + R600_OUT_BATCH(r700->DB_DEPTH_CLEAR.u32All); + + R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, r700->DB_DEPTH_CONTROL.u32All); + R600_OUT_BATCH_REGVAL(DB_SHADER_CONTROL, r700->DB_SHADER_CONTROL.u32All); + + R600_OUT_BATCH_REGSEQ(DB_RENDER_CONTROL, 2); + R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All); + R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All); + + R600_OUT_BATCH_REGVAL(DB_HTILE_SURFACE, r700->DB_HTILE_SURFACE.u32All); + R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All); + + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendStencilState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(4); + R600_OUT_BATCH_REGSEQ(DB_STENCILREFMASK, 2); + R600_OUT_BATCH(r700->DB_STENCILREFMASK.u32All); + R600_OUT_BATCH(r700->DB_STENCILREFMASK_BF.u32All); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendCBState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) { + BEGIN_BATCH_NO_AUTOSTATE(11); + R600_OUT_BATCH_REGSEQ(CB_CLEAR_RED, 4); + R600_OUT_BATCH(r700->CB_CLEAR_RED_R6XX.u32All); + R600_OUT_BATCH(r700->CB_CLEAR_GREEN_R6XX.u32All); + R600_OUT_BATCH(r700->CB_CLEAR_BLUE_R6XX.u32All); + R600_OUT_BATCH(r700->CB_CLEAR_ALPHA_R6XX.u32All); + R600_OUT_BATCH_REGSEQ(CB_FOG_RED, 3); + R600_OUT_BATCH(r700->CB_FOG_RED_R6XX.u32All); + R600_OUT_BATCH(r700->CB_FOG_GREEN_R6XX.u32All); + 
R600_OUT_BATCH(r700->CB_FOG_BLUE_R6XX.u32All); + END_BATCH(); + } + + BEGIN_BATCH_NO_AUTOSTATE(7); + R600_OUT_BATCH_REGSEQ(CB_TARGET_MASK, 2); + R600_OUT_BATCH(r700->CB_TARGET_MASK.u32All); + R600_OUT_BATCH(r700->CB_SHADER_MASK.u32All); + R600_OUT_BATCH_REGVAL(R7xx_CB_SHADER_CONTROL, r700->CB_SHADER_CONTROL.u32All); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendCBCLRCMPState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(6); + R600_OUT_BATCH_REGSEQ(CB_CLRCMP_CONTROL, 4); + R600_OUT_BATCH(r700->CB_CLRCMP_CONTROL.u32All); + R600_OUT_BATCH(r700->CB_CLRCMP_SRC.u32All); + R600_OUT_BATCH(r700->CB_CLRCMP_DST.u32All); + R600_OUT_BATCH(r700->CB_CLRCMP_MSK.u32All); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendCBBlendState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + unsigned int ui; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) { + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL(CB_BLEND_CONTROL, r700->CB_BLEND_CONTROL.u32All); + END_BATCH(); + } + + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL(CB_COLOR_CONTROL, r700->CB_COLOR_CONTROL.u32All); + END_BATCH(); + + if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) { + for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) { + if (r700->render_target[ui].enabled) { + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL(CB_BLEND0_CONTROL + (4 * ui), + r700->render_target[ui].CB_BLEND0_CONTROL.u32All); + END_BATCH(); + } + } + } + + COMMIT_BATCH(); +} + +static void r700SendCBBlendColorState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + 
R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + BEGIN_BATCH_NO_AUTOSTATE(6); + R600_OUT_BATCH_REGSEQ(CB_BLEND_RED, 4); + R600_OUT_BATCH(r700->CB_BLEND_RED.u32All); + R600_OUT_BATCH(r700->CB_BLEND_GREEN.u32All); + R600_OUT_BATCH(r700->CB_BLEND_BLUE.u32All); + R600_OUT_BATCH(r700->CB_BLEND_ALPHA.u32All); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendSUState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(9); + R600_OUT_BATCH_REGVAL(PA_SU_SC_MODE_CNTL, r700->PA_SU_SC_MODE_CNTL.u32All); + R600_OUT_BATCH_REGSEQ(PA_SU_POINT_SIZE, 4); + R600_OUT_BATCH(r700->PA_SU_POINT_SIZE.u32All); + R600_OUT_BATCH(r700->PA_SU_POINT_MINMAX.u32All); + R600_OUT_BATCH(r700->PA_SU_LINE_CNTL.u32All); + R600_OUT_BATCH(r700->PA_SU_VTX_CNTL.u32All); + END_BATCH(); + COMMIT_BATCH(); + +} + +static void r700SendPolyState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(10); + R600_OUT_BATCH_REGSEQ(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2); + R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All); + R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_CLAMP.u32All); + R600_OUT_BATCH_REGSEQ(PA_SU_POLY_OFFSET_FRONT_SCALE, 4); + R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_FRONT_SCALE.u32All); + R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.u32All); + R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_BACK_SCALE.u32All); + R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_BACK_OFFSET.u32All); + END_BATCH(); + COMMIT_BATCH(); + +} + +static void r700SendCLState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = 
R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + BEGIN_BATCH_NO_AUTOSTATE(12); + R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, r700->PA_CL_CLIP_CNTL.u32All); + R600_OUT_BATCH_REGVAL(PA_CL_VTE_CNTL, r700->PA_CL_VTE_CNTL.u32All); + R600_OUT_BATCH_REGVAL(PA_CL_VS_OUT_CNTL, r700->PA_CL_VS_OUT_CNTL.u32All); + R600_OUT_BATCH_REGVAL(PA_CL_NANINF_CNTL, r700->PA_CL_NANINF_CNTL.u32All); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendGBState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(6); + R600_OUT_BATCH_REGSEQ(PA_CL_GB_VERT_CLIP_ADJ, 4); + R600_OUT_BATCH(r700->PA_CL_GB_VERT_CLIP_ADJ.u32All); + R600_OUT_BATCH(r700->PA_CL_GB_VERT_DISC_ADJ.u32All); + R600_OUT_BATCH(r700->PA_CL_GB_HORZ_CLIP_ADJ.u32All); + R600_OUT_BATCH(r700->PA_CL_GB_HORZ_DISC_ADJ.u32All); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendScissorState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + BEGIN_BATCH_NO_AUTOSTATE(22); + R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2); + R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_BR.u32All); + + R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 12); + R600_OUT_BATCH(r700->PA_SC_WINDOW_OFFSET.u32All); + R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_BR.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_RULE.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_BR.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_TL.u32All); + 
R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_BR.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_BR.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_BR.u32All); + + R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2); + R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_TL.u32All); + R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_BR.u32All); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendSCState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + BEGIN_BATCH_NO_AUTOSTATE(15); + R600_OUT_BATCH_REGVAL(R7xx_PA_SC_EDGERULE, r700->PA_SC_EDGERULE.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_LINE_STIPPLE, r700->PA_SC_LINE_STIPPLE.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_MPASS_PS_CNTL, r700->PA_SC_MPASS_PS_CNTL.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_MODE_CNTL, r700->PA_SC_MODE_CNTL.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_LINE_CNTL, r700->PA_SC_LINE_CNTL.u32All); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendAAState(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(12); + R600_OUT_BATCH_REGVAL(PA_SC_AA_CONFIG, r700->PA_SC_AA_CONFIG.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_AA_SAMPLE_LOCS_MCTX, r700->PA_SC_AA_SAMPLE_LOCS_MCTX.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, r700->PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX.u32All); + R600_OUT_BATCH_REGVAL(PA_SC_AA_MASK, r700->PA_SC_AA_MASK.u32All); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendPSConsts(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + 
int i; + BATCH_LOCALS(&context->radeon); + + if (r700->ps.num_consts == 0) + return; + + BEGIN_BATCH_NO_AUTOSTATE(2 + (r700->ps.num_consts * 4)); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, (r700->ps.num_consts * 4))); + /* assembler map const from very beginning. */ + R600_OUT_BATCH(SQ_ALU_CONSTANT_PS_OFFSET * 4); + for (i = 0; i < r700->ps.num_consts; i++) { + R600_OUT_BATCH(r700->ps.consts[i][0].u32All); + R600_OUT_BATCH(r700->ps.consts[i][1].u32All); + R600_OUT_BATCH(r700->ps.consts[i][2].u32All); + R600_OUT_BATCH(r700->ps.consts[i][3].u32All); + } + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + int i; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + if (r700->vs.num_consts == 0) + return; + + BEGIN_BATCH_NO_AUTOSTATE(2 + (r700->vs.num_consts * 4)); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, (r700->vs.num_consts * 4))); + /* assembler map const from very beginning. 
*/ + R600_OUT_BATCH(SQ_ALU_CONSTANT_VS_OFFSET * 4); + for (i = 0; i < r700->vs.num_consts; i++) { + R600_OUT_BATCH(r700->vs.consts[i][0].u32All); + R600_OUT_BATCH(r700->vs.consts[i][1].u32All); + R600_OUT_BATCH(r700->vs.consts[i][2].u32All); + R600_OUT_BATCH(r700->vs.consts[i][3].u32All); + } + END_BATCH(); + COMMIT_BATCH(); +} + +static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) +{ + return atom->cmd_size; +} + +static int check_cb(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + int count = 7; + + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + count += 11; + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); + + return count; +} + +static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + unsigned int ui; + int count = 3; + + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + count += 3; + + if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) { + for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) { + if (r700->render_target[ui].enabled) + count += 3; + } + } + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); + + return count; +} - return GL_TRUE; +static int check_ucp(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + int i; + int count = 0; + + for (i = 0; i < R700_MAX_UCP; i++) { + if (r700->ucp[i].enabled) + count += 6; + } + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); + return count; } +static int check_vtx(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + int count = context->radeon.tcl.aos_count * 18; + + if (count) + count += 6; + + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); + 
return count; +} + +static int check_tx(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + unsigned int i, count = 0; + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + + for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = r700->textures[i]; + if (t) + count++; + } + } + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); + return count * 31; +} + +static int check_ps_consts(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + int count = r700->ps.num_consts * 4; + + if (count) + count += 2; + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); + + return count; +} + +static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + int count = r700->vs.num_consts * 4; + + if (count) + count += 2; + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); + + return count; +} + +#define ALLOC_STATE( ATOM, CHK, SZ, EMIT ) \ +do { \ + context->atoms.ATOM.cmd_size = (SZ); \ + context->atoms.ATOM.cmd = NULL; \ + context->atoms.ATOM.name = #ATOM; \ + context->atoms.ATOM.idx = 0; \ + context->atoms.ATOM.check = check_##CHK; \ + context->atoms.ATOM.dirty = GL_FALSE; \ + context->atoms.ATOM.emit = (EMIT); \ + context->radeon.hw.max_state_size += (SZ); \ + insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \ +} while (0) + +void r600InitAtoms(context_t *context) +{ + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context); + context->radeon.hw.max_state_size = 10 + 5 + 14; /* start 3d, idle, cb/db flush */ + + /* Setup the atom linked list */ + make_empty_list(&context->radeon.hw.atomlist); + context->radeon.hw.atomlist.name = "atom-list"; + + 
ALLOC_STATE(sq, always, 34, r700SendSQConfig); + ALLOC_STATE(db, always, 23, r700SendDBState); + ALLOC_STATE(stencil, always, 4, r700SendStencilState); + ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState); + ALLOC_STATE(sc, always, 15, r700SendSCState); + ALLOC_STATE(scissor, always, 22, r700SendScissorState); + ALLOC_STATE(aa, always, 12, r700SendAAState); + ALLOC_STATE(cl, always, 12, r700SendCLState); + ALLOC_STATE(gb, always, 6, r700SendGBState); + ALLOC_STATE(ucp, ucp, (R700_MAX_UCP * 6), r700SendUCPState); + ALLOC_STATE(su, always, 9, r700SendSUState); + ALLOC_STATE(poly, always, 10, r700SendPolyState); + ALLOC_STATE(cb, cb, 18, r700SendCBState); + ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState); + ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState); + ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState); + ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState); + ALLOC_STATE(sx, always, 9, r700SendSXState); + ALLOC_STATE(vgt, always, 41, r700SendVGTState); + ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState); + ALLOC_STATE(vpt, always, 16, r700SendViewportState); + ALLOC_STATE(fs, always, 18, r700SendFSState); + ALLOC_STATE(vs, always, 18, r700SendVSState); + ALLOC_STATE(ps, always, 21, r700SendPSState); + ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts); + ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts); + ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState); + ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState); + ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState); + ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState); + + context->radeon.hw.is_dirty = GL_TRUE; + context->radeon.hw.all_dirty = GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h 
index fa419aa499..ae249e15fd 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.h +++ b/src/mesa/drivers/dri/r600/r700_chip.h @@ -46,6 +46,7 @@ #define R700_MAX_VIEWPORTS 16 #define R700_MAX_SHADER_EXPORTS 32 #define R700_MAX_UCP 6 +#define R700_MAX_DX9_CONSTS 256 /* Enum not show in r600_*.h */ @@ -188,6 +189,7 @@ typedef struct _RENDER_TARGET_STATE_STRUCT union UINT_FLOAT CB_COLOR0_MASK; /* 0xA040 */ union UINT_FLOAT CB_BLEND0_CONTROL; /* 0xA1E0 */ GLboolean enabled; + GLboolean dirty; } RENDER_TARGET_STATE_STRUCT; typedef struct _VIEWPORT_STATE_STRUCT @@ -203,6 +205,7 @@ typedef struct _VIEWPORT_STATE_STRUCT union UINT_FLOAT PA_CL_VPORT_ZSCALE; /* 0xA113 */ union UINT_FLOAT PA_CL_VPORT_ZOFFSET; /* 0xA114 */ GLboolean enabled; + GLboolean dirty; } VIEWPORT_STATE_STRUCT; typedef struct _UCP_STATE_STRUCT @@ -212,6 +215,7 @@ typedef struct _UCP_STATE_STRUCT union UINT_FLOAT PA_CL_UCP_0_Z; union UINT_FLOAT PA_CL_UCP_0_W; GLboolean enabled; + GLboolean dirty; } UCP_STATE_STRUCT; typedef struct _PS_STATE_STRUCT @@ -220,6 +224,9 @@ typedef struct _PS_STATE_STRUCT union UINT_FLOAT SQ_PGM_RESOURCES_PS ; /* 0xA214 */ union UINT_FLOAT SQ_PGM_EXPORTS_PS ; /* 0xA215 */ union UINT_FLOAT SQ_PGM_CF_OFFSET_PS ; /* 0xA233 */ + GLboolean dirty; + int num_consts; + union UINT_FLOAT consts[R700_MAX_DX9_CONSTS][4]; } PS_STATE_STRUCT; typedef struct _VS_STATE_STRUCT @@ -227,6 +234,9 @@ typedef struct _VS_STATE_STRUCT union UINT_FLOAT SQ_PGM_START_VS ; /* 0xA216 */ union UINT_FLOAT SQ_PGM_RESOURCES_VS ; /* 0xA21A */ union UINT_FLOAT SQ_PGM_CF_OFFSET_VS ; /* 0xA234 */ + GLboolean dirty; + int num_consts; + union UINT_FLOAT consts[R700_MAX_DX9_CONSTS][4]; } VS_STATE_STRUCT; typedef struct _GS_STATE_STRUCT @@ -234,6 +244,7 @@ typedef struct _GS_STATE_STRUCT union UINT_FLOAT SQ_PGM_START_GS ; /* 0xA21B */ union UINT_FLOAT SQ_PGM_RESOURCES_GS ; /* 0xA21F */ union UINT_FLOAT SQ_PGM_CF_OFFSET_GS ; /* 0xA235 */ + GLboolean dirty; } GS_STATE_STRUCT; typedef struct _ES_STATE_STRUCT @@ -241,6 +252,7 
@@ typedef struct _ES_STATE_STRUCT union UINT_FLOAT SQ_PGM_START_ES ; /* 0xA220 */ union UINT_FLOAT SQ_PGM_RESOURCES_ES ; /* 0xA224 */ union UINT_FLOAT SQ_PGM_CF_OFFSET_ES ; /* 0xA236 */ + GLboolean dirty; } ES_STATE_STRUCT; typedef struct _FS_STATE_STRUCT @@ -248,6 +260,7 @@ typedef struct _FS_STATE_STRUCT union UINT_FLOAT SQ_PGM_START_FS ; /* 0xA225 */ union UINT_FLOAT SQ_PGM_RESOURCES_FS ; /* 0xA229 */ union UINT_FLOAT SQ_PGM_CF_OFFSET_FS ; /* 0xA237 */ + GLboolean dirty; } FS_STATE_STRUCT; typedef struct _SQ_CONFIG_STRUCT @@ -260,27 +273,14 @@ typedef struct _SQ_CONFIG_STRUCT union UINT_FLOAT SQ_STACK_RESOURCE_MGMT_2 ; /* 0x2305 */ } SQ_CONFIG_STRUCT; -typedef struct ContextState -{ - unsigned int * puiValue; - unsigned int unOffset; - struct ContextState * pNext; -} ContextState; - typedef struct _R700_CHIP_CONTEXT { - // misc - union UINT_FLOAT TA_CNTL_AUX ; /* 0x2542 */ - union UINT_FLOAT VC_ENHANCE ; /* 0x25C5 */ - union UINT_FLOAT SQ_DYN_GPR_CNTL_PS_FLUSH_REQ; /* 0x2363 */ - union UINT_FLOAT DB_DEBUG ; /* 0x260C */ - union UINT_FLOAT DB_WATERMARKS ; /* 0x260E */ - // DB union UINT_FLOAT DB_DEPTH_SIZE ; /* 0xA000 */ union UINT_FLOAT DB_DEPTH_VIEW ; /* 0xA001 */ union UINT_FLOAT DB_DEPTH_BASE ; /* 0xA003 */ union UINT_FLOAT DB_DEPTH_INFO ; /* 0xA004 */ + GLboolean db_target_dirty; union UINT_FLOAT DB_HTILE_DATA_BASE ; /* 0xA005 */ union UINT_FLOAT DB_STENCIL_CLEAR ; /* 0xA00A */ union UINT_FLOAT DB_DEPTH_CLEAR ; /* 0xA00B */ @@ -292,6 +292,7 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT DB_ALPHA_TO_MASK ; /* 0xA351 */ union UINT_FLOAT DB_DEPTH_CONTROL ; /* 0xA200 */ union UINT_FLOAT DB_SHADER_CONTROL ; /* 0xA203 */ + GLboolean db_dirty; // SC union UINT_FLOAT PA_SC_SCREEN_SCISSOR_TL ; /* 0xA00C */ @@ -311,6 +312,8 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT PA_SC_EDGERULE ; /* 0xA08C */ union UINT_FLOAT PA_SC_GENERIC_SCISSOR_TL ; /* 0xA090 */ union UINT_FLOAT PA_SC_GENERIC_SCISSOR_BR ; /* 0xA091 */ + GLboolean scissor_dirty; + union 
UINT_FLOAT PA_SC_LINE_STIPPLE ; /* 0xA283 */ union UINT_FLOAT PA_SC_LINE_CNTL ; /* 0xA300 */ union UINT_FLOAT PA_SC_AA_CONFIG ; /* 0xA301 */ @@ -319,6 +322,7 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_MCTX ; /* 0xA307 */ union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX; /* 0xA308 */ union UINT_FLOAT PA_SC_AA_MASK ; /* 0xA312 */ + GLboolean sc_dirty; // CL union UINT_FLOAT PA_CL_CLIP_CNTL ; /* 0xA204 */ @@ -329,6 +333,7 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT PA_CL_GB_VERT_DISC_ADJ ; /* 0xA304 */ union UINT_FLOAT PA_CL_GB_HORZ_CLIP_ADJ ; /* 0xA305 */ union UINT_FLOAT PA_CL_GB_HORZ_DISC_ADJ ; /* 0xA306 */ + GLboolean cl_dirty; // SU union UINT_FLOAT PA_SU_SC_MODE_CNTL ; /* 0xA205 */ @@ -342,6 +347,7 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT PA_SU_POLY_OFFSET_FRONT_OFFSET; /* 0xA381 */ union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_SCALE; /* 0xA382 */ union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_OFFSET; /* 0xA383 */ + GLboolean su_dirty; VIEWPORT_STATE_STRUCT viewport[R700_MAX_VIEWPORTS]; UCP_STATE_STRUCT ucp[R700_MAX_UCP]; @@ -367,12 +373,14 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT CB_CLRCMP_DST ; /* 0xA30E */ union UINT_FLOAT CB_CLRCMP_MSK ; /* 0xA30F */ union UINT_FLOAT CB_BLEND_CONTROL ; /* 0xABD0 */ + GLboolean cb_dirty; RENDER_TARGET_STATE_STRUCT render_target[R700_MAX_RENDER_TARGETS]; // SX union UINT_FLOAT SX_MISC ; /* 0xA0D4 */ union UINT_FLOAT SX_ALPHA_TEST_CONTROL ; /* 0xA104 */ union UINT_FLOAT SX_ALPHA_REF ; /* 0xA10E */ + GLboolean sx_dirty; // VGT union UINT_FLOAT VGT_MAX_VTX_INDX ; /* 0xA100 */ @@ -393,7 +401,6 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT VGT_GROUP_VECT_1_FMT_CNTL ; /* 0xA28F */ union UINT_FLOAT VGT_GS_MODE ; /* 0xA290 */ union UINT_FLOAT VGT_PRIMITIVEID_EN ; /* 0xA2A1 */ - union UINT_FLOAT VGT_DMA_NUM_INSTANCES ; /* 0xA2A2 */ union UINT_FLOAT VGT_MULTI_PRIM_IB_RESET_EN; /* 0xA2A5 */ union UINT_FLOAT VGT_INSTANCE_STEP_RATE_0 ; /* 0xA2A8 */ union UINT_FLOAT 
VGT_INSTANCE_STEP_RATE_1 ; /* 0xA2A9 */ @@ -401,6 +408,7 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT VGT_REUSE_OFF ; /* 0xA2AD */ union UINT_FLOAT VGT_VTX_CNT_EN ; /* 0xA2AE */ union UINT_FLOAT VGT_STRMOUT_BUFFER_EN ; /* 0xA2C8 */ + GLboolean vgt_dirty; // SPI union UINT_FLOAT SPI_VS_OUT_ID_0 ; /* 0xA185 */ @@ -455,39 +463,8 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT SQ_VTX_SEMANTIC_29 ; /* 0xA0FD */ union UINT_FLOAT SQ_VTX_SEMANTIC_30 ; /* 0xA0FE */ union UINT_FLOAT SQ_VTX_SEMANTIC_31 ; /* 0xA0FF */ - - union UINT_FLOAT SPI_PS_INPUT_CNTL_0 ; /* 0xA191 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_1 ; /* 0xA192 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_2 ; /* 0xA193 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_3 ; /* 0xA194 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_4 ; /* 0xA195 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_5 ; /* 0xA196 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_6 ; /* 0xA197 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_7 ; /* 0xA198 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_8 ; /* 0xA199 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_9 ; /* 0xA19A */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_10 ; /* 0xA19B */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_11 ; /* 0xA19C */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_12 ; /* 0xA19D */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_13 ; /* 0xA19E */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_14 ; /* 0xA19F */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_15 ; /* 0xA1A0 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_16 ; /* 0xA1A1 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_17 ; /* 0xA1A2 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_18 ; /* 0xA1A3 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_19 ; /* 0xA1A4 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_20 ; /* 0xA1A5 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_21 ; /* 0xA1A6 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_22 ; /* 0xA1A7 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_23 ; /* 0xA1A8 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_24 ; /* 0xA1A9 */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_25 ; /* 0xA1AA */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_26 ; /* 
0xA1AB */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_27 ; /* 0xA1AC */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_28 ; /* 0xA1AD */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_29 ; /* 0xA1AE */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_30 ; /* 0xA1AF */ - union UINT_FLOAT SPI_PS_INPUT_CNTL_31 ; /* 0xA1B0 */ + union UINT_FLOAT SPI_PS_INPUT_CNTL[R700_MAX_SHADER_EXPORTS]; + GLboolean spi_dirty; // shaders PS_STATE_STRUCT ps; @@ -498,7 +475,12 @@ typedef struct _R700_CHIP_CONTEXT // SQ CONFIG SQ_CONFIG_STRUCT sq_config; - + // misc + union UINT_FLOAT TA_CNTL_AUX ; /* 0x2542 */ + union UINT_FLOAT VC_ENHANCE ; /* 0x25C5 */ + union UINT_FLOAT SQ_DYN_GPR_CNTL_PS_FLUSH_REQ; /* 0x2363 */ + union UINT_FLOAT DB_DEBUG ; /* 0x260C */ + union UINT_FLOAT DB_WATERMARKS ; /* 0x260E */ // SQ union UINT_FLOAT SQ_ESGS_RING_ITEMSIZE ; /* 0xA22A */ union UINT_FLOAT SQ_GSVS_RING_ITEMSIZE ; /* 0xA22B */ @@ -509,8 +491,7 @@ typedef struct _R700_CHIP_CONTEXT union UINT_FLOAT SQ_FBUF_RING_ITEMSIZE ; /* 0xA230 */ union UINT_FLOAT SQ_REDUC_RING_ITEMSIZE ; /* 0xA231 */ union UINT_FLOAT SQ_GS_VERT_ITEMSIZE ; /* 0xA232 */ - - ContextState* pStateList; + GLboolean sq_dirty; radeonTexObj* textures[R700_TEXTURE_NUMBERUNITS]; diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index e84be38622..c6546ab00c 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -31,6 +31,7 @@ #include "main/imports.h" #include "main/mtypes.h" #include "main/enums.h" +#include "swrast/swrast.h" #include "radeon_lock.h" #include "r600_context.h" @@ -45,12 +46,6 @@ static GLboolean r700ClearFast(context_t *context, GLbitfield mask) return GL_FALSE; } -#define R600_NEWPRIM( rmesa ) \ - do { \ - if ( rmesa->radeon.dma.flush ) \ - rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \ - } while (0) - void r700Clear(GLcontext * ctx, GLbitfield mask) { context_t *context = R700_CONTEXT(ctx); @@ -60,6 +55,8 @@ void r700Clear(GLcontext * ctx, GLbitfield mask) int i; struct 
gl_framebuffer *fb = ctx->DrawBuffer; + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x\n", __func__, mask); + if( GL_TRUE == r700ClearFast(context, mask) ) { return; @@ -111,8 +108,7 @@ void r700Clear(GLcontext * ctx, GLbitfield mask) } if (swrast_mask) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "%s: swrast clear, mask: %x\n", + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, "%s: swrast clear, mask: %x\n", __FUNCTION__, swrast_mask); _swrast_Clear(ctx, swrast_mask); } diff --git a/src/mesa/drivers/dri/r600/r700_debug.c b/src/mesa/drivers/dri/r600/r700_debug.c index ecdb75ad48..cd1ba9eca3 100644 --- a/src/mesa/drivers/dri/r600/r700_debug.c +++ b/src/mesa/drivers/dri/r600/r700_debug.c @@ -25,31 +25,8 @@ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com> */ -#include <stdio.h> -#include <stdarg.h> -#include <stdlib.h> -#include <string.h> - -#include "main/glheader.h" - #include "r700_debug.h" -#include "r600_context.h" - -void NormalizeLogErrorCode(int nError) -{ - //TODO -} - -void r700_error(int nLocalError, char* fmt, ...) 
-{ - va_list args; - - NormalizeLogErrorCode(nLocalError); - - va_start(args, fmt); - fprintf(stderr, fmt, args); - va_end(args); -} +#include "radeon_debug.h" void DumpHwBinary(int type, void *addr, int size) { @@ -61,21 +38,21 @@ void DumpHwBinary(int type, void *addr, int size) switch (type) { case DUMP_PIXEL_SHADER: - DEBUGF("Pixel Shader\n"); + radeon_print(RADEON_SHADER, RADEON_TRACE, "Pixel Shader\n"); break; case DUMP_VERTEX_SHADER: - DEBUGF("Vertex Shader\n"); + radeon_print(RADEON_SHADER, RADEON_TRACE, "Vertex Shader\n"); break; case DUMP_FETCH_SHADER: - DEBUGF("Fetch Shader\n"); + radeon_print(RADEON_SHADER, RADEON_TRACE, "Fetch Shader\n"); break; } for (i = 0; i < size; i++) { - DEBUGP("0x%08x,\t", *pHw); + radeon_print(RADEON_SHADER, RADEON_TRACE, "0x%08x,\t", *pHw); if (i%4 == 3) - DEBUGP("\n", *pHw); + radeon_print(RADEON_SHADER, RADEON_TRACE, "0x%08x\n", *pHw); pHw++; } diff --git a/src/mesa/drivers/dri/r600/r700_debug.h b/src/mesa/drivers/dri/r600/r700_debug.h index e810e6da08..c0921bf610 100644 --- a/src/mesa/drivers/dri/r600/r700_debug.h +++ b/src/mesa/drivers/dri/r600/r700_debug.h @@ -27,67 +27,13 @@ #ifndef _R700_DEBUG_H_ #define _R700_DEBUG_H_ - -enum R700_ERROR -{ - ERROR_ASM_VTX_CLAUSE = 0x1000, - ERROR_ASM_UNKOWNCLAUSE = 0x1001, - ERROR_ASM_ALLOCEXPORTCF = 0x1002, - ERROR_ASM_ALLOCVTXCF = 0x1003, - ERROR_ASM_ALLOCTEXCF = 0x1004, - ERROR_ASM_ALLOCALUCF = 0x1005, - ERROR_ASM_UNKNOWNILINST = 0x1006, - ERROR_ASM_SRCARGUMENT = 0x1007, - ERROR_ASM_DSTARGUMENT = 0x1008, - ERROR_ASM_TEXINSTRUCTION = 0x1009, - ERROR_ASM_ALUINSTRUCTION = 0x100A, - ERROR_ASM_INSTDSTTRACK = 0x100B, - ERROR_ASM_TEXDSTBADTYPE = 0x100C, - ERROR_ASM_ALUSRCBADTYPE = 0x100D, - ERROR_ASM_ALUSRCSELECT = 0x100E, - ERROR_ASM_ALUSRCNUMBER = 0x100F, - ERROR_ASM_ALUDSTBADTYPE = 0x1010, - ERROR_ASM_CONSTCHANNEL = 0x1011, - ERROR_ASM_BADSCALARBZ = 0x1012, - ERROR_ASM_BADGPRRESERVE = 0x1013, - ERROR_ASM_BADVECTORBZ = 0x1014, - ERROR_ASM_BADTEXINST = 0x1015, - ERROR_ASM_BADTEXSRC = 
0x1016, - ERROR_ASM_BADEXPORTTYPE = 0x1017, - - - TODO_ASM_CONSTTEXADDR = 0x8000, - TODO_ASM_NEEDIMPINST = 0x8001, - TODO_ASM_TXB = 0x8002, - TODO_ASM_TXP = 0x8003 -}; - enum R700_DUMP_TYPE { - DUMP_VERTEX_SHADER = 0x1, - DUMP_PIXEL_SHADER = 0x2, - DUMP_FETCH_SHADER = 0x4, + DUMP_VERTEX_SHADER = 0x1, + DUMP_PIXEL_SHADER = 0x2, + DUMP_FETCH_SHADER = 0x4, }; -#define DEBUGF printf -#define DEBUGP printf - -void NormalizeLogErrorCode(int nError); -/*NormalizeLogErrorCode(nLocalError); */ -void r700_error(int nLocalError, char *fmt, ...); extern void DumpHwBinary(int, void *, int); -#ifdef STANDALONE_COMPILER -#ifdef __cplusplus -extern "C" -{ -#endif //__cplusplus - -void LogString(char* szStr); - -#ifdef __cplusplus -} -#endif //__cplusplus -#endif /*STANDALONE_COMPILER*/ - #endif /*_R700_DEBUG_H_*/ diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 44de2aebee..78ce3ae436 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -55,6 +55,12 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, //Input mapping : mesa_fp->Base.InputsRead set the flag, set in //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ... 
//MUST match order in Map_Vertex_Output + unBit = 1 << FRAG_ATTRIB_WPOS; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++; + } + unBit = 1 << FRAG_ATTRIB_COL0; if(mesa_fp->Base.InputsRead & unBit) { @@ -67,6 +73,12 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++; } + unBit = 1 << FRAG_ATTRIB_FOGC; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; + } + for(i=0; i<8; i++) { unBit = 1 << (FRAG_ATTRIB_TEX0 + i); @@ -106,15 +118,16 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->uiFP_OutputMap[FRAG_RESULT_DEPTH] = pAsm->number_used_registers++; pAsm->number_of_exports++; pAsm->number_of_colorandz_exports++; + pAsm->pR700Shader->depthIsExported = 1; } - pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports); + pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports); for(ui=0; ui<pAsm->number_of_exports; ui++) { pAsm->pucOutMask[ui] = 0x0; } - - pAsm->uFirstHelpReg = pAsm->number_used_registers; + + pAsm->uFirstHelpReg = pAsm->number_used_registers; } GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, @@ -245,6 +258,20 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, return GL_TRUE; } +void r700SelectFragmentShader(GLcontext *ctx) +{ + context_t *context = R700_CONTEXT(ctx); + struct r700_fragment_program *fp = (struct r700_fragment_program *) + (ctx->FragmentProgram._Current); + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + { + fp->r700AsmCode.bR6xx = 1; + } + + if (GL_FALSE == fp->translated) + r700TranslateFragmentShader(fp, &(fp->mesa_program)); +} + void * r700GetActiveFpShaderBo(GLcontext * ctx) { struct r700_fragment_program *fp = (struct r700_fragment_program *) @@ -255,33 +282,32 @@ void * r700GetActiveFpShaderBo(GLcontext * ctx) 
GLboolean r700SetupFragmentProgram(GLcontext * ctx) { - context_t *context = R700_CONTEXT(ctx); - BATCH_LOCALS(&context->radeon); - + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - struct r700_fragment_program *fp = (struct r700_fragment_program *) (ctx->FragmentProgram._Current); - + r700_AssemblerBase *pAsm = &(fp->r700AsmCode); + struct gl_fragment_program *mesa_fp = &(fp->mesa_program); struct gl_program_parameter_list *paramList; unsigned int unNumParamData; - unsigned int ui; - + unsigned int ui, i; unsigned int unNumOfReg; - + unsigned int unBit; + GLuint exportCount; + if(GL_FALSE == fp->loaded) { - if(fp->r700Shader.bNeedsAssembly == GL_TRUE) + if(fp->r700Shader.bNeedsAssembly == GL_TRUE) { Assemble( &(fp->r700Shader) ); } /* Load fp to gpu */ - r600EmitShader(ctx, - &(fp->shaderbo), + r600EmitShader(ctx, + &(fp->shaderbo), (GLvoid *)(fp->r700Shader.pProgram), fp->r700Shader.uShaderBinaryDWORDSize, - "FS"); + "FS"); fp->loaded = GL_TRUE; } @@ -293,16 +319,33 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) (context->chipobj.MemUse)(context, fp->shadercode.buf->id); */ + R600_STATECHANGE(context, ps); + + r700->ps.SQ_PGM_RESOURCES_PS.u32All = 0; + SETbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); + r700->ps.SQ_PGM_START_PS.u32All = 0; /* set from buffer obj */ + R600_STATECHANGE(context, spi); + unNumOfReg = fp->r700Shader.nRegs + 1; ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift); - ui = ui ? 
ui : unNumOfReg; + /* PS uses fragment.position */ + if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) + { + ui += 1; + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, CENTERS_ONLY, BARYC_SAMPLE_CNTL_shift, BARYC_SAMPLE_CNTL_mask); + SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); + SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); + } + + ui = (unNumOfReg < ui) ? ui : unNumOfReg; + + SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); - SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); - CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit); if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */ @@ -314,6 +357,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode, EXPORT_MODE_shift, EXPORT_MODE_mask); + R600_STATECHANGE(context, db); + if(fp->r700Shader.killIsUsed) { SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); @@ -325,45 +370,107 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) if(fp->r700Shader.depthIsExported) { - SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); + SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); } else { CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); } - /* sent out shader constants. 
*/ + // emit ps input map + unBit = 1 << FRAG_ATTRIB_WPOS; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } - paramList = fp->mesa_program.Base.Parameters; + unBit = 1 << FRAG_ATTRIB_COL0; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + + unBit = 1 << FRAG_ATTRIB_COL1; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + + unBit = 1 << FRAG_ATTRIB_FOGC; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } - if(NULL != paramList) + 
for(i=0; i<8; i++) { - _mesa_load_state_parameters(ctx, paramList); + unBit = 1 << (FRAG_ATTRIB_TEX0 + i); + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + } - unNumParamData = paramList->NumParameters * 4; + R600_STATECHANGE(context, cb); + exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); + r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1; - BEGIN_BATCH_NO_AUTOSTATE(2 + unNumParamData); - - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, unNumParamData)); + /* sent out shader constants. */ + paramList = fp->mesa_program.Base.Parameters; - /* assembler map const from very beginning. */ - R600_OUT_BATCH(SQ_ALU_CONSTANT_PS_OFFSET * 4); + if(NULL != paramList) { + _mesa_load_state_parameters(ctx, paramList); - unNumParamData = paramList->NumParameters; + if (paramList->NumParameters > R700_MAX_DX9_CONSTS) + return GL_FALSE; - for(ui=0; ui<unNumParamData; ui++) - { - R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][0]))); - R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][1]))); - R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][2]))); - R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][3]))); - } - END_BATCH(); - COMMIT_BATCH(); - } + R600_STATECHANGE(context, ps_consts); - return GL_TRUE; -} + r700->ps.num_consts = paramList->NumParameters; + unNumParamData = paramList->NumParameters; + for(ui=0; ui<unNumParamData; ui++) { + r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; + r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; + r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; + r700->ps.consts[ui][3].f32All = 
paramList->ParameterValues[ui][3]; + } + } else + r700->ps.num_consts = 0; + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h index 9c7813e908..cbb108d212 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.h +++ b/src/mesa/drivers/dri/r600/r700_fragprog.h @@ -49,13 +49,16 @@ struct r700_fragment_program /* Internal */ void Map_Fragment_Program(r700_AssemblerBase *pAsm, - struct gl_fragment_program *mesa_fp); + struct gl_fragment_program *mesa_fp); GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, - struct gl_fragment_program *mesa_fp); + struct gl_fragment_program *mesa_fp); + +GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, + struct gl_fragment_program *mesa_vp); /* Interface */ -extern GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, - struct gl_fragment_program *mesa_vp); +extern void r700SelectFragmentShader(GLcontext *ctx); + extern GLboolean r700SetupFragmentProgram(GLcontext * ctx); extern void * r700GetActiveFpShaderBo(GLcontext * ctx); diff --git a/src/mesa/drivers/dri/r600/r700_ioctl.c b/src/mesa/drivers/dri/r600/r700_ioctl.c index c479532001..72a8978976 100644 --- a/src/mesa/drivers/dri/r600/r700_ioctl.c +++ b/src/mesa/drivers/dri/r600/r700_ioctl.c @@ -31,6 +31,7 @@ #include "main/imports.h" #include "main/macros.h" #include "main/context.h" +#include "main/simple_list.h" #include "swrast/swrast.h" #include "radeon_common.h" @@ -40,34 +41,10 @@ #include "r700_ioctl.h" #include "r700_clear.h" -static void r700Flush(GLcontext *ctx) -{ - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - context_t * context = R700_CONTEXT(ctx); - - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw); - - /* okay if we have no cmds in the buffer && - we have no DMA flush && - we have no DMA buffer allocated. - then no point flushing anything at all. 
- */ - if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && !radeon->dma.current) - return; - - if (radeon->dma.flush) - radeon->dma.flush( ctx ); - - r700SendContextStates(context); - - if (radeon->cmdbuf.cs->cdw) - rcommonFlushCmdBuf(radeon, __FUNCTION__); -} void r700InitIoctlFuncs(struct dd_function_table *functions) { functions->Clear = r700Clear; functions->Finish = radeonFinish; - functions->Flush = r700Flush; + functions->Flush = radeonFlush; } diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c index 36de143b1a..5290ef31be 100644 --- a/src/mesa/drivers/dri/r600/r700_oglprog.c +++ b/src/mesa/drivers/dri/r600/r700_oglprog.c @@ -33,6 +33,7 @@ #include "tnl/tnl.h" #include "r600_context.h" +#include "r600_emit.h" #include "r700_oglprog.h" #include "r700_fragprog.h" @@ -45,23 +46,21 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, { struct gl_program *pProgram = NULL; - struct r700_vertex_program *vp; + struct r700_vertex_program_cont *vpc; struct r700_fragment_program *fp; + radeon_print(RADEON_SHADER, RADEON_VERBOSE, + "%s %u, %u\n", __func__, target, id); + switch (target) { case GL_VERTEX_STATE_PROGRAM_NV: case GL_VERTEX_PROGRAM_ARB: - vp = CALLOC_STRUCT(r700_vertex_program); + vpc = CALLOC_STRUCT(r700_vertex_program_cont); pProgram = _mesa_init_vertex_program(ctx, - &vp->mesa_program, + &vpc->mesa_program, target, id); - vp->translated = GL_FALSE; - vp->loaded = GL_FALSE; - - vp->shaderbo = NULL; - break; case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: @@ -85,22 +84,31 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { - struct r700_vertex_program * vp; + struct r700_vertex_program_cont * vpc; + struct r700_vertex_program *vp, *tmp; struct r700_fragment_program * fp; - context_t *context = R700_CONTEXT(ctx); + + radeon_print(RADEON_SHADER, RADEON_VERBOSE, + "%s %p\n", __func__, prog); switch (prog->Target) 
{ case GL_VERTEX_STATE_PROGRAM_NV: case GL_VERTEX_PROGRAM_ARB: - vp = (struct r700_vertex_program*)prog; - /* Release DMA region */ - - r600DeleteShader(ctx, vp->shaderbo); - - /* Clean up */ - Clean_Up_Assembler(&(vp->r700AsmCode)); - Clean_Up_Shader(&(vp->r700Shader)); + vpc = (struct r700_vertex_program_cont*)prog; + vp = vpc->progs; + while (vp) { + tmp = vp->next; + /* Release DMA region */ + + r600DeleteShader(ctx, vp->shaderbo); + + /* Clean up */ + Clean_Up_Assembler(&(vp->r700AsmCode)); + Clean_Up_Shader(&(vp->r700Shader)); + _mesa_free(vp); + vp = tmp; + } break; case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 77cbe3cfd0..b58859b6ba 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -43,8 +43,8 @@ #include "tnl/t_context.h" #include "tnl/t_vertex.h" #include "tnl/t_pipeline.h" +#include "vbo/vbo_context.h" -#include "radeon_mipmap_tree.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -54,11 +54,13 @@ #include "r700_fragprog.h" #include "r700_state.h" +#include "radeon_buffer_objects.h" +#include "radeon_common_context.h" + void r700WaitForIdle(context_t *context); void r700WaitForIdleClean(context_t *context); -void r700Start3D(context_t *context); GLboolean r700SendTextureState(context_t *context); -unsigned int r700PrimitiveType(int prim); +static unsigned int r700PrimitiveType(int prim); void r600UpdateTextureState(GLcontext * ctx); GLboolean r700SyncSurf(context_t *context, struct radeon_bo *pbo, @@ -69,6 +71,7 @@ GLboolean r700SyncSurf(context_t *context, void r700WaitForIdle(context_t *context) { BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(3); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); @@ -82,6 +85,7 @@ void r700WaitForIdle(context_t *context) void r700WaitForIdleClean(context_t 
*context) { BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(5); R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); @@ -98,6 +102,7 @@ void r700WaitForIdleClean(context_t *context) void r700Start3D(context_t *context) { BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__); if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) { BEGIN_BATCH_NO_AUTOSTATE(2); @@ -117,83 +122,6 @@ void r700Start3D(context_t *context) r700WaitForIdleClean(context); } -static GLboolean r700SetupShaders(GLcontext * ctx) -{ - context_t *context = R700_CONTEXT(ctx); - - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - - GLuint exportCount; - - r700->ps.SQ_PGM_RESOURCES_PS.u32All = 0; - r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0; - - SETbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); - SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); - - r700SetupVertexProgram(ctx); - - r700SetupFragmentProgram(ctx); - - exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); - r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1; - - return GL_TRUE; -} - -GLboolean r700SendTextureState(context_t *context) -{ - unsigned int i; - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - offset_modifiers offset_mod = {NO_SHIFT, 0, 0xFFFFFFFF}; - struct radeon_bo *bo = NULL; - BATCH_LOCALS(&context->radeon); - - for (i=0; i<R700_TEXTURE_NUMBERUNITS; i++) { - radeonTexObj *t = r700->textures[i]; - if (t) { - if (!t->image_override) - bo = t->mt->bo; - else - bo = t->bo; - if (bo) { - - r700SyncSurf(context, bo, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, - 0, TC_ACTION_ENA_bit); - - BEGIN_BATCH_NO_AUTOSTATE(9); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH(i * 7); - 
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); - R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, - bo, - 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0, &offset_mod); - R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3, - bo, - 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0, &offset_mod); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); - END_BATCH(); - - BEGIN_BATCH_NO_AUTOSTATE(5); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); - R600_OUT_BATCH(i * 3); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); - END_BATCH(); - COMMIT_BATCH(); - } - } - } - return GL_TRUE; -} - GLboolean r700SyncSurf(context_t *context, struct radeon_bo *pbo, uint32_t read_domain, @@ -201,35 +129,34 @@ GLboolean r700SyncSurf(context_t *context, uint32_t sync_type) { BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__); uint32_t cp_coher_size; - offset_modifiers offset_mod; + + if (!pbo) + return GL_FALSE; if (pbo->size == 0xffffffff) cp_coher_size = 0xffffffff; else cp_coher_size = ((pbo->size + 255) >> 8); - offset_mod.shift = NO_SHIFT; - offset_mod.shiftbits = 0; - offset_mod.mask = 0xFFFFFFFF; - - BEGIN_BATCH_NO_AUTOSTATE(5); + BEGIN_BATCH_NO_AUTOSTATE(5 + 2); R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3)); R600_OUT_BATCH(sync_type); R600_OUT_BATCH(cp_coher_size); + R600_OUT_BATCH(0); + R600_OUT_BATCH(10); R600_OUT_BATCH_RELOC(0, pbo, 0, - read_domain, write_domain, 0, &offset_mod); // ??? 
- R600_OUT_BATCH(10); - + read_domain, write_domain, 0); END_BATCH(); COMMIT_BATCH(); return GL_TRUE; } -unsigned int r700PrimitiveType(int prim) +static unsigned int r700PrimitiveType(int prim) { switch (prim & PRIM_MODE_MASK) { @@ -270,120 +197,253 @@ unsigned int r700PrimitiveType(int prim) } } -static GLboolean r700RunRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) +static int r700NumVerts(int num_verts, int prim) { - context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - int lastIndex = 0; -#if 1 - BATCH_LOCALS(&context->radeon); - - unsigned int i, j; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; + int verts_off = 0; + + switch (prim & PRIM_MODE_MASK) { + case GL_POINTS: + verts_off = 0; + break; + case GL_LINES: + verts_off = num_verts % 2; + break; + case GL_LINE_STRIP: + if (num_verts < 2) + verts_off = num_verts; + break; + case GL_LINE_LOOP: + if (num_verts < 2) + verts_off = num_verts; + break; + case GL_TRIANGLES: + verts_off = num_verts % 3; + break; + case GL_TRIANGLE_STRIP: + if (num_verts < 3) + verts_off = num_verts; + break; + case GL_TRIANGLE_FAN: + if (num_verts < 3) + verts_off = num_verts; + break; + case GL_QUADS: + verts_off = num_verts % 4; + break; + case GL_QUAD_STRIP: + if (num_verts < 4) + verts_off = num_verts; + else + verts_off = num_verts % 2; + break; + case GL_POLYGON: + if (num_verts < 3) + verts_off = num_verts; + break; + default: + assert(0); + return -1; + break; + } + + return num_verts - verts_off; +} - struct r700_fragment_program *fp = (struct r700_fragment_program *) - (ctx->FragmentProgram._Current); - if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) - { - fp->r700AsmCode.bR6xx = 1; - } +static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) +{ + context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type, i, total_emit; + int num_indices; + uint32_t 
vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + type = r700PrimitiveType(prim); + num_indices = r700NumVerts(end - start, prim); + + radeon_print(RADEON_RENDER, RADEON_TRACE, + "%s type %x num_indices %d\n", + __func__, type, num_indices); + + if (type < 0 || num_indices <= 0) + return; + + total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + num_indices + 3; /* DRAW_INDEX_IMMD */ + + BEGIN_BATCH_NO_AUTOSTATE(total_emit); + // prim + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); + R600_OUT_BATCH(vgt_primitive_type); - r700Start3D(context); /* TODO : this is too much. */ + // index type + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); - r700SendSQConfig(context); + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); - r700UpdateShaders(ctx); + // draw packet + vgt_num_indices = num_indices; + SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - r700SetScissor(context); - r700SetRenderTarget(context, 0); - r700SetDepthTarget(context); + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); - if(r700SetupStreams(ctx)) + if(NULL == context->ind_buf.bo) { - return GL_TRUE; + for (i = start; i < (start + num_indices); i++) { + if(vb->Elts) + { + R600_OUT_BATCH(vb->Elts[i]); + } + else + R600_OUT_BATCH(i); + } } - - 
r600UpdateTextureState(ctx); - r700SendTextureState(context); - - if(GL_FALSE == fp->translated) - { - if( GL_FALSE == r700TranslateFragmentShader(fp, &(fp->mesa_program)) ) + else + { + if(GL_TRUE == context->ind_buf.bHostIb) { - return GL_TRUE; + if(GL_TRUE != context->ind_buf.is_32bit) + { + GLushort * pIndex = (GLushort*)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + pIndex += start; + for (i = 0; i < num_indices; i++) + { + R600_OUT_BATCH(*pIndex); + pIndex++; + } + } + else + { + GLuint * pIndex = (GLuint*)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + pIndex += start; + + for (i = 0; i < num_indices; i++) + { + R600_OUT_BATCH(*pIndex); + pIndex++; + } + } + } + else + { + /* TODO : hw ib draw */ } } - r700SetupShaders(ctx); - - r700SendFSState(context); // FIXME just a place holder for now - r700SendPSState(context); - r700SendVSState(context); - - r700SendUCPState(context); - r700SendContextStates(context); - r700SendViewportState(context, 0); - r700SendRenderTargetState(context, 0); - r700SendDepthTargetState(context); + END_BATCH(); + COMMIT_BATCH(); +} - /* richard test code */ - for (i = 0; i < vb->PrimitiveCount; i++) - { - GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); - GLuint start = vb->Primitive[i].start; - GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; - GLuint numIndices = vb->Primitive[i].count; - GLuint numEntires; +/* start 3d, idle, cb/db flush */ +#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14 - unsigned int VGT_DRAW_INITIATOR = 0; - unsigned int VGT_INDEX_TYPE = 0; - unsigned int VGT_PRIMITIVE_TYPE = 0; - unsigned int VGT_NUM_INDICES = 0; - - numEntires = 2 /* VGT_INDEX_TYPE */ - + 3 /* VGT_PRIMITIVE_TYPE */ - + numIndices + 3; /* DRAW_INDEX_IMMD */ - - BEGIN_BATCH_NO_AUTOSTATE(numEntires); +static GLuint r700PredictRenderSize(GLcontext* ctx) +{ + context_t *context = R700_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct r700_vertex_program *vp = 
context->selected_vp; + struct vertex_buffer *vb = &tnl->vb; + GLboolean flushed; + GLuint dwords, i; + GLuint state_size; + /* pre calculate aos count so state prediction works */ + context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead); + + dwords = PRE_EMIT_STATE_BUFSZ; + for (i = 0; i < vb->PrimitiveCount; i++) + dwords += vb->Primitive[i].count + 10; + state_size = radeonCountStateEmitSize(&context->radeon); + flushed = rcommonEnsureCmdBufSpace(&context->radeon, + dwords + state_size, __FUNCTION__); + + if (flushed) + dwords += radeonCountStateEmitSize(&context->radeon); + else + dwords += state_size; - VGT_INDEX_TYPE |= DI_INDEX_SIZE_32_BIT << INDEX_TYPE_shift; + radeon_print(RADEON_RENDER, RADEON_VERBOSE, + "%s: total prediction size is %d.\n", __FUNCTION__, dwords); + return dwords; +} - R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); - R600_OUT_BATCH(VGT_INDEX_TYPE); +static GLboolean r700RunRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) +{ + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + unsigned int i, id = 0; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + struct radeon_renderbuffer *rrb; - VGT_NUM_INDICES = numIndices; + radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n", + __func__, context->radeon.cmdbuf.cs->cdw); - VGT_PRIMITIVE_TYPE |= r700PrimitiveType(prim) << VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift; - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); - R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); - R600_OUT_BATCH(VGT_PRIMITIVE_TYPE); + /* always emit CB base to prevent + * lock ups on some chips. 
+ */ + R600_STATECHANGE(context, cb_target); + /* mark vtx as dirty since it changes per-draw */ + R600_STATECHANGE(context, vtx); - VGT_DRAW_INITIATOR |= DI_SRC_SEL_IMMEDIATE << SOURCE_SELECT_shift; - VGT_DRAW_INITIATOR |= DI_MAJOR_MODE_0 << MAJOR_MODE_shift; + r700SetScissor(context); + r700SetupVertexProgram(ctx); + r700SetupFragmentProgram(ctx); + r600UpdateTextureState(ctx); - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (numIndices + 1))); - R600_OUT_BATCH(VGT_NUM_INDICES); - R600_OUT_BATCH(VGT_DRAW_INITIATOR); + GLuint emit_end = r700PredictRenderSize(ctx) + + context->radeon.cmdbuf.cs->cdw; + r700SetupStreams(ctx); - for (j = lastIndex; j < lastIndex + numIndices; j++) - { - R600_OUT_BATCH(j); - } - lastIndex += numIndices; + radeonEmitState(radeon); - END_BATCH(); - COMMIT_BATCH(); + radeon_debug_add_indent(); + /* richard test code */ + for (i = 0; i < vb->PrimitiveCount; i++) { + GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); + GLuint start = vb->Primitive[i].start; + GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; + r700RunRenderPrimitive(ctx, start, end, prim); } + radeon_debug_remove_indent(); /* Flush render op cached for last several quads. */ r700WaitForIdleClean(context); - radeonReleaseArrays(ctx, 0); + rrb = radeon_get_colorbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + CB_ACTION_ENA_bit | (1 << (id + 6))); + + rrb = radeon_get_depthbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); -#endif //0 - rcommonFlushCmdBuf( &context->radeon, __FUNCTION__ ); + radeonReleaseArrays(ctx, ~0); + + radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n", + __func__, context->radeon.cmdbuf.cs->cdw); + + if ( emit_end < context->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." 
+ " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); return GL_FALSE; } @@ -403,7 +463,10 @@ static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ /* TODO : sw fallback */ + /* Need shader bo's setup before bo check */ + r700UpdateShaders(ctx); /** + * Ensure all enabled and complete textures are uploaded along with any buffers being used. */ if(!r600ValidateBuffers(ctx)) @@ -411,10 +474,6 @@ static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ return GL_TRUE; } - context_t *context = R700_CONTEXT(ctx); - - r700UpdateShaders(ctx); - bRet = r700RunRender(ctx, stage); return bRet; @@ -456,4 +515,585 @@ const struct tnl_pipeline_stage *r700_pipeline[] = 0, }; +#define CONVERT( TYPE, MACRO ) do { \ + GLuint i, j, sz; \ + sz = input->Size; \ + if (input->Normalized) { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = MACRO(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } else { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = (GLfloat)(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } \ +} while (0) + +/** + * Convert attribute data type to float + * If the attribute uses named buffer object replace the bo with newly allocated bo + */ +static void r700ConvertAttrib(GLcontext *ctx, int count, + const struct gl_client_array *input, + struct StreamDesc *attr) +{ + context_t *context = R700_CONTEXT(ctx); + const GLvoid *src_ptr; + GLboolean mapped_named_bo = GL_FALSE; + GLfloat *dst_ptr; + GLuint stride; + + stride = (input->StrideB == 0) ? 
getTypeSize(input->Type) * input->Size : input->StrideB; + + /* Convert value for first element only */ + if (input->StrideB == 0) + { + count = 1; + } + + if (input->BufferObj->Name) + { + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + } + else + { + src_ptr = input->Ptr; + } + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, + sizeof(GLfloat) * input->Size * count, 32); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + + assert(src_ptr != NULL); + + switch (input->Type) + { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } +} + +static void r700AlignDataToDword(GLcontext *ctx, + const struct gl_client_array *input, + int count, + struct StreamDesc *attr) +{ + context_t *context = R700_CONTEXT(ctx); + const int dst_stride = (input->StrideB + 3) & ~3; + const int size = getTypeSize(input->Type) * input->Size * count; + GLboolean mapped_named_bo = GL_FALSE; + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32); + + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + { + GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + GLvoid *dst_ptr = 
ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + int i; + + for (i = 0; i < count; ++i) + { + _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + src_ptr += input->StrideB; + dst_ptr += dst_stride; + } + } + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } + + attr->stride = dst_stride; +} + +static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *input[], int count) +{ + context_t *context = R700_CONTEXT(ctx); + GLuint stride; + int ret; + int i, index; + + R600_STATECHANGE(context, vtx); + + for(index = 0; index < context->nNumActiveAos; index++) + { + struct radeon_aos *aos = &context->radeon.tcl.aos[index]; + i = context->stream_desc[index].element; + + stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; + + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input[i]->Type) != 4 || +#endif + stride < 4) + { + r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); + } + else + { + if (input[i]->BufferObj->Name) + { + if (stride % 4 != 0) + { + assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); + r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]); + context->stream_desc[index].is_named_bo = GL_FALSE; + } + else + { + context->stream_desc[index].stride = input[i]->StrideB; + context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; + context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + context->stream_desc[index].is_named_bo = GL_TRUE; + } + } + else + { + int size; + int local_count = count; + uint32_t *dst; + + if (input[i]->StrideB == 0) + { + size = getTypeSize(input[i]->Type) * input[i]->Size; + local_count = 1; + } + else + { + size = getTypeSize(input[i]->Type) * input[i]->Size * local_count; + } + + radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, + 
&context->stream_desc[index].bo_offset, size, 32); + assert(context->stream_desc[index].bo->ptr != NULL); + dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, + context->stream_desc[index].bo_offset); + + switch (context->stream_desc[index].dwords) + { + case 1: + radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 4; + break; + case 2: + radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 8; + break; + case 3: + radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 12; + break; + case 4: + radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 16; + break; + default: + assert(0); + break; + } + } + } + + aos->count = context->stream_desc[index].stride == 0 ? 1 : count; + aos->stride = context->stream_desc[index].stride / sizeof(float); + aos->components = context->stream_desc[index].dwords; + aos->bo = context->stream_desc[index].bo; + aos->offset = context->stream_desc[index].bo_offset; + + if(context->stream_desc[index].is_named_bo) + { + radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs, + context->stream_desc[index].bo, + RADEON_GEM_DOMAIN_GTT, 0); + } + } + + context->radeon.tcl.aos_count = context->nNumActiveAos; + ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs, + first_elem(&context->radeon.dma.reserved)->bo, + RADEON_GEM_DOMAIN_GTT, 0); +} + +static void r700FreeData(GLcontext *ctx) +{ + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ + context_t *context = R700_CONTEXT(ctx); + + int i; + + for (i = 0; i < context->nNumActiveAos; i++) + { + if (!context->stream_desc[i].is_named_bo) + { + radeon_bo_unref(context->stream_desc[i].bo); + } + context->radeon.tcl.aos[i].bo = NULL; + } + + if (context->ind_buf.bo != 
NULL) + { + if(context->ind_buf.bHostIb != GL_TRUE) + { + radeon_bo_unref(context->ind_buf.bo); + } + else + { + FREE(context->ind_buf.bo->ptr); + FREE(context->ind_buf.bo); + context->ind_buf.bo = NULL; + } + } +} + +static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = R700_CONTEXT(ctx); + GLvoid *src_ptr; + GLuint *out; + int i; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_named_bo = GL_TRUE; + assert(mesa_ind_buf->obj->Pointer != NULL); + } + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) + { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + GLubyte *in = (GLubyte *)src_ptr; + + if(context->ind_buf.bHostIb != GL_TRUE) + { + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + } + else + { + context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); + context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); + context->ind_buf.bo_offset = 0; + out = (GLuint *)context->ind_buf.bo->ptr; + } + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } + +#if MESA_BIG_ENDIAN + } + else + { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ + GLushort *in = (GLushort *)src_ptr; + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + if(context->ind_buf.bHostIb != GL_TRUE) + { + radeonAllocDmaRegion(&context->radeon, &r300->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, 
context->ind_buf.bo_offset); + } + else + { + context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); + context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); + context->ind_buf.bo_offset = 0; + out = (GLuint *)context->ind_buf.bo->ptr; + } + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } +#endif + } + + context->ind_buf.is_32bit = GL_FALSE; + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } +} + +static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = R700_CONTEXT(ctx); + + if (!mesa_ind_buf) { + context->ind_buf.bo = NULL; + return; + } + + context->ind_buf.bHostIb = GL_TRUE; + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) + { +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) + { +#endif + const GLvoid *src_ptr; + GLvoid *dst_ptr; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + if(context->ind_buf.bHostIb != GL_TRUE) + { + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + assert(context->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + } + else + { + context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); + context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); + context->ind_buf.bo_offset = 0; + dst_ptr = context->ind_buf.bo->ptr; + } + + _mesa_memcpy(dst_ptr, src_ptr, size); + + 
context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } + } + else + { + r700FixupIndexBuffer(ctx, mesa_ind_buf); + } +} + +static GLboolean r700TryDrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) +{ + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + GLuint i, id = 0; + GLboolean bValidedbuffer; + struct radeon_renderbuffer *rrb; + + if (ctx->NewState) + { + _mesa_update_state( ctx ); + } + + bValidedbuffer = r600ValidateBuffers(ctx); + + /* always emit CB base to prevent + * lock ups on some chips. + */ + R600_STATECHANGE(context, cb_target); + /* mark vtx as dirty since it changes per-draw */ + R600_STATECHANGE(context, vtx); + + _tnl_UpdateFixedFunctionProgram(ctx); + r700SetVertexFormat(ctx, arrays, max_index + 1); + r700SetupStreams2(ctx, arrays, max_index + 1); + r700UpdateShaders2(ctx); + + r700SetScissor(context); + + r700SetupVertexProgram(ctx); + + r700SetupFragmentProgram(ctx); + + r600UpdateTextureState(ctx); + + GLuint emit_end = r700PredictRenderSize(ctx) + + context->radeon.cmdbuf.cs->cdw; + + r700SetupIndexBuffer(ctx, ib); + + radeonEmitState(radeon); + + for (i = 0; i < nr_prims; ++i) + { + r700RunRenderPrimitive(ctx, + prim[i].start, + prim[i].start + prim[i].count, + prim[i].mode); + } + + /* Flush render op cached for last several quads. 
*/ + r700WaitForIdleClean(context); + + rrb = radeon_get_colorbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + CB_ACTION_ENA_bit | (1 << (id + 6))); + + rrb = radeon_get_depthbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); + + r700FreeData(ctx); + + if (emit_end < context->radeon.cmdbuf.cs->cdw) + { + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); + } + + return GL_TRUE; +} + +static void r700DrawPrimsRe(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + GLboolean retval = GL_FALSE; + + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } + + if (min_index) { + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrimsRe ); + return; + } + + /* Make an attempt at drawing */ + retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + + /* If failed run tnl pipeline - it should take care of fallbacks */ + if (!retval) + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +} + +static void r700DrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + context_t *context = R700_CONTEXT(ctx); + + /* For non indexed drawing, using tnl pipe. 
*/ + if(!ib) + { + context->ind_buf.bo = NULL; + + _tnl_vbo_draw_prims(ctx, arrays, prim, nr_prims, ib, + index_bounds_valid, min_index, max_index); + return; + } + + r700DrawPrimsRe(ctx, arrays, prim, nr_prims, ib, index_bounds_valid, min_index, max_index); +} + +void r700InitDraw(GLcontext *ctx) +{ + struct vbo_context *vbo = vbo_context(ctx); + + vbo->draw_prims = r700DrawPrims; +} + diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c index b4fd51c137..955ea4e4e1 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.c +++ b/src/mesa/drivers/dri/r600/r700_shader.c @@ -60,6 +60,55 @@ void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * plstCFInstructions->uNumOfNode++; } +void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst) +{ + GLuint ulIndex = 0; + GLboolean bFound = GL_FALSE; + R700ShaderInstruction * pPrevInst = NULL; + R700ShaderInstruction * pCurInst = plstCFInstructions->pHead; + + /* Need go thro list to make sure pInst is there. 
*/ + while(NULL != pCurInst) + { + if(pCurInst == pInst) + { + bFound = GL_TRUE; + break; + } + + pPrevInst = pCurInst; + pCurInst = pCurInst->pNextInst; + } + if(GL_TRUE == bFound) + { + plstCFInstructions->uNumOfNode--; + + pCurInst = pInst->pNextInst; + ulIndex = pInst->m_uIndex; + while(NULL != pCurInst) + { + pCurInst->m_uIndex = ulIndex; + ulIndex++; + pCurInst = pCurInst->pNextInst; + } + + if(plstCFInstructions->pHead == pInst) + { + plstCFInstructions->pHead = pInst->pNextInst; + } + if(plstCFInstructions->pTail == pInst) + { + plstCFInstructions->pTail = pPrevInst; + } + if(NULL != pPrevInst) + { + pPrevInst->pNextInst = pInst->pNextInst; + } + + FREE(pInst); + } +} + void Init_R700_Shader(R700_Shader * pShader) { pShader->Type = R700_SHADER_INVALID; @@ -488,6 +537,47 @@ void DebugPrint(void) { } +void cleanup_vfetch_shaderinst(R700_Shader *pShader) +{ + R700ShaderInstruction *pInst; + R700ShaderInstruction *pInstToFree; + R700VertexInstruction *pVTXInst; + R700ControlFlowInstruction *pCFInst; + + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pVTXInst = (R700VertexInstruction *)pInst; + pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pVTXInst->m_ShaderInstType); + + if(NULL != pVTXInst->m_pLinkedGenericClause) + { + pCFInst = (R700ControlFlowInstruction*)(pVTXInst->m_pLinkedGenericClause); + + TakeInstOutFromList(&(pShader->lstCFInstructions), + (R700ShaderInstruction*)pCFInst); + + pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pCFInst->m_ShaderInstType); + } + + pInst = pInst->pNextInst; + }; + + //destroy each item in pShader->lstVTXInstructions; + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pInstToFree = pInst; + pInst = pInst->pNextInst; + FREE(pInstToFree); + }; + + //set NULL pShader->lstVTXInstructions + pShader->lstVTXInstructions.pHead=NULL; + pShader->lstVTXInstructions.pTail=NULL; + pShader->lstVTXInstructions.uNumOfNode=0; +} + void Clean_Up_Shader(R700_Shader *pShader) { 
FREE(pShader->pProgram); diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h index bfd01e1a93..997cb05aaf 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.h +++ b/src/mesa/drivers/dri/r600/r700_shader.h @@ -143,6 +143,7 @@ void LoadProgram(R700_Shader *pShader); void UpdateShaderRegisters(R700_Shader *pShader); void DeleteInstructions(R700_Shader *pShader); void DebugPrint(void); +void cleanup_vfetch_shaderinst(R700_Shader *pShader); void Clean_Up_Shader(R700_Shader *pShader); diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.c b/src/mesa/drivers/dri/r600/r700_shaderinst.c index c1bffee91f..f120d9f941 100644 --- a/src/mesa/drivers/dri/r600/r700_shaderinst.c +++ b/src/mesa/drivers/dri/r600/r700_shaderinst.c @@ -27,7 +27,7 @@ #include "main/mtypes.h" -#include "r700_debug.h" +#include "radeon_debug.h" #include "r700_shaderinst.h" void Init_R700ControlFlowGenericClause(R700ControlFlowGenericClause* pInst) @@ -208,16 +208,16 @@ unsigned int GetCFMaxInstructions(ShaderInstType instType) GLboolean LinkVertexInstruction(R700ControlFlowGenericClause *pCFGeneric, R700VertexInstruction *pVTXInstruction) { - if (pCFGeneric->m_pLinkedTEXInstruction != 0) - { - r700_error(ERROR_ASM_VTX_CLAUSE, "This instruction is already linked to a texture instruction"); - return GL_FALSE; + if (pCFGeneric->m_pLinkedTEXInstruction != 0) + { + radeon_error("This instruction is already linked to a texture instruction.\n"); + return GL_FALSE; } pCFGeneric->m_pLinkedVTXInstruction = pVTXInstruction; - pVTXInstruction->m_pLinkedGenericClause = pCFGeneric; + pVTXInstruction->m_pLinkedGenericClause = pCFGeneric; - return GL_TRUE; + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index a2ddebb1ce..1043eabb14 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -61,57 +61,58 @@ static void r700UpdatePolygonMode(GLcontext * ctx); static void 
r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state); static void r700SetStencilState(GLcontext * ctx, GLboolean state); -void r700SetDefaultStates(context_t *context) //-------------------- -{ - -} - void r700UpdateShaders (GLcontext * ctx) //---------------------------------- { context_t *context = R700_CONTEXT(ctx); - GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; + int i; - struct r700_vertex_program *vp; - int i; + /* should only happenen once, just after context is created */ + /* TODO: shouldn't we fallback to sw here? */ + if (!ctx->FragmentProgram._Current) { + _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; + } - if (context->radeon.NewGLState) - { - context->radeon.NewGLState = 0; + r700SelectFragmentShader(ctx); - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) - { - /* mat states from state var not array for sw */ - dummy_attrib[i].stride = 0; + if (context->radeon.NewGLState) { + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + /* mat states from state var not array for sw */ + dummy_attrib[i].stride = 0; + temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i]; + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &(dummy_attrib[i]); + } - temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i]; - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &(dummy_attrib[i]); - } + _tnl_UpdateFixedFunctionProgram(ctx); - _tnl_UpdateFixedFunctionProgram(ctx); + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = temp_attrib[i]; + } + } - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) - { - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = temp_attrib[i]; - } + r700SelectVertexShader(ctx, 1); + r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + context->radeon.NewGLState = 0; +} - r700SelectVertexShader(ctx); - vp = (struct r700_vertex_program *)ctx->VertexProgram._Current; +void r700UpdateShaders2(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); - if 
(vp->translated == GL_FALSE) - { - // TODO - //fprintf(stderr, "Failing back to sw-tcl\n"); - //hw_tcl_on = future_hw_tcl_on = 0; - //r300ResetHwState(rmesa); - // - r700UpdateStateParameters(ctx, _NEW_PROGRAM); - return; - } + /* should only happenen once, just after context is created */ + /* TODO: shouldn't we fallback to sw here? */ + if (!ctx->FragmentProgram._Current) { + _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; } - r700UpdateStateParameters(ctx, _NEW_PROGRAM); + r700SelectFragmentShader(ctx); + + r700SelectVertexShader(ctx, 2); + r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + context->radeon.NewGLState = 0; } /* @@ -130,76 +131,38 @@ void r700UpdateViewportOffset(GLcontext * ctx) //------------------ GLfloat tx = v[MAT_TX] + xoffset; GLfloat ty = (-v[MAT_TY]) + yoffset; - r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx; - r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty; - - radeonUpdateScissor(ctx); -} - -/** - * Tell the card where to render (offset, pitch). - * Effected by glDrawBuffer, etc - */ -void r700UpdateDrawBuffer(GLcontext * ctx) /* TODO */ //--------------------- -{ -#if 0 /* to be enabled */ - context_t *context = R700_CONTEXT(ctx); - - switch (ctx->DrawBuffer->_ColorDrawBufferIndexes[0]) - { - case BUFFER_FRONT_LEFT: - context->target.rt = context->screen->frontBuffer; - break; - case BUFFER_BACK_LEFT: - context->target.rt = context->screen->backBuffer; - break; - default: - memset (&context->target.rt, sizeof(context->target.rt), 0); + if (r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All != tx || + r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All != ty) { + /* Note: this should also modify whatever data the context reset + * code uses... 
+ */ + R600_STATECHANGE(context, vpt); + r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx; + r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty; } -#endif /* to be enabled */ -} - -static void r700FetchStateParameter(GLcontext * ctx, - const gl_state_index state[STATE_LENGTH], - GLfloat * value) -{ - context_t *context = R700_CONTEXT(ctx); - /* TODO */ + radeonUpdateScissor(ctx); } void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state) //-------------------- { - struct r700_fragment_program *fp; + struct r700_fragment_program *fp = + (struct r700_fragment_program *)ctx->FragmentProgram._Current; struct gl_program_parameter_list *paramList; - GLuint i; - if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM))) + if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))) return; - fp = (struct r700_fragment_program *)ctx->FragmentProgram._Current; - if (!fp) - { + if (!ctx->FragmentProgram._Current || !fp) return; - } - paramList = fp->mesa_program.Base.Parameters; + paramList = ctx->FragmentProgram._Current->Base.Parameters; if (!paramList) - { return; - } - for (i = 0; i < paramList->NumParameters; i++) - { - if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) - { - r700FetchStateParameter(ctx, - paramList->Parameters[i]. 
- StateIndexes, - paramList->ParameterValues[i]); - } - } + _mesa_load_state_parameters(ctx, paramList); + } /** @@ -212,21 +175,32 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //----------- R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); _swrast_InvalidateState(ctx, new_state); - _swsetup_InvalidateState(ctx, new_state); - _vbo_InvalidateState(ctx, new_state); - _tnl_InvalidateState(ctx, new_state); - _ae_invalidate_state(ctx, new_state); + _swsetup_InvalidateState(ctx, new_state); + _vbo_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); + _ae_invalidate_state(ctx, new_state); + + if (new_state & _NEW_BUFFERS) { + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + + R600_STATECHANGE(context, cb_target); + R600_STATECHANGE(context, db_target); + } - if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) - { - _mesa_update_framebuffer(ctx); - /* this updates the DrawBuffer's Width/Height if it's a FBO */ - _mesa_update_draw_buffer_bounds(ctx); + if (new_state & (_NEW_LIGHT)) { + R600_STATECHANGE(context, su); + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) + SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit); + else + CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit); + } - r700UpdateDrawBuffer(ctx); - } + r700UpdateStateParameters(ctx, new_state); - r700UpdateStateParameters(ctx, new_state); + R600_STATECHANGE(context, cl); + R600_STATECHANGE(context, spi); if(GL_TRUE == r700->bEnablePerspective) { @@ -251,14 +225,15 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //----------- SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit); } - context->radeon.NewGLState |= new_state; + context->radeon.NewGLState |= new_state; } static void r700SetDepthState(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = 
(R700_CHIP_CONTEXT*)(&context->hw); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, db); if (ctx->Depth.Test) { @@ -326,6 +301,8 @@ static void r700SetAlphaState(GLcontext * ctx) uint32_t alpha_func = REF_ALWAYS; GLboolean really_enabled = ctx->Color.AlphaEnabled; + R600_STATECHANGE(context, sx); + switch (ctx->Color.AlphaFunc) { case GL_NEVER: alpha_func = REF_NEVER; @@ -378,6 +355,8 @@ static void r700BlendColor(GLcontext * ctx, const GLfloat cf[4]) //------------- context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, blnd_clr); + r700->CB_BLEND_RED.f32All = cf[0]; r700->CB_BLEND_GREEN.f32All = cf[1]; r700->CB_BLEND_BLUE.f32All = cf[2]; @@ -446,6 +425,8 @@ static void r700SetBlendState(GLcontext * ctx) int id = 0; uint32_t blend_reg = 0, eqn, eqnA; + R600_STATECHANGE(context, blnd); + if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) { SETfield(blend_reg, BLEND_ONE, COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask); @@ -584,14 +565,46 @@ static void r700BlendFuncSeparate(GLcontext * ctx, /** * Translate LogicOp enums into hardware representation. - * Both use a very logical bit-wise layout, but unfortunately the order - * of bits is reversed. 
*/ static GLuint translate_logicop(GLenum logicop) { - GLuint bits = logicop - GL_CLEAR; - bits = ((bits & 1) << 3) | ((bits & 2) << 1) | ((bits & 4) >> 1) | ((bits & 8) >> 3); - return bits; + switch (logicop) { + case GL_CLEAR: + return 0x00; + case GL_SET: + return 0xff; + case GL_COPY: + return 0xcc; + case GL_COPY_INVERTED: + return 0x33; + case GL_NOOP: + return 0xaa; + case GL_INVERT: + return 0x55; + case GL_AND: + return 0x88; + case GL_NAND: + return 0x77; + case GL_OR: + return 0xee; + case GL_NOR: + return 0x11; + case GL_XOR: + return 0x66; + case GL_EQUIV: + return 0xaa; + case GL_AND_REVERSE: + return 0x44; + case GL_AND_INVERTED: + return 0x22; + case GL_OR_REVERSE: + return 0xdd; + case GL_OR_INVERTED: + return 0xbb; + default: + fprintf(stderr, "unknown blend logic operation %x\n", logicop); + return 0xcc; + } } /** @@ -600,8 +613,11 @@ static GLuint translate_logicop(GLenum logicop) */ static void r700SetLogicOpState(GLcontext *ctx) { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); + R600_STATECHANGE(context, blnd); + if (RGBA_LOGICOP_ENABLED(ctx)) SETfield(r700->CB_COLOR_CONTROL.u32All, translate_logicop(ctx->Color.LogicOp), ROP3_shift, ROP3_mask); @@ -621,7 +637,10 @@ static void r700LogicOpcode(GLcontext *ctx, GLenum logicop) static void r700UpdateCulling(GLcontext * ctx) { - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); + + R600_STATECHANGE(context, su); CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); @@ -666,7 +685,11 @@ static void r700UpdateCulling(GLcontext * ctx) static void r700UpdateLineStipple(GLcontext * ctx) { - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = 
(R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); + + R600_STATECHANGE(context, sc); + if (ctx->Line.StippleFlag) { SETbit(r700->PA_SC_MODE_CNTL.u32All, LINE_STIPPLE_ENABLE_bit); @@ -741,14 +764,17 @@ static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) //--------- static void r700ColorMask(GLcontext * ctx, GLboolean r, GLboolean g, GLboolean b, GLboolean a) //------------------ { + context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); unsigned int mask = ((r ? 1 : 0) | (g ? 2 : 0) | (b ? 4 : 0) | (a ? 8 : 0)); - if (mask != r700->CB_SHADER_MASK.u32All) + if (mask != r700->CB_SHADER_MASK.u32All) { + R600_STATECHANGE(context, cb); SETfield(r700->CB_SHADER_MASK.u32All, mask, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask); + } } /** @@ -804,6 +830,8 @@ static void r700ShadeModel(GLcontext * ctx, GLenum mode) //-------------------- context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, spi); + /* also need to set/clear FLAT_SHADE bit per param in SPI_PS_INPUT_CNTL_[0-31] */ switch (mode) { case GL_FLAT: @@ -825,6 +853,8 @@ static void r700PointSize(GLcontext * ctx, GLfloat size) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, su); + /* We need to clamp to user defined range here, because * the HW clamping happens only for per vertex point size. 
*/ size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize); @@ -833,9 +863,9 @@ static void r700PointSize(GLcontext * ctx, GLfloat size) size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); /* format is 12.4 fixed point */ - SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 16), + SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0), PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask); - SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 16), + SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0), PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask); } @@ -845,14 +875,16 @@ static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * pa context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, su); + /* format is 12.4 fixed point */ switch (pname) { case GL_POINT_SIZE_MIN: - SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 16.0), + SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 8.0), MIN_SIZE_shift, MIN_SIZE_mask); break; case GL_POINT_SIZE_MAX: - SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 16.0), + SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 8.0), MAX_SIZE_shift, MAX_SIZE_mask); break; case GL_POINT_DISTANCE_ATTENUATION: @@ -919,9 +951,6 @@ static void r700SetStencilState(GLcontext * ctx, GLboolean state) R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); GLboolean hw_stencil = GL_FALSE; - //fixme - //r300CatchStencilFallback(ctx); - if (ctx->DrawBuffer) { struct radeon_renderbuffer *rrbStencil = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); @@ -929,9 +958,11 @@ static void r700SetStencilState(GLcontext * ctx, GLboolean state) } if (hw_stencil) { - if (state) + R600_STATECHANGE(context, db); + if (state) { SETbit(r700->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit); - else + SETbit(r700->DB_DEPTH_CONTROL.u32All, 
BACKFACE_ENABLE_bit); + } else CLEARbit(r700->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit); } } @@ -943,8 +974,8 @@ static void r700StencilFuncSeparate(GLcontext * ctx, GLenum face, R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); const unsigned back = ctx->Stencil._BackFace; - //fixme - //r300CatchStencilFallback(ctx); + R600_STATECHANGE(context, stencil); + R600_STATECHANGE(context, db); //front SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.Ref[0], @@ -972,8 +1003,7 @@ static void r700StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) / R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); const unsigned back = ctx->Stencil._BackFace; - //fixme - //r300CatchStencilFallback(ctx); + R600_STATECHANGE(context, stencil); // front SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.WriteMask[0], @@ -992,8 +1022,7 @@ static void r700StencilOpSeparate(GLcontext * ctx, GLenum face, R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); const unsigned back = ctx->Stencil._BackFace; - //fixme - //r300CatchStencilFallback(ctx); + R600_STATECHANGE(context, db); SETfield(r700->DB_DEPTH_CONTROL.u32All, translate_stencil_op(ctx->Stencil.FailFunc[0]), STENCILFAIL_shift, STENCILFAIL_mask); @@ -1037,7 +1066,7 @@ static void r700UpdateWindow(GLcontext * ctx, int id) //-------------------- GLfloat sz = v[MAT_SZ] * depthScale; GLfloat tz = v[MAT_TZ] * depthScale; - /* TODO : Need DMA flush as well. 
*/ + R600_STATECHANGE(context, vpt); r700->viewport[id].PA_CL_VPORT_XSCALE.f32All = sx; r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx; @@ -1075,10 +1104,13 @@ static void r700LineWidth(GLcontext * ctx, GLfloat widthf) //--------------- context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); uint32_t lineWidth = (uint32_t)((widthf * 0.5) * (1 << 4)); + + R600_STATECHANGE(context, su); + if (lineWidth > 0xFFFF) - lineWidth = 0xFFFF; + lineWidth = 0xFFFF; SETfield(r700->PA_SU_LINE_CNTL.u32All,(uint16_t)lineWidth, - PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask); + PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask); } static void r700LineStipple(GLcontext *ctx, GLint factor, GLushort pattern) @@ -1086,6 +1118,8 @@ static void r700LineStipple(GLcontext *ctx, GLint factor, GLushort pattern) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, sc); + SETfield(r700->PA_SC_LINE_STIPPLE.u32All, pattern, LINE_PATTERN_shift, LINE_PATTERN_mask); SETfield(r700->PA_SC_LINE_STIPPLE.u32All, (factor-1), REPEAT_COUNT_shift, REPEAT_COUNT_mask); SETfield(r700->PA_SC_LINE_STIPPLE.u32All, 1, AUTO_RESET_CNTL_shift, AUTO_RESET_CNTL_mask); @@ -1096,6 +1130,8 @@ static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, su); + if (state) { SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_FRONT_ENABLE_bit); SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_BACK_ENABLE_bit); @@ -1112,18 +1148,25 @@ static void r700PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) // context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); GLfloat constant = units; + GLchar depth = 0; + + R600_STATECHANGE(context, poly); switch (ctx->Visual.depthBits) { case 16: 
constant *= 4.0; + depth = -16; break; case 24: constant *= 2.0; + depth = -24; break; } factor *= 12.0; - + SETfield(r700->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All, depth, + POLY_OFFSET_NEG_NUM_DB_BITS_shift, POLY_OFFSET_NEG_NUM_DB_BITS_mask); + //r700->PA_SU_POLY_OFFSET_CLAMP.f32All = constant; //??? r700->PA_SU_POLY_OFFSET_FRONT_SCALE.f32All = factor; r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.f32All = constant; r700->PA_SU_POLY_OFFSET_BACK_SCALE.f32All = factor; @@ -1135,6 +1178,8 @@ static void r700UpdatePolygonMode(GLcontext * ctx) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + R600_STATECHANGE(context, su); + SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DISABLE_POLY_MODE, POLY_MODE_shift, POLY_MODE_mask); /* Only do something if a polygon mode is wanted, default is GL_FILL */ @@ -1210,6 +1255,8 @@ static void r700ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) p = (GLint) plane - (GLint) GL_CLIP_PLANE0; ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + R600_STATECHANGE(context, ucp); + r700->ucp[p].PA_CL_UCP_0_X.u32All = ip[0]; r700->ucp[p].PA_CL_UCP_0_Y.u32All = ip[1]; r700->ucp[p].PA_CL_UCP_0_Z.u32All = ip[2]; @@ -1223,6 +1270,9 @@ static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state) GLuint p; p = cap - GL_CLIP_PLANE0; + + R600_STATECHANGE(context, cl); + if (state) { r700->PA_CL_CLIP_CNTL.u32All |= (UCP_ENA_0_bit << p); r700->ucp[p].enabled = GL_TRUE; @@ -1247,15 +1297,36 @@ void r700SetScissor(context_t *context) //--------------- if (context->radeon.state.scissor.enabled) { x1 = context->radeon.state.scissor.rect.x1; y1 = context->radeon.state.scissor.rect.y1; - x2 = context->radeon.state.scissor.rect.x2 - 1; - y2 = context->radeon.state.scissor.rect.y2 - 1; + x2 = context->radeon.state.scissor.rect.x2; + y2 = context->radeon.state.scissor.rect.y2; } else { - x1 = rrb->dPriv->x; - y1 = rrb->dPriv->y; - x2 = rrb->dPriv->x + rrb->dPriv->w; - y2 = rrb->dPriv->y + 
rrb->dPriv->h; + if (context->radeon.radeonScreen->driScreen->dri2.enabled) { + x1 = 0; + y1 = 0; + x2 = rrb->base.Width; + y2 = rrb->base.Height; + } else { + x1 = rrb->dPriv->x; + y1 = rrb->dPriv->y; + x2 = rrb->dPriv->x + rrb->dPriv->w; + y2 = rrb->dPriv->y + rrb->dPriv->h; + } } + R600_STATECHANGE(context, scissor); + + /* screen */ + SETbit(r700->PA_SC_SCREEN_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit); + SETfield(r700->PA_SC_SCREEN_SCISSOR_TL.u32All, x1, + PA_SC_SCREEN_SCISSOR_TL__TL_X_shift, PA_SC_SCREEN_SCISSOR_TL__TL_X_mask); + SETfield(r700->PA_SC_SCREEN_SCISSOR_TL.u32All, y1, + PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift, PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask); + + SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, x2, + PA_SC_SCREEN_SCISSOR_BR__BR_X_shift, PA_SC_SCREEN_SCISSOR_BR__BR_X_mask); + SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, y2, + PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift, PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask); + /* window */ SETbit(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit); SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, x1, @@ -1311,120 +1382,6 @@ void r700SetScissor(context_t *context) //--------------- r700->viewport[id].enabled = GL_TRUE; } -void r700SetRenderTarget(context_t *context, int id) -{ - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - - struct radeon_renderbuffer *rrb; - unsigned int nPitchInPixel; - - /* screen/window/view */ - SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask); - - rrb = radeon_get_colorbuffer(&context->radeon); - if (!rrb || !rrb->bo) { - fprintf(stderr, "no rrb\n"); - return; - } - - /* color buffer */ - r700->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset; - - nPitchInPixel = rrb->pitch/rrb->cpp; - SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, (nPitchInPixel/8)-1, - PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); - SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, ( (nPitchInPixel * 
context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1, - SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); - r700->render_target[id].CB_COLOR0_BASE.u32All = 0; - SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask); - SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL, - CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); - if(4 == rrb->cpp) - { - SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, COLOR_8_8_8_8, - CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask); - SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, SWAP_ALT, COMP_SWAP_shift, COMP_SWAP_mask); - } - else - { - SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, COLOR_5_6_5, - CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask); - SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, SWAP_ALT_REV, - COMP_SWAP_shift, COMP_SWAP_mask); - } - SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit); - SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_CLAMP_bit); - SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); - - r700->render_target[id].enabled = GL_TRUE; -} - -void r700SetDepthTarget(context_t *context) -{ - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - - struct radeon_renderbuffer *rrb; - unsigned int nPitchInPixel; - - /* depth buf */ - r700->DB_DEPTH_SIZE.u32All = 0; - r700->DB_DEPTH_BASE.u32All = 0; - r700->DB_DEPTH_INFO.u32All = 0; - - r700->DB_DEPTH_CLEAR.u32All = 0x3F800000; - r700->DB_DEPTH_VIEW.u32All = 0; - r700->DB_RENDER_CONTROL.u32All = 0; - SETbit(r700->DB_RENDER_CONTROL.u32All, STENCIL_COMPRESS_DISABLE_bit); - SETbit(r700->DB_RENDER_CONTROL.u32All, DEPTH_COMPRESS_DISABLE_bit); - r700->DB_RENDER_OVERRIDE.u32All = 0; - if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) - SETbit(r700->DB_RENDER_OVERRIDE.u32All, FORCE_SHADER_Z_ORDER_bit); - 
SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); - SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); - SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); - - r700->DB_ALPHA_TO_MASK.u32All = 0; - SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask); - SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET1_shift, ALPHA_TO_MASK_OFFSET1_mask); - SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET2_shift, ALPHA_TO_MASK_OFFSET2_mask); - SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET3_shift, ALPHA_TO_MASK_OFFSET3_mask); - - rrb = radeon_get_depthbuffer(&context->radeon); - if (!rrb) - return; - - nPitchInPixel = rrb->pitch/rrb->cpp; - - SETfield(r700->DB_DEPTH_SIZE.u32All, (nPitchInPixel/8)-1, - PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); - SETfield(r700->DB_DEPTH_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1, - SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); /* size in pixel / 64 - 1 */ - - if(4 == rrb->cpp) - { - switch (GL_CONTEXT(context)->Visual.depthBits) - { - case 16: - case 24: - SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_8_24, - DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask); - break; - default: - fprintf(stderr, "Error: Unsupported depth %d... exiting\n", - GL_CONTEXT(context)->Visual.depthBits); - _mesa_exit(-1); - } - } - else - { - SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16, - DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask); - } - SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_2D_TILED_THIN1, - DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask); - /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* z buffer sie may much bigger than what need, so use actual used h. 
*/ -} - static void r700InitSQConfig(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); @@ -1447,6 +1404,8 @@ static void r700InitSQConfig(GLcontext * ctx) int num_gs_stack_entries; int num_es_stack_entries; + R600_STATECHANGE(context, sq); + // SQ ps_prio = 0; vs_prio = 1; @@ -1487,6 +1446,7 @@ static void r700InitSQConfig(GLcontext * ctx) case CHIP_FAMILY_RV610: case CHIP_FAMILY_RV620: case CHIP_FAMILY_RS780: + case CHIP_FAMILY_RS880: default: num_ps_gprs = 84; num_vs_gprs = 36; @@ -1569,6 +1529,7 @@ static void r700InitSQConfig(GLcontext * ctx) if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) || (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) || (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) || (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710)) CLEARbit(r700->sq_config.SQ_CONFIG.u32All, VC_ENABLE_bit); else @@ -1622,8 +1583,10 @@ static void r700InitSQConfig(GLcontext * ctx) void r700InitState(GLcontext * ctx) //------------------- { context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + int id = 0; + + radeon_firevertices(&context->radeon); r700->TA_CNTL_AUX.u32All = 0; SETfield(r700->TA_CNTL_AUX.u32All, 28, TD_FIFO_CREDIT_shift, TD_FIFO_CREDIT_mask); @@ -1653,31 +1616,16 @@ void r700InitState(GLcontext * ctx) //------------------- r700->VGT_MIN_VTX_INDX.u32All = 0; r700->VGT_INDX_OFFSET.u32All = 0; - /* Specify the number of instances */ - r700->VGT_DMA_NUM_INSTANCES.u32All = 1; - /* default shader connections. 
*/ r700->SPI_VS_OUT_ID_0.u32All = 0x03020100; r700->SPI_VS_OUT_ID_1.u32All = 0x07060504; - - r700->SPI_PS_INPUT_CNTL_0.u32All = 0x00000800; - r700->SPI_PS_INPUT_CNTL_1.u32All = 0x00000801; - r700->SPI_PS_INPUT_CNTL_2.u32All = 0x00000802; + r700->SPI_VS_OUT_ID_2.u32All = 0x0b0a0908; + r700->SPI_VS_OUT_ID_3.u32All = 0x0f0e0d0c; r700->SPI_THREAD_GROUPING.u32All = 0; if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770) SETfield(r700->SPI_THREAD_GROUPING.u32All, 1, PS_GROUPING_shift, PS_GROUPING_mask); - /* screen */ - r700->PA_SC_SCREEN_SCISSOR_TL.u32All = 0x0; - - SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, - ((RADEONDRIPtr)(context->radeon.radeonScreen->driScreen->pDevPriv))->width, - PA_SC_SCREEN_SCISSOR_BR__BR_X_shift, PA_SC_SCREEN_SCISSOR_BR__BR_X_mask); - SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, - ((RADEONDRIPtr)(context->radeon.radeonScreen->driScreen->pDevPriv))->height, - PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift, PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask); - /* 4 clip rectangles */ /* TODO : set these clip rects according to context->currentDraw->numClipRects */ r700->PA_SC_CLIPRECT_RULE.u32All = 0; SETfield(r700->PA_SC_CLIPRECT_RULE.u32All, CLIP_RULE_mask, CLIP_RULE_shift, CLIP_RULE_mask); @@ -1747,6 +1695,24 @@ void r700InitState(GLcontext * ctx) //------------------- r700DepthFunc(ctx, ctx->Depth.Func); SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit); + r700->DB_DEPTH_CLEAR.u32All = 0x3F800000; + + r700->DB_RENDER_CONTROL.u32All = 0; + SETbit(r700->DB_RENDER_CONTROL.u32All, STENCIL_COMPRESS_DISABLE_bit); + SETbit(r700->DB_RENDER_CONTROL.u32All, DEPTH_COMPRESS_DISABLE_bit); + r700->DB_RENDER_OVERRIDE.u32All = 0; + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + SETbit(r700->DB_RENDER_OVERRIDE.u32All, FORCE_SHADER_Z_ORDER_bit); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, 
FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); + + r700->DB_ALPHA_TO_MASK.u32All = 0; + SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask); + SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET1_shift, ALPHA_TO_MASK_OFFSET1_mask); + SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET2_shift, ALPHA_TO_MASK_OFFSET2_mask); + SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET3_shift, ALPHA_TO_MASK_OFFSET3_mask); + /* stencil */ r700Enable(ctx, GL_STENCIL_TEST, ctx->Stencil._Enabled); r700StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]); @@ -1812,6 +1778,11 @@ void r700InitState(GLcontext * ctx) //------------------- /* Set up color compare mask */ r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF; + /* screen/window/view */ + SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask); + + context->radeon.hw.all_dirty = GL_TRUE; + } void r700InitStateFuncs(struct dd_function_table *functions) //----------------- diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h index 23246367db..209189d8d7 100644 --- a/src/mesa/drivers/dri/r600/r700_state.h +++ b/src/mesa/drivers/dri/r600/r700_state.h @@ -35,17 +35,13 @@ extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state); extern void r700UpdateShaders (GLcontext * ctx); +extern void r700UpdateShaders2(GLcontext * ctx); extern void r700UpdateViewportOffset(GLcontext * ctx); -extern void r700UpdateDrawBuffer (GLcontext * ctx); extern void r700InitState (GLcontext * ctx); extern void r700InitStateFuncs (struct dd_function_table *functions); -extern void r700SetRenderTarget(context_t *context, int id); -extern void r700SetDefaultStates(context_t * context); - -void r700SetScissor(context_t *context); -void r700SetDepthTarget(context_t *context); +extern void 
r700SetScissor(context_t *context); #endif /* _R600_SCREEN_H */ diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 172e6ee501..e7a209be9d 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -35,11 +35,14 @@ #include "main/mtypes.h" #include "tnl/t_context.h" +#include "shader/program.h" #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" +#include "radeon_debug.h" #include "r600_context.h" #include "r600_cmdbuf.h" +#include "shader/programopt.c" #include "r700_debug.h" #include "r700_vertprog.h" @@ -52,7 +55,7 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, unsigned int unBit; unsigned int unTotal = unStart; - //!!!!!!! THE ORDER MATCH FS INPUT + //!!!!!!! THE ORDER MATCH FS INPUT unBit = 1 << VERT_RESULT_HPOS; if(mesa_vp->Base.OutputsWritten & unBit) @@ -73,17 +76,17 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, } //TODO : dealing back face. - //unBit = 1 << VERT_RESULT_BFC0; - //if(mesa_vp->Base.OutputsWritten & unBit) - //{ - // pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++; - //} - - //unBit = 1 << VERT_RESULT_BFC1; - //if(mesa_vp->Base.OutputsWritten & unBit) - //{ - // pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++; - //} + unBit = 1 << VERT_RESULT_BFC0; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_BFC0] = unTotal++; + } + + unBit = 1 << VERT_RESULT_BFC1; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_BFC1] = unTotal++; + } //TODO : dealing fog. unBit = 1 << VERT_RESULT_FOGC; @@ -93,11 +96,11 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, } //TODO : dealing point size. 
- //unBit = 1 << VERT_RESULT_PSIZ; - //if(mesa_vp->Base.OutputsWritten & unBit) - //{ - // pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++; - //} + unBit = 1 << VERT_RESULT_PSIZ; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++; + } for(i=0; i<8; i++) { @@ -156,7 +159,35 @@ GLboolean Process_Vertex_Program_Vfetch_Instructions( return GL_TRUE; } -void Map_Vertex_Program(struct r700_vertex_program *vp, +GLboolean Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + int i; + context_t *context = R700_CONTEXT(ctx); + + VTX_FETCH_METHOD vtxFetchMethod; + vtxFetchMethod.bEnableMini = GL_FALSE; + vtxFetchMethod.mega_fetch_remainder = 0; + + for(i=0; i<context->nNumActiveAos; i++) + { + assemble_vfetch_instruction2(&vp->r700AsmCode, + vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element], + context->stream_desc[i].type, + context->stream_desc[i].size, + context->stream_desc[i].element, + context->stream_desc[i]._signed, + context->stream_desc[i].normalize, + &vtxFetchMethod); + } + + return GL_TRUE; +} + +void Map_Vertex_Program(GLcontext *ctx, + struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp) { GLuint ui; @@ -172,11 +203,22 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, pAsm->number_used_registers += num_inputs; // Create VFETCH instructions for inputs - if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) - { - r700_error(ERROR_ASM_VTX_CLAUSE, "Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); - return; //error - } + if(1 == vp->uiVersion) + { + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) + { + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. 
\n"); + return; + } + } + else + { + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) ) + { + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n"); + return; + } + } // Map Outputs pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers); @@ -257,28 +299,61 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, return GL_TRUE; } -GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp, - struct gl_vertex_program *mesa_vp) +struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, + struct gl_vertex_program *mesa_vp, + GLint nVer) { + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vp; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + unsigned int unBit; + unsigned int i; + + vp = _mesa_calloc(sizeof(*vp)); + vp->uiVersion = nVer; + vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base); + + if (mesa_vp->IsPositionInvariant) + { + _mesa_insert_mvp_code(ctx, vp->mesa_program); + } + + for(i=0; i<VERT_ATTRIB_MAX; i++) + { + unBit = 1 << i; + if(vp->mesa_program->Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */ + { + vp->aos_desc[i].size = vb->AttribPtr[i]->size; + vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. 
vb->AttribPtr[i]->stride;*/ + vp->aos_desc[i].type = GL_FLOAT; + } + } + + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + { + vp->r700AsmCode.bR6xx = 1; + } + //Init_Program Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) ); - Map_Vertex_Program( vp, mesa_vp ); + Map_Vertex_Program(ctx, vp, vp->mesa_program ); - if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, mesa_vp)) + if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program)) { - return GL_FALSE; + return NULL; } - if(GL_FALSE == AssembleInstr(mesa_vp->Base.NumInstructions, - &(mesa_vp->Base.Instructions[0]), + if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions, + &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ) { - return GL_FALSE; + return NULL; } - if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), mesa_vp->Base.OutputsWritten) ) + if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) ) { - return GL_FALSE; + return NULL; } vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 
0
@@ -288,79 +363,229 @@ GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp,
     vp->translated = GL_TRUE;
-    return GL_TRUE;
+    return vp;
 }
-void r700SelectVertexShader(GLcontext *ctx)
+void r700SelectVertexShader(GLcontext *ctx, GLint nVersion)
 {
     context_t *context = R700_CONTEXT(ctx);
-    struct r700_vertex_program *vpc
-        = (struct r700_vertex_program *)ctx->VertexProgram._Current;
-    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
-    {
-        vpc->r700AsmCode.bR6xx = 1;
-    }
-
+    struct r700_vertex_program_cont *vpc;
+    struct r700_vertex_program *vp;
     TNLcontext *tnl = TNL_CONTEXT(ctx);
-    struct vertex_buffer *vb = &tnl->vb;
-
+    struct vertex_buffer *vb = &tnl->vb;
     unsigned int unBit;
     unsigned int i;
+    GLboolean match;
+    GLbitfield InputsRead;
+
+    vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
+
+    InputsRead = vpc->mesa_program.Base.InputsRead;
+    if (vpc->mesa_program.IsPositionInvariant)
+    {
+        InputsRead |= VERT_BIT_POS;
+    }
+
+    for (vp = vpc->progs; vp; vp = vp->next)
+    {
+        match = GL_TRUE;
         for(i=0; i<VERT_ATTRIB_MAX; i++)
         {
             unBit = 1 << i;
-            if(vpc->mesa_program.Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */
+            if(InputsRead & unBit)
             {
-                vpc->aos_desc[i].size = vb->AttribPtr[i]->size;
-                vpc->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/
-                vpc->aos_desc[i].type = GL_FLOAT;
+                match = (vp->aos_desc[i].size == vb->AttribPtr[i]->size) ? GL_TRUE : GL_FALSE;
+                if (!match) /* keep scanning while sizes agree; the old unbraced break left after the first read attribute */
+                    break;
             }
         }
+        if (match)
+        {
+            context->selected_vp = vp;
+            return;
+        }
+    }
-    if(GL_FALSE == vpc->translated)
+    vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program), nVersion);
+    if(!vp)
     {
-        r700TranslateVertexShader(vpc,
-            &(vpc->mesa_program) );
+        radeon_error("Failed to translate vertex shader. 
\n"); + return; } + vp->next = vpc->progs; + vpc->progs = vp; + context->selected_vp = vp; + return; } -void * r700GetActiveVpShaderBo(GLcontext * ctx) +int getTypeSize(GLenum type) +{ + switch (type) + { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input) { - struct r700_vertex_program *vp - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + context_t *context = R700_CONTEXT(ctx); + + StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]); + + GLuint stride; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size + : input->StrideB; - return vp->shaderbo; + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input->Type) != 4 || +#endif + stride < 4) + { + pStreamDesc->type = GL_FLOAT; + + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = sizeof(GLfloat) * input->Size; + } + pStreamDesc->dwords = input->Size; + pStreamDesc->is_named_bo = GL_FALSE; + } + else + { + pStreamDesc->type = input->Type; + pStreamDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4; + if (!input->BufferObj->Name) + { + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = (getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3; + } + + pStreamDesc->is_named_bo = GL_FALSE; + } + } + + pStreamDesc->size = input->Size; + pStreamDesc->dst_loc = context->nNumActiveAos; + pStreamDesc->element = unLoc; + + switch 
(pStreamDesc->type) + { //GetSurfaceFormat + case GL_FLOAT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = GL_FALSE; + break; + case GL_SHORT: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_BYTE: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_SHORT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_BYTE: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + default: + case GL_INT: + case GL_UNSIGNED_INT: + case GL_DOUBLE: + assert(0); + break; + } + context->nNumActiveAos++; } -GLboolean r700SetupVertexProgram(GLcontext * ctx) +void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count) { context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vpc + = (struct r700_vertex_program *)ctx->VertexProgram._Current; - BATCH_LOCALS(&context->radeon); + struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program); + unsigned int unLoc = 0; + unsigned int unBit = mesa_vp->Base.InputsRead; + context->nNumActiveAos = 0; - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + while(unBit) + { + if(unBit & 1) + { + r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]); + } + + unBit >>= 1; + ++unLoc; + } +} + +void * r700GetActiveVpShaderBo(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vp = context->selected_vp;; + + if (vp) + return vp->shaderbo; + else + return NULL; +} - struct r700_vertex_program *vp - = (struct r700_vertex_program *)ctx->VertexProgram._Current; +GLboolean r700SetupVertexProgram(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + struct r700_vertex_program *vp = context->selected_vp; struct gl_program_parameter_list *paramList; unsigned int unNumParamData; - unsigned int 
ui; if(GL_FALSE == vp->loaded) { - if(vp->r700Shader.bNeedsAssembly == GL_TRUE) + if(vp->r700Shader.bNeedsAssembly == GL_TRUE) { Assemble( &(vp->r700Shader) ); } /* Load vp to gpu */ - r600EmitShader(ctx, - &(vp->shaderbo), + r600EmitShader(ctx, + &(vp->shaderbo), (GLvoid *)(vp->r700Shader.pProgram), - vp->r700Shader.uShaderBinaryDWORDSize,
- "VS"); + vp->r700Shader.uShaderBinaryDWORDSize, + "VS"); vp->loaded = GL_TRUE; } @@ -372,8 +597,14 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) (context->chipobj.MemUse)(context, vp->shadercode.buf->id); */ - r700->vs.SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */ - + R600_STATECHANGE(context, vs); + R600_STATECHANGE(context, fs); /* hack */ + + r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0; + SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); + + r700->vs.SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */ + SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1, NUM_GPRS_shift, NUM_GPRS_mask); @@ -383,9 +614,12 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) STACK_SIZE_shift, STACK_SIZE_mask); } - SETfield(r700->SPI_VS_OUT_CONFIG.u32All, vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0, + R600_STATECHANGE(context, spi); + + SETfield(r700->SPI_VS_OUT_CONFIG.u32All, + vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0, VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask); - SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports, + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports, NUM_INTERP_shift, NUM_INTERP_mask); /* @@ -394,37 +628,28 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) */ /* sent out shader constants. */ + paramList = vp->mesa_program->Base.Parameters; - paramList = vp->mesa_program.Base.Parameters; + if(NULL != paramList) { + _mesa_load_state_parameters(ctx, paramList); - if(NULL != paramList) - { - _mesa_load_state_parameters(ctx, paramList); + if (paramList->NumParameters > R700_MAX_DX9_CONSTS) + return GL_FALSE; - unNumParamData = paramList->NumParameters * 4; + R600_STATECHANGE(context, vs_consts); - BEGIN_BATCH_NO_AUTOSTATE(unNumParamData + 2); - - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, unNumParamData)); - /* assembler map const from very beginning. 
*/ - R600_OUT_BATCH(SQ_ALU_CONSTANT_VS_OFFSET * 4); + r700->vs.num_consts = paramList->NumParameters; - unNumParamData = paramList->NumParameters; + unNumParamData = paramList->NumParameters; - for(ui=0; ui<unNumParamData; ui++) - { - R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][0]))); - R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][1]))); - R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][2]))); - R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][3]))); - } - END_BATCH(); - COMMIT_BATCH(); - } + for(ui=0; ui<unNumParamData; ui++) { + r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; + r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; + r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; + r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + } + } else + r700->vs.num_consts = 0; return GL_TRUE; } - - - - diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h index 6a9726a3d0..f9a3e395ee 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.h +++ b/src/mesa/drivers/dri/r600/r700_vertprog.h @@ -43,46 +43,61 @@ typedef struct ArrayDesc //TEMP struct r700_vertex_program { - struct gl_vertex_program mesa_program; /* Must be first */ + struct gl_vertex_program *mesa_program; /* Must be first */ - struct r700_vertex_program *next; + struct r700_vertex_program *next; r700_AssemblerBase r700AsmCode; R700_Shader r700Shader; - GLboolean translated; + GLboolean translated; GLboolean loaded; + GLint uiVersion; - /* ... 
*/ - void * shaderbo; - ArrayDesc aos_desc[VERT_ATTRIB_MAX]; + ArrayDesc aos_desc[VERT_ATTRIB_MAX]; +}; + +struct r700_vertex_program_cont +{ + struct gl_vertex_program mesa_program; + + struct r700_vertex_program *progs; }; //Internal unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, - struct gl_vertex_program *mesa_vp, - unsigned int unStart); + struct gl_vertex_program *mesa_vp, + unsigned int unStart); unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm, - struct gl_vertex_program *mesa_vp, - unsigned int unStart); + struct gl_vertex_program *mesa_vp, + unsigned int unStart); GLboolean Process_Vertex_Program_Vfetch_Instructions( - struct r700_vertex_program *vp, - struct gl_vertex_program *mesa_vp); -void Map_Vertex_Program(struct r700_vertex_program *vp, - struct gl_vertex_program *mesa_vp); + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp); +GLboolean Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp); +void Map_Vertex_Program(GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp); GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, - struct gl_vertex_program *mesa_vp); + struct gl_vertex_program *mesa_vp); -/* Interface */ -extern GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp, - struct gl_vertex_program *mesa_vp); +struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, + struct gl_vertex_program *mesa_vp, + GLint nVer); -extern void r700SelectVertexShader(GLcontext *ctx); +/* Interface */ +extern void r700SelectVertexShader(GLcontext *ctx, GLint nVersion); +extern void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count); extern GLboolean r700SetupVertexProgram(GLcontext * ctx); extern void * r700GetActiveVpShaderBo(GLcontext * ctx); +extern int getTypeSize(GLenum type); + #endif /* _R700_VERTPROG_H_ */ diff --git 
a/src/mesa/drivers/dri/r600/radeon_buffer_objects.c b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c new file mode 120000 index 0000000000..f6a5f66470 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.h b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h new file mode 120000 index 0000000000..2f134fd17b --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/radeon_debug.c b/src/mesa/drivers/dri/r600/radeon_debug.c new file mode 120000 index 0000000000..c98c2e074c --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_debug.c @@ -0,0 +1 @@ +../radeon/radeon_debug.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/radeon_debug.h b/src/mesa/drivers/dri/r600/radeon_debug.h new file mode 120000 index 0000000000..bd8aa28e89 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_debug.h @@ -0,0 +1 @@ +../radeon/radeon_debug.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/radeon_queryobj.c b/src/mesa/drivers/dri/r600/radeon_queryobj.c new file mode 120000 index 0000000000..1d6ebc1c48 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_queryobj.c @@ -0,0 +1 @@ +../radeon/radeon_queryobj.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/radeon_queryobj.h b/src/mesa/drivers/dri/r600/radeon_queryobj.h new file mode 120000 index 0000000000..8f6f842b0a --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_queryobj.h @@ -0,0 +1 @@ +../radeon/radeon_queryobj.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile index b59ad68f44..b1efc72872 100644 --- a/src/mesa/drivers/dri/radeon/Makefile +++ b/src/mesa/drivers/dri/radeon/Makefile @@ -15,16 +15,18 @@ CS_SOURCES = radeon_cs_space_drm.c endif RADEON_COMMON_SOURCES = \ - radeon_texture.c \ + radeon_bo_legacy.c \ radeon_common_context.c \ radeon_common.c \ + radeon_cs_legacy.c \ radeon_dma.c \ + radeon_debug.c \ + radeon_fbo.c \ radeon_lock.c \ - radeon_bo_legacy.c \ - radeon_cs_legacy.c \ radeon_mipmap_tree.c \ + radeon_queryobj.c \ radeon_span.c \ - radeon_fbo.c + radeon_texture.c DRIVER_SOURCES = \ radeon_context.c \ @@ -45,7 +47,7 @@ C_SOURCES = \ $(DRIVER_SOURCES) \ $(CS_SOURCES) -DRIVER_DEFINES = -DRADEON_COMMON=0 +DRIVER_DEFINES = -DRADEON_R100 -Wall DRI_LIB_DEPS += $(RADEON_LDFLAGS) diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h index 8eeaea1cb2..7141371633 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h +++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h @@ -34,10 +34,6 @@ #include <stdint.h> //#include "radeon_track.h" -#ifndef RADEON_DEBUG_BO -#define RADEON_DEBUG_BO 0 -#endif - /* bo object */ #define RADEON_BO_FLAGS_MACRO_TILE 1 #define RADEON_BO_FLAGS_MICRO_TILE 2 @@ -61,28 +57,23 @@ struct radeon_bo { /* bo functions */ struct radeon_bo_funcs { -#ifdef RADEON_DEBUG_BO - struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom, - uint32_t handle, - uint32_t size, - uint32_t alignment, - uint32_t domains, - uint32_t flags, - char * szBufUsage); -#else struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom, uint32_t handle, uint32_t size, uint32_t alignment, uint32_t domains, uint32_t flags); -#endif /* RADEON_DEBUG_BO */ void (*bo_ref)(struct radeon_bo *bo); struct radeon_bo *(*bo_unref)(struct radeon_bo *bo); int (*bo_map)(struct radeon_bo *bo, int write); int (*bo_unmap)(struct radeon_bo *bo); int (*bo_wait)(struct radeon_bo 
*bo); int (*bo_is_static)(struct radeon_bo *bo); + int (*bo_set_tiling)(struct radeon_bo *bo, uint32_t tiling_flags, + uint32_t pitch); + int (*bo_get_tiling)(struct radeon_bo *bo, uint32_t *tiling_flags, + uint32_t *pitch); + int (*bo_is_busy)(struct radeon_bo *bo, uint32_t *domain); }; struct radeon_bo_manager { @@ -110,20 +101,13 @@ static inline struct radeon_bo *_radeon_bo_open(struct radeon_bo_manager *bom, uint32_t alignment, uint32_t domains, uint32_t flags, -#ifdef RADEON_DEBUG_BO - char * szBufUsage, -#endif /* RADEON_DEBUG_BO */ const char *file, const char *func, int line) { struct radeon_bo *bo; -#ifdef RADEON_DEBUG_BO - bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags, szBufUsage); -#else bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags); -#endif /* RADEON_DEBUG_BO */ #ifdef RADEON_BO_TRACK if (bo) { @@ -187,6 +171,27 @@ static inline int _radeon_bo_wait(struct radeon_bo *bo, return bo->bom->funcs->bo_wait(bo); } +static inline int _radeon_bo_is_busy(struct radeon_bo *bo, + uint32_t *domain, + const char *file, + const char *func, + int line) +{ + return bo->bom->funcs->bo_is_busy(bo, domain); +} + +static inline int radeon_bo_set_tiling(struct radeon_bo *bo, + uint32_t tiling_flags, uint32_t pitch) +{ + return bo->bom->funcs->bo_set_tiling(bo, tiling_flags, pitch); +} + +static inline int radeon_bo_get_tiling(struct radeon_bo *bo, + uint32_t *tiling_flags, uint32_t *pitch) +{ + return bo->bom->funcs->bo_get_tiling(bo, tiling_flags, pitch); +} + static inline int radeon_bo_is_static(struct radeon_bo *bo) { if (bo->bom->funcs->bo_is_static) @@ -194,13 +199,8 @@ static inline int radeon_bo_is_static(struct radeon_bo *bo) return 0; } -#ifdef RADEON_DEBUG_BO -#define radeon_bo_open(bom, h, s, a, d, f, u)\ - _radeon_bo_open(bom, h, s, a, d, f, u, __FILE__, __FUNCTION__, __LINE__) -#else #define radeon_bo_open(bom, h, s, a, d, f)\ _radeon_bo_open(bom, h, s, a, d, f, __FILE__, __FUNCTION__, __LINE__) -#endif /* 
RADEON_DEBUG_BO */ #define radeon_bo_ref(bo)\ _radeon_bo_ref(bo, __FILE__, __FUNCTION__, __LINE__) #define radeon_bo_unref(bo)\ @@ -213,5 +213,7 @@ static inline int radeon_bo_is_static(struct radeon_bo *bo) _radeon_bo_debug(bo, opcode, __FILE__, __FUNCTION__, __LINE__) #define radeon_bo_wait(bo) \ _radeon_bo_wait(bo, __FILE__, __func__, __LINE__) +#define radeon_bo_is_busy(bo, domain) \ + _radeon_bo_is_busy(bo, domain, __FILE__, __func__, __LINE__) #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c index 992eb4611b..3e7547d2f9 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c @@ -69,9 +69,6 @@ struct bo_legacy { void *ptr; struct bo_legacy *next, *prev; struct bo_legacy *pnext, *pprev; -#ifdef RADEON_DEBUG_BO - char szBufUsage[16]; -#endif /* RADEON_DEBUG_BO */ }; struct bo_manager_legacy { @@ -238,8 +235,9 @@ static int legacy_wait_pending(struct radeon_bo *bo) return 0; } -static void legacy_track_pending(struct bo_manager_legacy *boml, int debug) +void legacy_track_pending(struct radeon_bo_manager *bom, int debug) { + struct bo_manager_legacy *boml = (struct bo_manager_legacy*) bom; struct bo_legacy *bo_legacy; struct bo_legacy *next; @@ -247,8 +245,8 @@ static void legacy_track_pending(struct bo_manager_legacy *boml, int debug) bo_legacy = boml->pending_bos.pnext; while (bo_legacy) { if (debug) - fprintf(stderr,"pending %p %d %d %d\n", bo_legacy, bo_legacy->base.size, - boml->current_age, bo_legacy->pending); + fprintf(stderr,"pending %p %d %d %d\n", bo_legacy, bo_legacy->base.size, + boml->current_age, bo_legacy->pending); next = bo_legacy->pnext; if (legacy_is_pending(&(bo_legacy->base))) { } @@ -289,12 +287,7 @@ static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml, uint32_t size, uint32_t alignment, uint32_t domains, -#ifdef RADEON_DEBUG_BO - uint32_t flags, - char * szBufUsage) -#else uint32_t flags) -#endif /* 
RADEON_DEBUG_BO */ { struct bo_legacy *bo_legacy; static int pgsize; @@ -327,10 +320,6 @@ static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml, bo_legacy->next->prev = bo_legacy; } -#ifdef RADEON_DEBUG_BO - sprintf(bo_legacy->szBufUsage, "%s", szBufUsage); -#endif /* RADEON_DEBUG_BO */ - return bo_legacy; } @@ -429,12 +418,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, uint32_t size, uint32_t alignment, uint32_t domains, -#ifdef RADEON_DEBUG_BO - uint32_t flags, - char * szBufUsage) -#else uint32_t flags) -#endif /* RADEON_DEBUG_BO */ { struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom; struct bo_legacy *bo_legacy; @@ -451,11 +435,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, } return NULL; } -#ifdef RADEON_DEBUG_BO - bo_legacy = bo_allocate(boml, size, alignment, domains, flags, szBufUsage); -#else bo_legacy = bo_allocate(boml, size, alignment, domains, flags); -#endif /* RADEON_DEBUG_BO */ bo_legacy->static_bo = 0; r = legacy_new_handle(boml, &bo_legacy->base.handle); if (r) { @@ -465,7 +445,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) { retry: - legacy_track_pending(boml, 0); + legacy_track_pending(&boml->base, 0); /* dma buffers */ r = bo_dma_alloc(&(bo_legacy->base)); @@ -562,6 +542,18 @@ static int bo_unmap(struct radeon_bo *bo) return 0; } +static int bo_is_busy(struct radeon_bo *bo, uint32_t *domain) +{ + *domain = 0; + if (bo->domains & RADEON_GEM_DOMAIN_GTT) + *domain = RADEON_GEM_DOMAIN_GTT; + else + *domain = RADEON_GEM_DOMAIN_CPU; + if (legacy_is_pending(bo)) + return -EBUSY; + else + return 0; +} static int bo_is_static(struct radeon_bo *bo) { @@ -577,6 +569,9 @@ static struct radeon_bo_funcs bo_legacy_funcs = { bo_unmap, NULL, bo_is_static, + NULL, + NULL, + bo_is_busy }; static int bo_vram_validate(struct radeon_bo *bo, @@ -599,7 +594,7 @@ static int bo_vram_validate(struct radeon_bo *bo, if (r) { 
pending_retry = 0; while(boml->cpendings && pending_retry++ < 10000) { - legacy_track_pending(boml, 0); + legacy_track_pending(&boml->base, 0); retry_count++; if (retry_count > 2) { free(bo_legacy->tobj); @@ -622,12 +617,34 @@ static int bo_vram_validate(struct radeon_bo *bo, if (bo_legacy->dirty || bo_legacy->tobj->base.dirty_images[0]) { if (IS_R600_CLASS(boml->screen)) { - char *src = bo_legacy->ptr; - char *dst = (char *) boml->screen->driScreen->pFB + - (bo_legacy->offset - boml->fb_location); + drm_radeon_texture_t tex; + drm_radeon_tex_image_t tmp; + int ret; + + tex.offset = bo_legacy->offset; + tex.image = &tmp; + assert(!(tex.offset & 1023)); - /* FIXME: alignment, pitch, etc. */ - memcpy(dst, src, bo->size); + tmp.x = 0; + tmp.y = 0; + tmp.width = bo->size; + tmp.height = 1; + tmp.data = bo_legacy->ptr; + tex.format = RADEON_TXFORMAT_ARGB8888; + tex.width = tmp.width; + tex.height = tmp.height; + tex.pitch = bo->size; + do { + ret = drmCommandWriteRead(bo->bom->fd, + DRM_RADEON_TEXTURE, + &tex, + sizeof(drm_radeon_texture_t)); + if (ret) { + if (RADEON_DEBUG & RADEON_IOCTL) + fprintf(stderr, "DRM_RADEON_TEXTURE: again!\n"); + usleep(1); + } + } while (ret == -EAGAIN); } else { /* Copy to VRAM using a blit. * All memory is 4K aligned. We're using 1024 pixels wide blits. 
@@ -660,7 +677,7 @@ static int bo_vram_validate(struct radeon_bo *bo, &tex, sizeof(drm_radeon_texture_t)); if (ret) { - if (RADEON_DEBUG & DEBUG_IOCTL) + if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "DRM_RADEON_TEXTURE: again!\n"); usleep(1); } @@ -689,14 +706,8 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo, int retries = 0; if (bo_legacy->map_count) { -#ifdef RADEON_DEBUG_BO - fprintf(stderr, "bo(%p, %d, %s) is mapped (%d) can't valide it.\n", - bo, bo->size, bo_legacy->szBufUsage, bo_legacy->map_count); -#else fprintf(stderr, "bo(%p, %d) is mapped (%d) can't valide it.\n", bo, bo->size, bo_legacy->map_count); -#endif /* RADEON_DEBUG_BO */ - return -EINVAL; } if (bo_legacy->static_bo || bo_legacy->validated) { @@ -709,7 +720,7 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo, r = bo_vram_validate(bo, soffset, eoffset); if (r) { - legacy_track_pending(boml, 0); + legacy_track_pending(&boml->base, 0); legacy_kick_all_buffers(boml); retries++; if (retries == 2) { @@ -768,21 +779,13 @@ void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom) } static struct bo_legacy *radeon_legacy_bo_alloc_static(struct bo_manager_legacy *bom, - int size, -#ifdef RADEON_DEBUG_BO - uint32_t offset, - char * szBufUsage) -#else - uint32_t offset) -#endif /* RADEON_DEBUG_BO */ + int size, + uint32_t offset) { struct bo_legacy *bo; -#ifdef RADEON_DEBUG_BO - bo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0, szBufUsage); -#else bo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ + if (bo == NULL) return NULL; bo->static_bo = 1; @@ -843,11 +846,8 @@ struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *sc size = 4096*4096*4; /* allocate front */ -#ifdef RADEON_DEBUG_BO - bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->frontOffset, "FRONT BUF"); -#else bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->frontOffset); -#endif /* RADEON_DEBUG_BO */ + if (!bo) { 
radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom); return NULL; @@ -857,11 +857,8 @@ struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *sc } /* allocate back */ -#ifdef RADEON_DEBUG_BO - bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->backOffset, "BACK BUF"); -#else bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->backOffset); -#endif /* RADEON_DEBUG_BO */ + if (!bo) { radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom); return NULL; @@ -871,11 +868,8 @@ struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *sc } /* allocate depth */ -#ifdef RADEON_DEBUG_BO - bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->depthOffset, "Z BUF"); -#else bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->depthOffset); -#endif /* RADEON_DEBUG_BO */ + if (!bo) { radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom); return NULL; @@ -904,3 +898,29 @@ unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo) return bo->size; } +/* + * Fake up a bo for things like texture image_override. 
+ * bo->offset already includes fb_location + */ +struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, + int size, + uint32_t offset) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom; + struct bo_legacy *bo; + + bo = bo_allocate(boml, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); + + if (bo == NULL) + return NULL; + bo->static_bo = 1; + bo->offset = offset; + bo->base.handle = bo->offset; + bo->ptr = boml->screen->driScreen->pFB + (offset - boml->fb_location); + if (bo->base.handle > boml->nhandle) { + boml->nhandle = bo->base.handle + 1; + } + radeon_bo_ref(&(bo->base)); + return &(bo->base); +} + diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h index 0db817cab0..2cf15dfaff 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h @@ -42,5 +42,9 @@ struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *sc void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom); void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom); unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo); +struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, + int size, + uint32_t offset); +void legacy_track_pending(struct radeon_bo_manager *bom, int debug); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h index e0c70dd9a1..4520a7d7d4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h @@ -15,6 +15,12 @@ #define RADEON_GEM_DOMAIN_GTT 0x2 // GTT or cache flushed #define RADEON_GEM_DOMAIN_VRAM 0x4 // VRAM domain +#define RADEON_TILING_MACRO 0x1 +#define RADEON_TILING_MICRO 0x2 +#define RADEON_TILING_SWAP 0x4 +#define RADEON_TILING_SURFACE 0x8 /* this object requires a surface + * when mapped - i.e. 
front buffer */ + /* to be used to build locally in mesa with no libdrm bits */ #include "../radeon/radeon_bo_drm.h" #include "../radeon/radeon_cs_drm.h" @@ -39,6 +45,10 @@ struct drm_radeon_info { #define RADEON_PARAM_DEVICE_ID 16 #endif +#ifndef RADEON_PARAM_NUM_Z_PIPES +#define RADEON_PARAM_NUM_Z_PIPES 17 +#endif + #ifndef RADEON_INFO_DEVICE_ID #define RADEON_INFO_DEVICE_ID 0 #endif @@ -46,6 +56,10 @@ struct drm_radeon_info { #define RADEON_INFO_NUM_GB_PIPES 0 #endif +#ifndef RADEON_INFO_NUM_Z_PIPES +#define RADEON_INFO_NUM_Z_PIPES 0 +#endif + #ifndef DRM_RADEON_INFO #define DRM_RADEON_INFO 0x1 #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c new file mode 100644 index 0000000000..8fac5c6c51 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c @@ -0,0 +1,229 @@ +/* + * Copyright 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_buffer_objects.h" + +#include "main/imports.h" +#include "main/mtypes.h" +#include "main/bufferobj.h" + +#include "radeon_common.h" + +struct radeon_buffer_object * +get_radeon_buffer_object(struct gl_buffer_object *obj) +{ + return (struct radeon_buffer_object *) obj; +} + +static struct gl_buffer_object * +radeonNewBufferObject(GLcontext * ctx, + GLuint name, + GLenum target) +{ + struct radeon_buffer_object *obj = CALLOC_STRUCT(radeon_buffer_object); + + _mesa_initialize_buffer_object(&obj->Base, name, target); + + obj->bo = NULL; + + return &obj->Base; +} + +/** + * Called via glDeleteBuffersARB(). + */ +static void +radeonDeleteBufferObject(GLcontext * ctx, + struct gl_buffer_object *obj) +{ + struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + + if (obj->Pointer) { + radeon_bo_unmap(radeon_obj->bo); + } + + if (radeon_obj->bo) { + radeon_bo_unref(radeon_obj->bo); + } + + _mesa_free(radeon_obj); +} + + +/** + * Allocate space for and store data in a buffer object. Any data that was + * previously stored in the buffer object is lost. If data is NULL, + * memory will be allocated, but no copy will occur. + * Called via ctx->Driver.BufferData(). 
+ * \return GL_TRUE for success, GL_FALSE if out of memory + */ +static GLboolean +radeonBufferData(GLcontext * ctx, + GLenum target, + GLsizeiptrARB size, + const GLvoid * data, + GLenum usage, + struct gl_buffer_object *obj) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + + radeon_obj->Base.Size = size; + radeon_obj->Base.Usage = usage; + + if (radeon_obj->bo != NULL) { + radeon_bo_unref(radeon_obj->bo); + radeon_obj->bo = NULL; + } + + if (size != 0) { + radeon_obj->bo = radeon_bo_open(radeon->radeonScreen->bom, + 0, + size, + 32, + RADEON_GEM_DOMAIN_GTT, + 0); + + if (!radeon_obj->bo) + return GL_FALSE; + + if (data != NULL) { + radeon_bo_map(radeon_obj->bo, GL_TRUE); + + _mesa_memcpy(radeon_obj->bo->ptr, data, size); + + radeon_bo_unmap(radeon_obj->bo); + } + } + return GL_TRUE; +} + +/** + * Replace data in a subrange of buffer object. If the data range + * specified by size + offset extends beyond the end of the buffer or + * if data is NULL, no copy is performed. + * Called via glBufferSubDataARB(). 
+ */ +static void +radeonBufferSubData(GLcontext * ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + const GLvoid * data, + struct gl_buffer_object *obj) +{ + struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + + radeon_bo_map(radeon_obj->bo, GL_TRUE); + + _mesa_memcpy(radeon_obj->bo->ptr + offset, data, size); + + radeon_bo_unmap(radeon_obj->bo); +} + +/** + * Called via glGetBufferSubDataARB() + */ +static void +radeonGetBufferSubData(GLcontext * ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + GLvoid * data, + struct gl_buffer_object *obj) +{ + struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + + radeon_bo_map(radeon_obj->bo, GL_FALSE); + + _mesa_memcpy(data, radeon_obj->bo->ptr + offset, size); + + radeon_bo_unmap(radeon_obj->bo); +} + +/** + * Called via glMapBufferARB() + */ +static void * +radeonMapBuffer(GLcontext * ctx, + GLenum target, + GLenum access, + struct gl_buffer_object *obj) +{ + struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + + if (access == GL_WRITE_ONLY_ARB) { + ctx->Driver.Flush(ctx); + } + + if (radeon_obj->bo == NULL) { + obj->Pointer = NULL; + return NULL; + } + + radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB); + + obj->Pointer = radeon_obj->bo->ptr; + obj->Length = obj->Size; + obj->Offset = 0; + + return obj->Pointer; +} + + +/** + * Called via glUnmapBufferARB() + */ +static GLboolean +radeonUnmapBuffer(GLcontext * ctx, + GLenum target, + struct gl_buffer_object *obj) +{ + struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + + if (radeon_obj->bo != NULL) { + radeon_bo_unmap(radeon_obj->bo); + } + + obj->Pointer = NULL; + obj->Offset = 0; + obj->Length = 0; + + return GL_TRUE; +} + +void +radeonInitBufferObjectFuncs(struct dd_function_table *functions) +{ + functions->NewBufferObject = radeonNewBufferObject; + functions->DeleteBuffer = radeonDeleteBufferObject; + functions->BufferData = 
radeonBufferData; + functions->BufferSubData = radeonBufferSubData; + functions->GetBufferSubData = radeonGetBufferSubData; + functions->MapBuffer = radeonMapBuffer; + functions->UnmapBuffer = radeonUnmapBuffer; +} diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h new file mode 100644 index 0000000000..d681960825 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h @@ -0,0 +1,52 @@ +/* + * Copyright 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_BUFFER_OBJECTS_H +#define RADEON_BUFFER_OBJECTS_H + +#include "main/mtypes.h" + +struct radeon_bo; + +/** + * Radeon vertex/pixel buffer object, derived from Mesa's gl_buffer_object. 
+ */ +struct radeon_buffer_object +{ + struct gl_buffer_object Base; + struct radeon_bo *bo; +}; + +struct radeon_buffer_object * +get_radeon_buffer_object(struct gl_buffer_object *obj); + +/** + * Hook the bufferobject implementation into mesa: + */ +void radeonInitBufferObjectFuncs(struct dd_function_table *functions); + +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 0a6a2df35b..46a9cd5ff8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -335,9 +335,16 @@ #define PCI_CHIP_RS780_9615 0x9615 #define PCI_CHIP_RS780_9616 0x9616 +#define PCI_CHIP_RS880_9710 0x9710 +#define PCI_CHIP_RS880_9711 0x9711 +#define PCI_CHIP_RS880_9712 0x9712 +#define PCI_CHIP_RS880_9713 0x9713 +#define PCI_CHIP_RS880_9714 0x9714 + #define PCI_CHIP_RV770_9440 0x9440 #define PCI_CHIP_RV770_9441 0x9441 #define PCI_CHIP_RV770_9442 0x9442 +#define PCI_CHIP_RV770_9443 0x9443 #define PCI_CHIP_RV770_9444 0x9444 #define PCI_CHIP_RV770_9446 0x9446 #define PCI_CHIP_RV770_944A 0x944A @@ -356,11 +363,14 @@ #define PCI_CHIP_RV770_947A 0x947A #define PCI_CHIP_RV770_947B 0x947B +#define PCI_CHIP_RV730_9480 0x9480 #define PCI_CHIP_RV730_9487 0x9487 +#define PCI_CHIP_RV730_9488 0x9488 #define PCI_CHIP_RV730_9489 0x9489 #define PCI_CHIP_RV730_948F 0x948F #define PCI_CHIP_RV730_9490 0x9490 #define PCI_CHIP_RV730_9491 0x9491 +#define PCI_CHIP_RV730_9495 0x9495 #define PCI_CHIP_RV730_9498 0x9498 #define PCI_CHIP_RV730_949C 0x949C #define PCI_CHIP_RV730_949E 0x949E @@ -374,12 +384,16 @@ #define PCI_CHIP_RV710_9552 0x9552 #define PCI_CHIP_RV710_9553 0x9553 #define PCI_CHIP_RV710_9555 0x9555 +#define PCI_CHIP_RV710_9557 0x9557 #define PCI_CHIP_RV740_94A0 0x94A0 #define PCI_CHIP_RV740_94A1 0x94A1 +#define PCI_CHIP_RV740_94A3 0x94A3 #define PCI_CHIP_RV740_94B1 0x94B1 #define PCI_CHIP_RV740_94B3 0x94B3 +#define PCI_CHIP_RV740_94B4 0x94B4 #define PCI_CHIP_RV740_94B5 0x94B5 +#define 
PCI_CHIP_RV740_94B9 0x94B9 enum { CHIP_FAMILY_R100, @@ -414,6 +428,7 @@ enum { CHIP_FAMILY_RV620, CHIP_FAMILY_RV635, CHIP_FAMILY_RS780, + CHIP_FAMILY_RS880, CHIP_FAMILY_RV770, CHIP_FAMILY_RV730, CHIP_FAMILY_RV710, diff --git a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h index abb023c7de..6fcd1ce7ca 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h +++ b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h @@ -3,7 +3,7 @@ #include "radeon_bocs_wrapper.h" -void rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller); +GLboolean rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller); int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller); int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller); void rcommonInitCmdBuf(radeonContextPtr rmesa); @@ -54,11 +54,12 @@ void rcommonBeginBatch(radeonContextPtr rmesa, */ #define OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) \ do { \ - if (0 && offset) { \ + int __offset = (offset); \ + if (0 && __offset) { \ fprintf(stderr, "(%s:%s:%d) offset : %d\n", \ - __FILE__, __FUNCTION__, __LINE__, offset); \ + __FILE__, __FUNCTION__, __LINE__, __offset); \ } \ - radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, offset); \ + radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, __offset); \ radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, \ bo, rd, wd, flags); \ if (!b_l_rmesa->radeonScreen->kernel_mm) \ @@ -71,10 +72,7 @@ void rcommonBeginBatch(radeonContextPtr rmesa, */ #define OUT_BATCH_TABLE(ptr,n) \ do { \ - int _i; \ - for (_i=0; _i < n; _i++) {\ - radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, ptr[_i]);\ - }\ + radeon_cs_write_table(b_l_rmesa->cmdbuf.cs, (ptr), (n));\ } while(0) /** diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index dde615a4d9..264392b327 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -45,46 
+45,26 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/glheader.h" #include "main/imports.h" #include "main/context.h" -#include "main/arrayobj.h" -#include "main/api_arrayelt.h" #include "main/enums.h" -#include "main/colormac.h" -#include "main/light.h" #include "main/framebuffer.h" -#include "main/simple_list.h" #include "main/renderbuffer.h" -#include "swrast/swrast.h" -#include "vbo/vbo.h" -#include "tnl/tnl.h" -#include "tnl/t_pipeline.h" -#include "swrast_setup/swrast_setup.h" - -#include "main/blend.h" -#include "main/bufferobj.h" -#include "main/buffers.h" -#include "main/depth.h" -#include "main/polygon.h" -#include "main/shaders.h" -#include "main/texstate.h" -#include "main/varray.h" -#include "glapi/dispatch.h" -#include "swrast/swrast.h" -#include "main/stencil.h" -#include "main/matrix.h" -#include "main/attrib.h" -#include "main/enable.h" -#include "main/viewport.h" - -#include "dri_util.h" +#include "drivers/common/meta.h" + #include "vblank.h" #include "radeon_common.h" #include "radeon_bocs_wrapper.h" #include "radeon_lock.h" #include "radeon_drm.h" -#include "radeon_mipmap_tree.h" +#include "radeon_queryobj.h" -#define DEBUG_CMDBUF 0 +/** + * Enable verbose debug output for emit code. 
+ * 0 no output + * 1 most output + * 2 also print state values + */ +#define RADEON_CMDBUF 0 /* ============================================================= * Scissoring @@ -147,6 +127,9 @@ void radeonRecalcScissorRects(radeonContextPtr radeon) out++; } } + + if (radeon->vtbl.update_scissor) + radeon->vtbl.update_scissor(radeon->glCtx); } void radeon_get_cliprects(radeonContextPtr radeon, @@ -221,26 +204,48 @@ void radeonSetCliprects(radeonContextPtr radeon) void radeonUpdateScissor( GLcontext *ctx ) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLint x = ctx->Scissor.X, y = ctx->Scissor.Y; + GLsizei w = ctx->Scissor.Width, h = ctx->Scissor.Height; + int x1, y1, x2, y2; + int min_x, min_y, max_x, max_y; - if ( !ctx->DrawBuffer->Name ) { - __DRIdrawablePrivate *dPriv = radeon_get_drawable(rmesa); - - int x = ctx->Scissor.X; - int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height; - int w = ctx->Scissor.X + ctx->Scissor.Width - 1; - int h = dPriv->h - ctx->Scissor.Y - 1; + if (!ctx->DrawBuffer) + return; + min_x = min_y = 0; + max_x = ctx->DrawBuffer->Width - 1; + max_y = ctx->DrawBuffer->Height - 1; - rmesa->state.scissor.rect.x1 = x + dPriv->x; - rmesa->state.scissor.rect.y1 = y + dPriv->y; - rmesa->state.scissor.rect.x2 = w + dPriv->x + 1; - rmesa->state.scissor.rect.y2 = h + dPriv->y + 1; + if ( !ctx->DrawBuffer->Name ) { + x1 = x; + y1 = ctx->DrawBuffer->Height - (y + h); + x2 = x + w - 1; + y2 = y1 + h - 1; } else { - rmesa->state.scissor.rect.x1 = ctx->Scissor.X; - rmesa->state.scissor.rect.y1 = ctx->Scissor.Y; - rmesa->state.scissor.rect.x2 = ctx->Scissor.X + ctx->Scissor.Width; - rmesa->state.scissor.rect.y2 = ctx->Scissor.Y + ctx->Scissor.Height; + x1 = x; + y1 = y; + x2 = x + w - 1; + y2 = y + h - 1; + + } + if (!rmesa->radeonScreen->kernel_mm) { + /* Fix scissors for dri 1 */ + + __DRIdrawablePrivate *dPriv = radeon_get_drawable(rmesa); + x1 += dPriv->x; + x2 += dPriv->x + 1; + min_x += dPriv->x; + max_x += dPriv->x + 1; + y1 += dPriv->y; + y2 += 
dPriv->y + 1; + min_y += dPriv->y; + max_y += dPriv->y + 1; } + rmesa->state.scissor.rect.x1 = CLAMP(x1, min_x, max_x); + rmesa->state.scissor.rect.y1 = CLAMP(y1, min_y, max_y); + rmesa->state.scissor.rect.x2 = CLAMP(x2, min_x, max_x); + rmesa->state.scissor.rect.y2 = CLAMP(y2, min_y, max_y); + radeonRecalcScissorRects( rmesa ); } @@ -258,6 +263,28 @@ void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h) } } +void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask ) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + GLuint i; + drm_radeon_stipple_t stipple; + + /* Must flip pattern upside down. + */ + for ( i = 0 ; i < 32 ; i++ ) { + stipple.mask[31 - i] = ((GLuint *) mask)[i]; + } + + /* TODO: push this into cmd mechanism + */ + radeon_firevertices(radeon); + LOCK_HARDWARE( radeon ); + + drmCommandWrite( radeon->dri.fd, DRM_RADEON_STIPPLE, + &stipple, sizeof(stipple) ); + UNLOCK_HARDWARE( radeon ); +} + /* ================================================================ * SwapBuffers with client-side throttling @@ -442,7 +469,7 @@ void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, rfb = dPriv->driverPrivate; - if ( RADEON_DEBUG & DEBUG_IOCTL ) { + if ( RADEON_DEBUG & RADEON_IOCTL ) { fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx ); } @@ -481,32 +508,6 @@ void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, if (!n) continue; - if (IS_R600_CLASS(rmesa->radeonScreen)) { - int cpp = rmesa->radeonScreen->cpp; - int src_pitch = rmesa->radeonScreen->backPitch * cpp; - int dst_pitch = rmesa->radeonScreen->frontPitch * cpp; - char *src = (char *)rmesa->radeonScreen->driScreen->pFB + rmesa->radeonScreen->backOffset; - char *dst = (char *)rmesa->radeonScreen->driScreen->pFB + rmesa->radeonScreen->frontOffset; - int j; - drm_clip_rect_t *pb = rmesa->sarea->boxes; - - for (j = 0; j < n; j++) { - int x = pb[j].x1; - int y = pb[j].y1; - int w = pb[j].x2 - x; - int h = pb[j].y2 - y; - - src += (y * src_pitch) + (x * 
cpp); - dst += (y * dst_pitch) + (x * cpp); - - while (h--) { - memcpy(dst, src, w * cpp); - src += src_pitch; - dst += dst_pitch; - } - } - } - ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); if ( ret ) { @@ -562,7 +563,7 @@ static GLboolean radeonPageFlip( __DRIdrawablePrivate *dPriv ) LOCK_HARDWARE(radeon); - if ( RADEON_DEBUG & DEBUG_IOCTL ) { + if ( RADEON_DEBUG & RADEON_IOCTL ) { fprintf(stderr, "%s: pfCurrentPage: %d %d\n", __FUNCTION__, radeon->sarea->pfCurrentPage, radeon->sarea->pfState); } @@ -822,7 +823,7 @@ void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb) */ void radeonDrawBuffer( GLcontext *ctx, GLenum mode ) { - if (RADEON_DEBUG & DEBUG_DRI) + if (RADEON_DEBUG & RADEON_DRI) fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr( mode )); @@ -887,10 +888,11 @@ void radeonUpdatePageFlipping(radeonContextPtr radeon) void radeon_window_moved(radeonContextPtr radeon) { + /* Cliprects have to be updated before doing anything else */ + radeonSetCliprects(radeon); if (!radeon->radeonScreen->driScreen->dri2.enabled) { radeonUpdatePageFlipping(radeon); } - radeonSetCliprects(radeon); } void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height) @@ -905,7 +907,7 @@ void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei he if (!radeon->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) { if (radeon->is_front_buffer_rendering) { - radeonFlush(ctx); + ctx->Driver.Flush(ctx); } radeon_update_renderbuffers(driContext, driContext->driDrawablePriv); if (driContext->driDrawablePriv != driContext->driReadablePriv) @@ -919,7 +921,7 @@ void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei he ctx->Driver.Viewport = old_viewport; } -static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state) +static void radeon_print_state_atom_prekmm(radeonContextPtr radeon, struct radeon_state_atom *state) { int i, j, reg; int dwords = 
(*state->check) (radeon->glCtx, state); @@ -927,7 +929,10 @@ static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size); - if (RADEON_DEBUG & DEBUG_VERBOSE) { + if (radeon_is_debug_enabled(RADEON_STATE, RADEON_TRACE)) { + if (dwords > state->cmd_size) + dwords = state->cmd_size; + for (i = 0; i < dwords;) { cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]); reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo; @@ -944,15 +949,26 @@ static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state } } -static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_state_atom *state) +static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state) { int i, j, reg, count; - int dwords = (*state->check) (radeon->glCtx, state); + int dwords; uint32_t packet0; + if (!radeon_is_debug_enabled(RADEON_STATE, RADEON_VERBOSE) ) + return; + + if (!radeon->radeonScreen->kernel_mm) { + radeon_print_state_atom_prekmm(radeon, state); + return; + } + + dwords = (*state->check) (radeon->glCtx, state); fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size); - if (RADEON_DEBUG & DEBUG_VERBOSE) { + if (radeon_is_debug_enabled(RADEON_STATE, RADEON_TRACE)) { + if (dwords > state->cmd_size) + dwords = state->cmd_size; for (i = 0; i < dwords;) { packet0 = state->cmd[i]; reg = (packet0 & 0x1FFF) << 2; @@ -970,40 +986,81 @@ static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_s } } -static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty) +/** + * Count total size for next state emit. 
+ **/ +GLuint radeonCountStateEmitSize(radeonContextPtr radeon) { - BATCH_LOCALS(radeon); struct radeon_state_atom *atom; + GLuint dwords = 0; + /* check if we are going to emit full state */ + + if (radeon->cmdbuf.cs->cdw && !radeon->hw.all_dirty) { + if (!radeon->hw.is_dirty) + goto out; + foreach(atom, &radeon->hw.atomlist) { + if (atom->dirty) { + const GLuint atom_size = atom->check(radeon->glCtx, atom); + dwords += atom_size; + if (RADEON_CMDBUF && atom_size) { + radeon_print_state_atom(radeon, atom); + } + } + } + } else { + foreach(atom, &radeon->hw.atomlist) { + const GLuint atom_size = atom->check(radeon->glCtx, atom); + dwords += atom_size; + if (RADEON_CMDBUF && atom_size) { + radeon_print_state_atom(radeon, atom); + } + + } + } +out: + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %u\n", __func__, dwords); + return dwords; +} + +static INLINE void radeon_emit_atom(radeonContextPtr radeon, struct radeon_state_atom *atom) +{ + BATCH_LOCALS(radeon); int dwords; + dwords = (*atom->check) (radeon->glCtx, atom); + if (dwords) { + + radeon_print_state_atom(radeon, atom); + + if (atom->emit) { + (*atom->emit)(radeon->glCtx, atom); + } else { + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(atom->cmd, dwords); + END_BATCH(); + } + } else { + radeon_print(RADEON_STATE, RADEON_VERBOSE, " skip state %s\n", atom->name); + } + atom->dirty = GL_FALSE; + +} + +static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean emitAll) +{ + struct radeon_state_atom *atom; + if (radeon->vtbl.pre_emit_atoms) radeon->vtbl.pre_emit_atoms(radeon); /* Emit actual atoms */ - foreach(atom, &radeon->hw.atomlist) { - if ((atom->dirty || radeon->hw.all_dirty) == dirty) { - dwords = (*atom->check) (radeon->glCtx, atom); - if (dwords) { - if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { - if (radeon->radeonScreen->kernel_mm) - radeon_print_state_atom_kmm(radeon, atom); - else - radeon_print_state_atom(radeon, atom); - } - if (atom->emit) { - (*atom->emit)(radeon->glCtx, 
atom); - } else { - BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH_TABLE(atom->cmd, dwords); - END_BATCH(); - } - atom->dirty = GL_FALSE; - } else { - if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { - fprintf(stderr, " skip state %s\n", - atom->name); - } - } + if (radeon->hw.all_dirty || emitAll) { + foreach(atom, &radeon->hw.atomlist) + radeon_emit_atom( radeon, atom ); + } else { + foreach(atom, &radeon->hw.atomlist) { + if ( atom->dirty ) + radeon_emit_atom( radeon, atom ); } } @@ -1023,8 +1080,7 @@ static GLboolean radeon_revalidate_bos(GLcontext *ctx) void radeonEmitState(radeonContextPtr radeon) { - if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) - fprintf(stderr, "%s\n", __FUNCTION__); + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s\n", __FUNCTION__); if (radeon->vtbl.pre_emit_state) radeon->vtbl.pre_emit_state(radeon); @@ -1033,33 +1089,28 @@ void radeonEmitState(radeonContextPtr radeon) if (radeon->cmdbuf.cs->cdw && !radeon->hw.is_dirty && !radeon->hw.all_dirty) return; - /* To avoid going across the entire set of states multiple times, just check - * for enough space for the case of emitting all state, and inline the - * radeonAllocCmdBuf code here without all the checks. 
- */ - rcommonEnsureCmdBufSpace(radeon, radeon->hw.max_state_size, __FUNCTION__); - if (!radeon->cmdbuf.cs->cdw) { - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "Begin reemit state\n"); + radeonEmitAtoms(radeon, GL_TRUE); + } else { + + if (RADEON_DEBUG & RADEON_STATE) + fprintf(stderr, "Begin dirty state\n"); + radeonEmitAtoms(radeon, GL_FALSE); } - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "Begin dirty state\n"); - - radeonEmitAtoms(radeon, GL_TRUE); radeon->hw.is_dirty = GL_FALSE; radeon->hw.all_dirty = GL_FALSE; - } void radeonFlush(GLcontext *ctx) { radeonContextPtr radeon = RADEON_CONTEXT(ctx); - if (RADEON_DEBUG & DEBUG_IOCTL) + if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw); /* okay if we have no cmds in the buffer && @@ -1067,7 +1118,7 @@ void radeonFlush(GLcontext *ctx) we have no DMA buffer allocated. then no point flushing anything at all. */ - if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && !radeon->dma.current) + if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && is_empty_list(&radeon->dma.reserved)) return; if (radeon->dma.flush) @@ -1092,11 +1143,14 @@ void radeonFlush(GLcontext *ctx) * each of N places that do rendering. This has worse performances, * but it is much easier to get correct. 
*/ - if (radeon->is_front_buffer_rendering) { + if (!radeon->is_front_buffer_rendering) { radeon->front_buffer_dirty = GL_FALSE; } } } + + make_empty_list(&radeon->query.not_flushed_head); + } /* Make sure all commands have been sent to the hardware and have @@ -1148,11 +1202,13 @@ int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller) } rmesa->cmdbuf.flushing = 1; - if (RADEON_DEBUG & DEBUG_IOCTL) { + if (RADEON_DEBUG & RADEON_IOCTL) { fprintf(stderr, "%s from %s - %i cliprects\n", __FUNCTION__, caller, rmesa->numClipRects); } + radeonEmitQueryEnd(rmesa->glCtx); + if (rmesa->cmdbuf.cs->cdw) { ret = radeon_cs_emit(rmesa->cmdbuf.cs); rmesa->hw.all_dirty = GL_TRUE; @@ -1171,14 +1227,16 @@ int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller) { int ret; - radeonReleaseDmaRegion(rmesa); + radeonReleaseDmaRegions(rmesa); LOCK_HARDWARE(rmesa); ret = rcommonFlushCmdBufLocked(rmesa, caller); UNLOCK_HARDWARE(rmesa); if (ret) { - fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret); + fprintf(stderr, "drmRadeonCmdBuffer: %d. Kernel failed to " + "parse or rejected command stream. See dmesg " + "for more info.\n", ret); _mesa_exit(ret); } @@ -1191,12 +1249,16 @@ int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller) * * \param dwords The number of dwords we need to be free on the command buffer */ -void rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller) +GLboolean rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller) { - if ((rmesa->cmdbuf.cs->cdw + dwords + 128) > rmesa->cmdbuf.size || - radeon_cs_need_flush(rmesa->cmdbuf.cs)) { - rcommonFlushCmdBuf(rmesa, caller); - } + if ((rmesa->cmdbuf.cs->cdw + dwords + 128) > rmesa->cmdbuf.size + || radeon_cs_need_flush(rmesa->cmdbuf.cs)) { + /* If we try to flush empty buffer there is too big rendering operation. 
*/ + assert(rmesa->cmdbuf.cs->cdw); + rcommonFlushCmdBuf(rmesa, caller); + return GL_TRUE; + } + return GL_FALSE; } void rcommonInitCmdBuf(radeonContextPtr rmesa) @@ -1211,15 +1273,13 @@ void rcommonInitCmdBuf(radeonContextPtr rmesa) if (size > 64 * 256) size = 64 * 256; - if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) { - fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n", - sizeof(drm_r300_cmd_header_t)); - fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n", - sizeof(drm_radeon_cmd_buffer_t)); - fprintf(stderr, + radeon_print(RADEON_CS, RADEON_VERBOSE, + "sizeof(drm_r300_cmd_header_t)=%zd\n", sizeof(drm_r300_cmd_header_t)); + radeon_print(RADEON_CS, RADEON_VERBOSE, + "sizeof(drm_radeon_cmd_buffer_t)=%zd\n", sizeof(drm_radeon_cmd_buffer_t)); + radeon_print(RADEON_CS, RADEON_VERBOSE, "Allocating %d bytes command buffer (max state is %d bytes)\n", size * 4, rmesa->hw.max_state_size * 4); - } if (rmesa->radeonScreen->kernel_mm) { int fd = rmesa->radeonScreen->driScreen->fd; @@ -1236,7 +1296,7 @@ void rcommonInitCmdBuf(radeonContextPtr rmesa) rmesa->cmdbuf.size = size; radeon_cs_space_set_flush(rmesa->cmdbuf.cs, - (void (*)(void *))radeonFlush, rmesa->glCtx); + (void (*)(void *))rmesa->glCtx->Driver.Flush, rmesa->glCtx); if (!rmesa->radeonScreen->kernel_mm) { radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]); @@ -1271,22 +1331,19 @@ void rcommonBeginBatch(radeonContextPtr rmesa, int n, const char *function, int line) { - rcommonEnsureCmdBufSpace(rmesa, n, function); if (!rmesa->cmdbuf.cs->cdw && dostate) { - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "Reemit state after flush (from %s)\n", function); + radeon_print(RADEON_STATE, RADEON_NORMAL, + "Reemit state after flush (from %s)\n", function); radeonEmitState(rmesa); } radeon_cs_begin(rmesa->cmdbuf.cs, n, file, function, line); - if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "BEGIN_BATCH(%d) at %d, from %s:%i\n", + radeon_print(RADEON_CS, 
RADEON_VERBOSE, "BEGIN_BATCH(%d) at %d, from %s:%i\n", n, rmesa->cmdbuf.cs->cdw, function, line); } void radeonUserClear(GLcontext *ctx, GLuint mask) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - meta_clear_tris(&rmesa->meta, mask); + _mesa_meta_Clear(ctx, mask); } diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h index cebae18b2d..f3201911ac 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.h +++ b/src/mesa/drivers/dri/radeon/radeon_common.h @@ -10,6 +10,7 @@ void radeonRecalcScissorRects(radeonContextPtr radeon); void radeonSetCliprects(radeonContextPtr radeon); void radeonUpdateScissor( GLcontext *ctx ); void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h); +void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask ); void radeonWaitForIdleLocked(radeonContextPtr radeon); extern uint32_t radeonGetAge(radeonContextPtr radeon); @@ -24,6 +25,7 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa); void radeonFlush(GLcontext *ctx); void radeonFinish(GLcontext * ctx); void radeonEmitState(radeonContextPtr radeon); +GLuint radeonCountStateEmitSize(radeonContextPtr radeon); void radeon_clear_tris(GLcontext *ctx, GLbitfield mask); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 285e015c92..6b9b1e3c5e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -37,6 +37,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "utils.h" #include "vblank.h" #include "drirenderbuffer.h" +#include "drivers/common/meta.h" #include "main/context.h" #include "main/framebuffer.h" #include "main/renderbuffer.h" @@ -46,7 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */ +#if defined(RADEON_R600) #include "r600_context.h" #endif @@ -85,6 +86,18 @@ static const char* get_chip_family_name(int chip_family) case CHIP_FAMILY_R580: return "R580"; case CHIP_FAMILY_RV560: return "RV560"; case CHIP_FAMILY_RV570: return "RV570"; + case CHIP_FAMILY_R600: return "R600"; + case CHIP_FAMILY_RV610: return "RV610"; + case CHIP_FAMILY_RV630: return "RV630"; + case CHIP_FAMILY_RV670: return "RV670"; + case CHIP_FAMILY_RV620: return "RV620"; + case CHIP_FAMILY_RV635: return "RV635"; + case CHIP_FAMILY_RS780: return "RS780"; + case CHIP_FAMILY_RS880: return "RS880"; + case CHIP_FAMILY_RV770: return "RV770"; + case CHIP_FAMILY_RV730: return "RV730"; + case CHIP_FAMILY_RV710: return "RV710"; + case CHIP_FAMILY_RV740: return "RV740"; default: return "unknown"; } } @@ -195,11 +208,15 @@ GLboolean radeonInitContext(radeonContextPtr radeon, driContextPriv->driverPrivate = radeon; meta_init_metaops(ctx, &radeon->meta); + + _mesa_meta_init(ctx); + /* DRI fields */ radeon->dri.context = driContextPriv; radeon->dri.screen = sPriv; radeon->dri.hwContext = driContextPriv->hHWContext; radeon->dri.hwLock = &sPriv->pSAREA->lock; + radeon->dri.hwLockCount = 0; radeon->dri.fd = sPriv->fd; radeon->dri.drmMinor = sPriv->drm_version.minor; @@ -210,11 +227,8 @@ GLboolean radeonInitContext(radeonContextPtr radeon, fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode"); radeon->iw.irq_seq = -1; radeon->irqsEmitted = 0; - if (IS_R600_CLASS(radeon->radeonScreen)) - radeon->do_irqs = 0; - else - radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && - radeon->radeonScreen->irq); + radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && + radeon->radeonScreen->irq); radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); @@ -230,7 +244,29 @@ GLboolean radeonInitContext(radeonContextPtr radeon, 
radeon->texture_depth = ( glVisual->rgbBits > 16 ) ? DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; - radeon->texture_row_align = 32; + if (IS_R600_CLASS(radeon->radeonScreen)) { + radeon->texture_row_align = 256; + radeon->texture_rect_row_align = 256; + radeon->texture_compressed_row_align = 256; + } else if (IS_R200_CLASS(radeon->radeonScreen) || + IS_R100_CLASS(radeon->radeonScreen)) { + radeon->texture_row_align = 32; + radeon->texture_rect_row_align = 64; + radeon->texture_compressed_row_align = 32; + } else { /* R300 - not sure this is all correct */ + int chip_family = radeon->radeonScreen->chip_family; + if (chip_family == CHIP_FAMILY_RS600 || + chip_family == CHIP_FAMILY_RS690 || + chip_family == CHIP_FAMILY_RS740) + radeon->texture_row_align = 64; + else + radeon->texture_row_align = 32; + radeon->texture_rect_row_align = 64; + radeon->texture_compressed_row_align = 64; + } + + make_empty_list(&radeon->query.not_flushed_head); + radeon_init_dma(radeon); return GL_TRUE; } @@ -265,63 +301,48 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; radeonContextPtr current = ctx ? 
RADEON_CONTEXT(ctx) : NULL; - /* +r6/r7 */ - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); - /* --------- */ + assert(radeon); + + _mesa_meta_free(radeon->glCtx); if (radeon == current) { radeon_firevertices(radeon); _mesa_make_current(NULL, NULL, NULL); } - assert(radeon); - if (radeon) - { - -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */ - if (IS_R600_CLASS(screen)) - { - r600DestroyContext(driContextPriv); - } -#endif - - if (radeon->dma.current) { - rcommonFlushCmdBuf( radeon, __FUNCTION__ ); - } - - radeonReleaseArrays(radeon->glCtx, ~0); - meta_destroy_metaops(&radeon->meta); - if (radeon->vtbl.free_context) - radeon->vtbl.free_context(radeon->glCtx); - _swsetup_DestroyContext( radeon->glCtx ); - _tnl_DestroyContext( radeon->glCtx ); - _vbo_DestroyContext( radeon->glCtx ); - _swrast_DestroyContext( radeon->glCtx ); - - /* free atom list */ - /* free the Mesa context */ - _mesa_destroy_context(radeon->glCtx); - - /* _mesa_destroy_context() might result in calls to functions that - * depend on the DriverCtx, so don't set it to NULL before. 
- * - * radeon->glCtx->DriverCtx = NULL; - */ - /* free the option cache */ - driDestroyOptionCache(&radeon->optionCache); - - rcommonDestroyCmdBuf(radeon); - -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */ - if (!IS_R600_CLASS(screen)) -#endif - radeon_destroy_atom_list(radeon); + if (!is_empty_list(&radeon->dma.reserved)) { + rcommonFlushCmdBuf( radeon, __FUNCTION__ ); + } - if (radeon->state.scissor.pClipRects) { - FREE(radeon->state.scissor.pClipRects); - radeon->state.scissor.pClipRects = 0; - } + radeonFreeDmaRegions(radeon); + radeonReleaseArrays(radeon->glCtx, ~0); + meta_destroy_metaops(&radeon->meta); + if (radeon->vtbl.free_context) + radeon->vtbl.free_context(radeon->glCtx); + _swsetup_DestroyContext( radeon->glCtx ); + _tnl_DestroyContext( radeon->glCtx ); + _vbo_DestroyContext( radeon->glCtx ); + _swrast_DestroyContext( radeon->glCtx ); + + /* free atom list */ + /* free the Mesa context */ + _mesa_destroy_context(radeon->glCtx); + + /* _mesa_destroy_context() might result in calls to functions that + * depend on the DriverCtx, so don't set it to NULL before. 
+ * + * radeon->glCtx->DriverCtx = NULL; + */ + /* free the option cache */ + driDestroyOptionCache(&radeon->optionCache); + + rcommonDestroyCmdBuf(radeon); + + radeon_destroy_atom_list(radeon); + + if (radeon->state.scissor.pClipRects) { + FREE(radeon->state.scissor.pClipRects); + radeon->state.scissor.pClipRects = 0; } #ifdef RADEON_BO_TRACK track = fopen("/tmp/tracklog", "w"); @@ -339,7 +360,7 @@ GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv) { radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; - if (RADEON_DEBUG & DEBUG_DRI) + if (RADEON_DEBUG & RADEON_DRI) fprintf(stderr, "%s ctx %p\n", __FUNCTION__, radeon->glCtx); @@ -356,88 +377,48 @@ radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon, if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->frontOffset, - 0, - 0, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Front Buf"); -#else rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->frontOffset, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp; } if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->backOffset, 0, 0, RADEON_GEM_DOMAIN_VRAM, - 0, - "Back Buf"); -#else - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->backOffset, - 0, - 0, - RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->backPitch * rb->cpp; } if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->depthOffset, - 0, - 0, 
- RADEON_GEM_DOMAIN_VRAM, - 0, - "Z Buf"); -#else rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->depthOffset, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; } if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->depthOffset, - 0, - 0, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Stencil Buf"); -#else rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->depthOffset, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; @@ -460,16 +441,6 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->frontOffset + - radeon->radeonScreen->fbLocation, - size, - 4096, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Front Buf"); -#else rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->frontOffset + radeon->radeonScreen->fbLocation, @@ -477,23 +448,12 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, 4096, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp; } if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->backOffset + - radeon->radeonScreen->fbLocation, - size, - 4096, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Back Buf"); -#else rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->backOffset + radeon->radeonScreen->fbLocation, @@ 
-501,55 +461,32 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, 4096, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->backPitch * rb->cpp; } if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->depthOffset + radeon->radeonScreen->fbLocation, size, 4096, RADEON_GEM_DOMAIN_VRAM, - 0, - "Z Buf"); -#else - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->depthOffset + - radeon->radeonScreen->fbLocation, - size, - 4096, - RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; } if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->depthOffset + radeon->radeonScreen->fbLocation, size, 4096, RADEON_GEM_DOMAIN_VRAM, - 0, - "Stencil Buf"); -#else - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->depthOffset + - radeon->radeonScreen->fbLocation, - size, - 4096, - RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; @@ -587,7 +524,7 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) char *regname; struct radeon_bo *depth_bo = NULL, *bo; - if (RADEON_DEBUG & DEBUG_DRI) + if (RADEON_DEBUG & RADEON_DRI) fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable); draw = drawable->driverPrivate; @@ -714,7 +651,7 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) continue; } - if (RADEON_DEBUG & DEBUG_DRI) + if (RADEON_DEBUG & RADEON_DRI) fprintf(stderr, "attaching buffer %s, %d, at %d, cpp %d, pitch %d\n", regname, 
buffers[i].name, buffers[i].attachment, @@ -727,33 +664,34 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) rb->has_surface = 0; if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) { - if (RADEON_DEBUG & DEBUG_DRI) + if (RADEON_DEBUG & RADEON_DRI) fprintf(stderr, "(reusing depth buffer as stencil)\n"); bo = depth_bo; radeon_bo_ref(bo); } else { -#ifdef RADEON_DEBUG_BO - bo = radeon_bo_open(radeon->radeonScreen->bom, - buffers[i].name, - 0, - 0, - RADEON_GEM_DOMAIN_VRAM, - buffers[i].flags, - regname); -#else + uint32_t tiling_flags = 0, pitch = 0; + int ret; + bo = radeon_bo_open(radeon->radeonScreen->bom, buffers[i].name, 0, 0, RADEON_GEM_DOMAIN_VRAM, buffers[i].flags); -#endif /* RADEON_DEBUG_BO */ + if (bo == NULL) { fprintf(stderr, "failed to attach %s %d\n", regname, buffers[i].name); } + + ret = radeon_bo_get_tiling(bo, &tiling_flags, &pitch); + if (tiling_flags & RADEON_TILING_MACRO) + bo->flags |= RADEON_BO_FLAGS_MACRO_TILE; + if (tiling_flags & RADEON_TILING_MICRO) + bo->flags |= RADEON_BO_FLAGS_MICRO_TILE; + } if (buffers[i].attachment == __DRI_BUFFER_DEPTH) { @@ -799,7 +737,7 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, struct gl_framebuffer *readfb; if (!driContextPriv) { - if (RADEON_DEBUG & DEBUG_DRI) + if (RADEON_DEBUG & RADEON_DRI) fprintf(stderr, "%s ctx is null\n", __FUNCTION__); _mesa_make_current(NULL, NULL, NULL); return GL_TRUE; @@ -821,7 +759,7 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, radeon_make_renderbuffer_current(radeon, drfb); } - if (RADEON_DEBUG & DEBUG_DRI) + if (RADEON_DEBUG & RADEON_DRI) fprintf(stderr, "%s ctx %p dfb %p rfb %p\n", __FUNCTION__, radeon->glCtx, drfb, readfb); driUpdateFramebufferSize(radeon->glCtx, driDrawPriv); @@ -856,7 +794,7 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, } - if (RADEON_DEBUG & DEBUG_DRI) + if (RADEON_DEBUG & RADEON_DRI) fprintf(stderr, "End %s\n", __FUNCTION__); return GL_TRUE; diff 
--git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index e4a8da0596..0309345393 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -8,6 +8,7 @@ #include "tnl/t_context.h" #include "main/colormac.h" +#include "radeon_debug.h" #include "radeon_screen.h" #include "radeon_drm.h" #include "dri_util.h" @@ -142,10 +143,6 @@ struct radeon_stencilbuffer_state { GLuint clear; /* rb3d_stencilrefmask value */ }; -struct radeon_stipple_state { - GLuint mask[32]; -}; - struct radeon_state_atom { struct radeon_state_atom *next, *prev; const char *name; /* for debug */ @@ -163,6 +160,7 @@ struct radeon_hw_state { /* Head of the linked list of state atoms. */ struct radeon_state_atom atomlist; int max_state_size; /* Number of bytes necessary for a full state emit. */ + int max_post_flush_size; /* Number of bytes necessary for post flushing emits */ GLboolean is_dirty, all_dirty; }; @@ -239,6 +237,11 @@ struct radeon_tex_obj { GLuint SQ_TEX_SAMPLER1; GLuint SQ_TEX_SAMPLER2; + GLuint TD_PS_SAMPLER0_BORDER_RED; + GLuint TD_PS_SAMPLER0_BORDER_GREEN; + GLuint TD_PS_SAMPLER0_BORDER_BLUE; + GLuint TD_PS_SAMPLER0_BORDER_ALPHA; + GLboolean border_fallback; @@ -249,6 +252,17 @@ static INLINE radeonTexObj* radeon_tex_obj(struct gl_texture_object *texObj) return (radeonTexObj*)texObj; } +/* occlusion query */ +struct radeon_query_object { + struct gl_query_object Base; + struct radeon_bo *bo; + int curr_offset; + GLboolean emitted_begin; + + /* Double linked list of not flushed query objects */ + struct radeon_query_object *prev, *next; +}; + /* Need refcounting on dma buffers: */ struct radeon_dma_buffer { @@ -264,14 +278,25 @@ struct radeon_aos { int count; /** Number of vertices */ }; +#define DMA_BO_FREE_TIME 100 + +struct radeon_dma_bo { + struct radeon_dma_bo *next, *prev; + struct radeon_bo *bo; + int expire_counter; +}; + struct radeon_dma { /* 
Active dma region. Allocations for vertices and retained * regions come from here. Also used for emitting random vertices, * these may be flushed by calling flush_current(); */ - struct radeon_bo *current; /** Buffer that DMA memory is allocated from */ - int current_used; /** Number of bytes allocated and forgotten about */ - int current_vertexptr; /** End of active vertex region */ + struct radeon_dma_bo free; + struct radeon_dma_bo wait; + struct radeon_dma_bo reserved; + size_t current_used; /** Number of bytes allocated and forgotten about */ + size_t current_vertexptr; /** End of active vertex region */ + size_t minimum_size; /** * If current_vertexptr != current_used then flush must be non-zero. @@ -279,12 +304,6 @@ struct radeon_dma { * performed. */ void (*flush) (GLcontext *); - - /* Number of "in-flight" DMA buffers, i.e. the number of buffers - * for which a DISCARD command is currently queued in the command buffer -. - */ - GLuint nr_released_bufs; }; /* radeon_swtcl.c @@ -304,6 +323,7 @@ struct radeon_swtcl_info { struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; GLuint vertex_attr_count; + GLuint emit_prediction; }; #define RADEON_MAX_AOS_ARRAYS 16 @@ -316,7 +336,8 @@ struct radeon_tcl_info { struct radeon_ioctl { GLuint vertex_offset; - struct radeon_bo *bo; + GLuint vertex_max; + struct radeon_bo *bo; GLuint vertex_size; }; @@ -360,27 +381,11 @@ struct radeon_dri_mirror { drm_context_t hwContext; drm_hw_lock_t *hwLock; + int hwLockCount; int fd; int drmMinor; }; -#define DEBUG_TEXTURE 0x001 -#define DEBUG_STATE 0x002 -#define DEBUG_IOCTL 0x004 -#define DEBUG_PRIMS 0x008 -#define DEBUG_VERTS 0x010 -#define DEBUG_FALLBACKS 0x020 -#define DEBUG_VFMT 0x040 -#define DEBUG_CODEGEN 0x080 -#define DEBUG_VERBOSE 0x100 -#define DEBUG_DRI 0x200 -#define DEBUG_DMA 0x400 -#define DEBUG_SANITY 0x800 -#define DEBUG_SYNC 0x1000 -#define DEBUG_PIXEL 0x2000 -#define DEBUG_MEMORY 0x4000 - - typedef void (*radeon_tri_func) (radeonContextPtr, radeonVertex *, 
radeonVertex *, radeonVertex *); @@ -424,6 +429,8 @@ struct radeon_context { int texture_depth; float initialMaxAnisotropy; uint32_t texture_row_align; + uint32_t texture_rect_row_align; + uint32_t texture_compressed_row_align; struct radeon_dma dma; struct radeon_hw_state hw; @@ -438,7 +445,6 @@ struct radeon_context { GLuint numClipRects; /* Cliprects for the draw buffer */ drm_clip_rect_t *pClipRects; unsigned int lastStamp; - GLboolean lost_context; drm_radeon_sarea_t *sarea; /* Private SAREA data */ /* Mirrors of some DRI state */ @@ -461,6 +467,8 @@ struct radeon_context { struct radeon_cmdbuf cmdbuf; + struct radeon_debug debug; + drm_clip_rect_t fboRect; GLboolean constant_cliprect; /* use for FBO or DRI2 rendering */ GLboolean front_cliprects; @@ -493,6 +501,12 @@ struct radeon_context { struct dri_metaops meta; struct { + struct radeon_query_object *current; + struct radeon_query_object not_flushed_head; + struct radeon_state_atom queryobj; + } query; + + struct { void (*get_lock)(radeonContextPtr radeon); void (*update_viewport_offset)(GLcontext *ctx); void (*emit_cs_header)(struct radeon_cs *cs, radeonContextPtr rmesa); @@ -501,6 +515,8 @@ struct radeon_context { void (*pre_emit_state)(radeonContextPtr rmesa); void (*fallback)(GLcontext *ctx, GLuint bit, GLboolean mode); void (*free_context)(GLcontext *ctx); + void (*emit_query_finish)(radeonContextPtr radeon); + void (*update_scissor)(GLcontext *ctx); } vtbl; }; @@ -516,7 +532,6 @@ static inline __DRIdrawablePrivate* radeon_get_readable(radeonContextPtr radeon) return radeon->dri.context->driReadablePriv; } - /** * This function takes a float and packs it into a uint32_t */ @@ -576,21 +591,4 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, __DRIdrawablePrivate * driReadPriv); extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv); -/* ================================================================ - * Debugging: - */ -#define DO_DEBUG 1 - -#if DO_DEBUG -extern int 
RADEON_DEBUG; -#else -#define RADEON_DEBUG 0 -#endif - -#ifndef HAVE_LIBDRM_RADEON -#ifndef RADEON_DEBUG_BO -#define RADEON_DEBUG_BO 1 -#endif -#endif - #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index c457fb654e..8f4485aee7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -62,7 +62,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_swtcl.h" #include "radeon_tcl.h" #include "radeon_maos.h" +#include "radeon_queryobj.h" +#define need_GL_ARB_occlusion_query #define need_GL_EXT_blend_minmax #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color @@ -80,6 +82,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. const struct dri_extension card_extensions[] = { { "GL_ARB_multitexture", NULL }, + { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, { "GL_ARB_texture_border_clamp", NULL }, { "GL_ARB_texture_env_add", NULL }, { "GL_ARB_texture_env_combine", NULL }, @@ -134,25 +137,6 @@ static const struct tnl_pipeline_stage *radeon_pipeline[] = { NULL, }; -static const struct dri_debug_control debug_control[] = -{ - { "fall", DEBUG_FALLBACKS }, - { "tex", DEBUG_TEXTURE }, - { "ioctl", DEBUG_IOCTL }, - { "prim", DEBUG_PRIMS }, - { "vert", DEBUG_VERTS }, - { "state", DEBUG_STATE }, - { "code", DEBUG_CODEGEN }, - { "vfmt", DEBUG_VFMT }, - { "vtxf", DEBUG_VFMT }, - { "verb", DEBUG_VERBOSE }, - { "dri", DEBUG_DRI }, - { "dma", DEBUG_DMA }, - { "san", DEBUG_SANITY }, - { "sync", DEBUG_SYNC }, - { NULL, 0 } -}; - static void r100_get_lock(radeonContextPtr radeon) { r100ContextPtr rmesa = (r100ContextPtr)radeon; @@ -194,6 +178,20 @@ static void r100_vtbl_free_context(GLcontext *ctx) _mesa_vector4f_free( &rmesa->tcl.ObjClean ); } +static void r100_emit_query_finish(radeonContextPtr radeon) +{ + BATCH_LOCALS(radeon); + struct radeon_query_object *query = radeon->query.current; + + 
BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZPASS_ADDR, 0)); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); + query->curr_offset += sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + static void r100_init_vtbl(radeonContextPtr radeon) { radeon->vtbl.get_lock = r100_get_lock; @@ -202,6 +200,8 @@ static void r100_init_vtbl(radeonContextPtr radeon) radeon->vtbl.swtcl_flush = r100_swtcl_flush; radeon->vtbl.pre_emit_state = r100_vtbl_pre_emit_state; radeon->vtbl.fallback = radeonFallback; + radeon->vtbl.free_context = r100_vtbl_free_context; + radeon->vtbl.emit_query_finish = r100_emit_query_finish; } /* Create the device specific context. @@ -258,6 +258,7 @@ r100CreateContext( const __GLcontextModes *glVisual, */ _mesa_init_driver_functions( &functions ); radeonInitTextureFuncs( &functions ); + radeonInitQueryObjFunctions(&functions); if (!radeonInitContext(&rmesa->radeon, &functions, glVisual, driContextPriv, @@ -368,11 +369,14 @@ r100CreateContext( const __GLcontextModes *glVisual, if (rmesa->radeon.radeonScreen->kernel_mm || rmesa->radeon.dri.drmMinor >= 9) _mesa_enable_extension( ctx, "GL_NV_texture_rectangle"); + if (!rmesa->radeon.radeonScreen->kernel_mm) + _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); + /* XXX these should really go right after _mesa_init_driver_functions() */ radeon_fbo_init(&rmesa->radeon); radeonInitSpanFuncs( ctx ); radeonInitIoctlFuncs( ctx ); - radeonInitStateFuncs( ctx ); + radeonInitStateFuncs( ctx , rmesa->radeon.radeonScreen->kernel_mm ); radeonInitState( rmesa ); radeonInitSwtcl( ctx ); diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h index 1795d8bdb6..4e2c52c835 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_context.h @@ -295,6 +295,15 @@ struct radeon_texture_state { #define 
SHN_SHININESS 1 #define SHN_STATE_SIZE 2 +#define R100_QUERYOBJ_CMD_0 0 +#define R100_QUERYOBJ_DATA_0 1 +#define R100_QUERYOBJ_CMDSIZE 2 + +#define STP_CMD_0 0 +#define STP_DATA_0 1 +#define STP_CMD_1 2 +#define STP_STATE_SIZE 35 + struct r100_hw_state { /* Hardware state, stored as cmdbuf commands: * -- Need to doublebuffer for @@ -319,12 +328,11 @@ struct r100_hw_state { struct radeon_state_atom fog; struct radeon_state_atom glt; struct radeon_state_atom txr[3]; /* for NPOT */ - + struct radeon_state_atom stp; }; struct r100_state { - struct radeon_stipple_state stipple; struct radeon_texture_state texture; }; diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_drm.h b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h index ee403d173c..ab4eca31a3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_drm.h +++ b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h @@ -201,6 +201,15 @@ static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword) } } +static inline void radeon_cs_write_table(struct radeon_cs *cs, void *data, uint32_t size) +{ + memcpy(cs->packets + cs->cdw, data, size * 4); + cs->cdw += size; + if (cs->section) { + cs->section_cdw += size; + } +} + static inline void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data) { cs->space_flush_fn = fn; diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c index 4f1065ebcf..f1addb299e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c @@ -32,6 +32,7 @@ #include <errno.h> #include "radeon_bocs_wrapper.h" +#include "radeon_common.h" struct cs_manager_legacy { struct radeon_cs_manager base; @@ -317,7 +318,7 @@ static int cs_emit(struct radeon_cs *cs) if ((!IS_R300_CLASS(csm->ctx->radeonScreen)) && (!IS_R600_CLASS(csm->ctx->radeonScreen))) { /* +r6/r7 : No irq for r6/r7 yet. 
*/ drm_radeon_irq_emit_t emit_cmd; - emit_cmd.irq_seq = &csm->pending_age; + emit_cmd.irq_seq = (int*)&csm->pending_age; r = drmCommandWrite(cs->csm->fd, DRM_RADEON_IRQ_EMIT, &emit_cmd, sizeof(emit_cmd)); if (r) { return r; diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h index e177b4bafe..cafbc9e576 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h +++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h @@ -32,7 +32,7 @@ #ifndef RADEON_CS_LEGACY_H #define RADEON_CS_LEGACY_H -#include "radeon_common.h" +struct radeon_context; struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx); void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm); diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c b/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c index 5a8df7bb8c..89cbbb5a6b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c +++ b/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c @@ -82,7 +82,7 @@ static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct ra if (write_domain == RADEON_GEM_DOMAIN_VRAM) { sizes->op_read -= bo->size; sizes->op_vram_write += bo->size; - } else if (write_domain == RADEON_GEM_DOMAIN_VRAM) { + } else if (write_domain == RADEON_GEM_DOMAIN_GTT) { sizes->op_read -= bo->size; sizes->op_gart_write += bo->size; } diff --git a/src/mesa/drivers/dri/radeon/radeon_debug.c b/src/mesa/drivers/dri/radeon/radeon_debug.c new file mode 100644 index 0000000000..413000b6c0 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_debug.c @@ -0,0 +1,107 @@ +/* + * Copyright © 2009 Pauli Nieminen + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ +/* + * Authors: + * Pauli Nieminen <suokkos@gmail.com> + */ + +#include "utils.h" + +#include "radeon_debug.h" +#include "radeon_common_context.h" + +#include <stdarg.h> +#include <stdio.h> + +static const struct dri_debug_control debug_control[] = { + {"fall", RADEON_FALLBACKS}, + {"tex", RADEON_TEXTURE}, + {"ioctl", RADEON_IOCTL}, + {"verts", RADEON_VERTS}, + {"render", RADEON_RENDER}, + {"swrender", RADEON_SWRENDER}, + {"state", RADEON_STATE}, + {"shader", RADEON_SHADER}, + {"vfmt", RADEON_VFMT}, + {"vtxf", RADEON_VFMT}, + {"dri", RADEON_DRI}, + {"dma", RADEON_DMA}, + {"sanity", RADEON_SANITY}, + {"sync", RADEON_SYNC}, + {"pixel", RADEON_PIXEL}, + {"mem", RADEON_MEMORY}, + {"cs", RADEON_CS}, + {"allmsg", ~RADEON_SYNC}, /* avoid the term "sync" because the parser uses strstr */ + {NULL, 0} +}; + +radeon_debug_type_t radeon_enabled_debug_types; + +void radeon_init_debug(void) +{ + radeon_enabled_debug_types = driParseDebugString(getenv("RADEON_DEBUG"), debug_control); + + radeon_enabled_debug_types |= RADEON_GENERAL; +} + +void _radeon_debug_add_indent(void) +{ + GET_CURRENT_CONTEXT(ctx); + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + const size_t length = sizeof(radeon->debug.indent) + / sizeof(radeon->debug.indent[0]); + if (radeon->debug.indent_depth < length - 1) { + radeon->debug.indent[radeon->debug.indent_depth] = '\t'; + ++radeon->debug.indent_depth; + }; +} + +void _radeon_debug_remove_indent(void) +{ + GET_CURRENT_CONTEXT(ctx); + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + if (radeon->debug.indent_depth > 0) { + radeon->debug.indent[radeon->debug.indent_depth] = '\0'; + --radeon->debug.indent_depth; + } +} + +void _radeon_print(const radeon_debug_type_t type, + const radeon_debug_level_t level, + const char* message, + ...) 
+{ + GET_CURRENT_CONTEXT(ctx); + if (ctx) { + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + // FIXME: Make this multi thread safe + if (radeon->debug.indent_depth) + fprintf(stderr, "%s", radeon->debug.indent); + } + va_list values; + va_start( values, message ); + vfprintf(stderr, message, values); + va_end( values ); +} diff --git a/src/mesa/drivers/dri/radeon/radeon_debug.h b/src/mesa/drivers/dri/radeon/radeon_debug.h new file mode 100644 index 0000000000..26da31c1c4 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_debug.h @@ -0,0 +1,170 @@ +/* + * Copyright © 2009 Pauli Nieminen + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ +/* + * Authors: + * Pauli Nieminen <suokkos@gmail.com> + */ + +#ifndef RADEON_DEBUG_H_INCLUDED +#define RADEON_DEBUG_H_INCLUDED + +#include <stdlib.h> + +typedef enum radeon_debug_levels { + RADEON_CRITICAL = 0, /* Only errors */ + RADEON_IMPORTANT = 1, /* Important warnings and messages */ + RADEON_NORMAL = 2, /* Normal log messages usefull for debugging */ + RADEON_VERBOSE = 3, /* Extra details to debugging */ + RADEON_TRACE = 4 /* Log about everything that happens */ +} radeon_debug_level_t; + +/** + * Compile time option to change level of debugging compiled to dri driver. + * Selecting critical level is not recommended because perfromance gains are + * going to minimal but you will lose a lot of important warnings in case of + * errors. + */ +#ifndef RADEON_DEBUG_LEVEL +#define RADEON_DEBUG_LEVEL RADEON_VERBOSE +#endif + +typedef enum radeon_debug_types { + RADEON_TEXTURE = 0x00001, + RADEON_STATE = 0x00002, + RADEON_IOCTL = 0x00004, + RADEON_RENDER = 0x00008, + RADEON_SWRENDER = 0x00010, + RADEON_FALLBACKS = 0x00020, + RADEON_VFMT = 0x00040, + RADEON_SHADER = 0x00080, + RADEON_CS = 0x00100, + RADEON_DRI = 0x00200, + RADEON_DMA = 0x00400, + RADEON_SANITY = 0x00800, + RADEON_SYNC = 0x01000, + RADEON_PIXEL = 0x02000, + RADEON_MEMORY = 0x04000, + RADEON_VERTS = 0x08000, + RADEON_GENERAL = 0x10000 /* Used for errors and warnings */ +} radeon_debug_type_t; + +#define RADEON_MAX_INDENT 5 + +struct radeon_debug { + size_t indent_depth; + char indent[RADEON_MAX_INDENT]; +}; + +extern radeon_debug_type_t radeon_enabled_debug_types; + +/** + * Compabibility layer for old debug code + **/ +#define RADEON_DEBUG radeon_enabled_debug_types + +static inline int radeon_is_debug_enabled(const radeon_debug_type_t type, + const radeon_debug_level_t level) +{ + return RADEON_DEBUG_LEVEL >= level + && (type & radeon_enabled_debug_types); +} +/* + * define macro for gcc specific __attribute__ if using alternative compiler + */ +#ifndef __GNUC__ +#define __attribute__(x) 
/*empty*/ +#endif + + +extern void _radeon_print(const radeon_debug_type_t type, + const radeon_debug_level_t level, + const char* message, + ...) __attribute__((format(printf,3,4))); +/** + * Print out debug message if channel specified by type is enabled + * and compile time debugging level is at least as high as level parameter + */ +#define radeon_print(type, level, message, ...) do { \ + const radeon_debug_level_t _debug_level = (level); \ + const radeon_debug_type_t _debug_type = (type); \ + /* Compile out if level of message is too high */ \ + if (radeon_is_debug_enabled(type, level)) { \ + _radeon_print(_debug_type, _debug_level, \ + (message), ## __VA_ARGS__); \ + } \ +} while(0) + +/** + * printf style function for writing error messages. + */ +#define radeon_error(message, ...) do { \ + radeon_print(RADEON_GENERAL, RADEON_CRITICAL, \ + (message), ## __VA_ARGS__); \ +} while(0) + +/** + * printf style function for writing warnings. + */ +#define radeon_warning(message, ...) do { \ + radeon_print(RADEON_GENERAL, RADEON_IMPORTANT, \ + (message), ## __VA_ARGS__); \ +} while(0) + +extern void radeon_init_debug(void); +extern void _radeon_debug_add_indent(void); +extern void _radeon_debug_remove_indent(void); + +static inline void radeon_debug_add_indent(void) +{ + if (RADEON_DEBUG_LEVEL >= RADEON_VERBOSE) { + _radeon_debug_add_indent(); + } +} +static inline void radeon_debug_remove_indent(void) +{ + if (RADEON_DEBUG_LEVEL >= RADEON_VERBOSE) { + _radeon_debug_remove_indent(); + } +} + + +/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . + I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble + with other compilers ... GLUE! +*/ +#define WARN_ONCE(a, ...) 
do { \ + static int __warn_once=1; \ + if(__warn_once){ \ + radeon_warning("*********************************WARN_ONCE*********************************\n"); \ + radeon_warning("File %s function %s line %d\n", \ + __FILE__, __FUNCTION__, __LINE__); \ + radeon_warning( (a), ## __VA_ARGS__);\ + radeon_warning("***************************************************************************\n"); \ + __warn_once=0;\ + } \ + } while(0) + + +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c index 48114a0012..c6edbae9a1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.c +++ b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -30,7 +30,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ +#include <errno.h> #include "radeon_common.h" +#include "main/simple_list.h" #if defined(USE_X86_ASM) #define COPY_DWORDS( dst, src, nr ) \ @@ -52,11 +54,11 @@ do { \ } while (0) #endif -static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count) +void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count) { int i; - if (RADEON_DEBUG & DEBUG_VERTS) + if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d out %p data %p\n", __FUNCTION__, count, stride, (void *)out, (void *)data); @@ -70,11 +72,11 @@ static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count) } } -void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count) +void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count) { int i; - if (RADEON_DEBUG & DEBUG_VERTS) + if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d out %p data %p\n", __FUNCTION__, count, stride, (void *)out, (void *)data); @@ -89,11 +91,11 @@ void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count) } } -void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count) +void radeonEmitVec12(uint32_t 
*out, const GLvoid * data, int stride, int count) { int i; - if (RADEON_DEBUG & DEBUG_VERTS) + if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d out %p data %p\n", __FUNCTION__, count, stride, (void *)out, (void *)data); @@ -110,11 +112,11 @@ void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count) } } -static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count) +void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count) { int i; - if (RADEON_DEBUG & DEBUG_VERTS) + if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d out %p data %p\n", __FUNCTION__, count, stride, (void *)out, (void *)data); @@ -132,7 +134,7 @@ static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count) } void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, - GLvoid * data, int size, int stride, int count) + const GLvoid * data, int size, int stride, int count) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); uint32_t *out; @@ -161,61 +163,68 @@ void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, } } +void radeon_init_dma(radeonContextPtr rmesa) +{ + make_empty_list(&rmesa->dma.free); + make_empty_list(&rmesa->dma.wait); + make_empty_list(&rmesa->dma.reserved); + rmesa->dma.minimum_size = MAX_DMA_BUF_SZ; +} + void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) { + struct radeon_dma_bo *dma_bo = NULL; + /* we set minimum sizes to at least requested size + aligned to next 16 bytes. 
*/ + if (size > rmesa->dma.minimum_size) + rmesa->dma.minimum_size = (size + 15) & (~15); - size = MAX2(size, MAX_DMA_BUF_SZ); + radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n", + __FUNCTION__, size, rmesa->dma.minimum_size); - if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) - fprintf(stderr, "%s\n", __FUNCTION__); - if (rmesa->dma.flush) { - rmesa->dma.flush(rmesa->glCtx); - } + /* unmap old reserved bo */ + if (!is_empty_list(&rmesa->dma.reserved)) + radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); - if (rmesa->dma.nr_released_bufs > 4) { - rcommonFlushCmdBuf(rmesa, __FUNCTION__); - rmesa->dma.nr_released_bufs = 0; - } - - if (rmesa->dma.current) { - radeon_bo_unmap(rmesa->dma.current); - radeon_bo_unref(rmesa->dma.current); - rmesa->dma.current = 0; - } + if (is_empty_list(&rmesa->dma.free) + || last_elem(&rmesa->dma.free)->bo->size < size) { + dma_bo = CALLOC_STRUCT(radeon_dma_bo); + assert(dma_bo); again_alloc: -#ifdef RADEON_DEBUG_BO - rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom, - 0, size, 4, RADEON_GEM_DOMAIN_GTT, - 0, "dma.current"); -#else - rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom, - 0, size, 4, RADEON_GEM_DOMAIN_GTT, - 0); -#endif /* RADEON_DEBUG_BO */ - - if (!rmesa->dma.current) { - rcommonFlushCmdBuf(rmesa, __FUNCTION__); - rmesa->dma.nr_released_bufs = 0; - goto again_alloc; + dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom, + 0, rmesa->dma.minimum_size, 4, + RADEON_GEM_DOMAIN_GTT, 0); + + if (!dma_bo->bo) { + rcommonFlushCmdBuf(rmesa, __FUNCTION__); + goto again_alloc; + } + insert_at_head(&rmesa->dma.reserved, dma_bo); + } else { + /* We push and pop buffers from end of list so we can keep + counter on unused buffers for later freeing them from + begin of list */ + dma_bo = last_elem(&rmesa->dma.free); + remove_from_list(dma_bo); + insert_at_head(&rmesa->dma.reserved, dma_bo); } rmesa->dma.current_used = 0; rmesa->dma.current_vertexptr = 0; if 
(radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs, - rmesa->dma.current, + first_elem(&rmesa->dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0)) fprintf(stderr,"failure to revalidate BOs - badness\n"); - if (!rmesa->dma.current) { + if (is_empty_list(&rmesa->dma.reserved)) { /* Cmd buff have been flushed in radeon_revalidate_bos */ - rmesa->dma.nr_released_bufs = 0; goto again_alloc; } - radeon_bo_map(rmesa->dma.current, 1); + radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1); } /* Allocates a region from rmesa->dma.current. If there isn't enough @@ -225,7 +234,7 @@ void radeonAllocDmaRegion(radeonContextPtr rmesa, struct radeon_bo **pbo, int *poffset, int bytes, int alignment) { - if (RADEON_DEBUG & DEBUG_IOCTL) + if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); if (rmesa->dma.flush) @@ -236,30 +245,154 @@ void radeonAllocDmaRegion(radeonContextPtr rmesa, alignment--; rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; - if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size) - radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15); + if (is_empty_list(&rmesa->dma.reserved) + || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size) + radeonRefillCurrentDmaRegion(rmesa, bytes); *poffset = rmesa->dma.current_used; - *pbo = rmesa->dma.current; + *pbo = first_elem(&rmesa->dma.reserved)->bo; radeon_bo_ref(*pbo); /* Always align to at least 16 bytes */ rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15; rmesa->dma.current_vertexptr = rmesa->dma.current_used; - assert(rmesa->dma.current_used <= rmesa->dma.current->size); + assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size); } -void radeonReleaseDmaRegion(radeonContextPtr rmesa) +void radeonFreeDmaRegions(radeonContextPtr rmesa) { - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current); - if (rmesa->dma.current) { - 
rmesa->dma.nr_released_bufs++; - radeon_bo_unmap(rmesa->dma.current); - radeon_bo_unref(rmesa->dma.current); + struct radeon_dma_bo *dma_bo; + struct radeon_dma_bo *temp; + if (RADEON_DEBUG & RADEON_DMA) + fprintf(stderr, "%s\n", __FUNCTION__); + + foreach_s(dma_bo, temp, &rmesa->dma.free) { + remove_from_list(dma_bo); + radeon_bo_unref(dma_bo->bo); + FREE(dma_bo); + } + + foreach_s(dma_bo, temp, &rmesa->dma.wait) { + remove_from_list(dma_bo); + radeon_bo_unref(dma_bo->bo); + FREE(dma_bo); + } + + foreach_s(dma_bo, temp, &rmesa->dma.reserved) { + remove_from_list(dma_bo); + radeon_bo_unmap(dma_bo->bo); + radeon_bo_unref(dma_bo->bo); + FREE(dma_bo); } - rmesa->dma.current = NULL; +} + +void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes) +{ + if (is_empty_list(&rmesa->dma.reserved)) + return; + + if (RADEON_DEBUG & RADEON_IOCTL) + fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes); + rmesa->dma.current_used -= return_bytes; + rmesa->dma.current_vertexptr = rmesa->dma.current_used; +} + +static int radeon_bo_is_idle(struct radeon_bo* bo) +{ + uint32_t domain; + int ret = radeon_bo_is_busy(bo, &domain); + if (ret == -EINVAL) { + WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n" + "This may cause small performance drop for you.\n"); + } + /* Protect against bug in legacy bo handling that causes bos stay + * referenced even after they should be freed */ + if (bo->cref != 1) + return 0; + return ret != -EBUSY; +} + +void radeonReleaseDmaRegions(radeonContextPtr rmesa) +{ + struct radeon_dma_bo *dma_bo; + struct radeon_dma_bo *temp; + const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME; + const int time = rmesa->dma.free.expire_counter; + + if (RADEON_DEBUG & RADEON_DMA) { + size_t free = 0, + wait = 0, + reserved = 0; + foreach(dma_bo, &rmesa->dma.free) + ++free; + + foreach(dma_bo, &rmesa->dma.wait) + ++wait; + + foreach(dma_bo, &rmesa->dma.reserved) + ++reserved; + + fprintf(stderr, "%s: free %zu, wait 
%zu, reserved %zu, minimum_size: %zu\n", + __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size); + } + + if (!rmesa->radeonScreen->driScreen->dri2.enabled) { + /* request updated cs processing information from kernel */ + legacy_track_pending(rmesa->radeonScreen->bom, 0); + } + /* move waiting bos to free list. + wait list provides gpu time to handle data before reuse */ + foreach_s(dma_bo, temp, &rmesa->dma.wait) { + if (dma_bo->expire_counter == time) { + WARN_ONCE("Leaking dma buffer object!\n"); + /* force free of buffer so we don't realy start + * leaking stuff now*/ + while ((dma_bo->bo = radeon_bo_unref(dma_bo->bo))) {} + remove_from_list(dma_bo); + FREE(dma_bo); + continue; + } + /* free objects that are too small to be used because of large request */ + if (dma_bo->bo->size < rmesa->dma.minimum_size) { + radeon_bo_unref(dma_bo->bo); + remove_from_list(dma_bo); + FREE(dma_bo); + continue; + } + if (!radeon_bo_is_idle(dma_bo->bo)) + continue; + remove_from_list(dma_bo); + dma_bo->expire_counter = expire_at; + insert_at_tail(&rmesa->dma.free, dma_bo); + } + + /* unmap the last dma region */ + if (!is_empty_list(&rmesa->dma.reserved)) + radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); + /* move reserved to wait list */ + foreach_s(dma_bo, temp, &rmesa->dma.reserved) { + /* free objects that are too small to be used because of large request */ + if (dma_bo->bo->size < rmesa->dma.minimum_size) { + radeon_bo_unref(dma_bo->bo); + remove_from_list(dma_bo); + FREE(dma_bo); + continue; + } + remove_from_list(dma_bo); + dma_bo->expire_counter = expire_at; + insert_at_tail(&rmesa->dma.wait, dma_bo); + } + + /* free bos that have been unused for some time */ + foreach_s(dma_bo, temp, &rmesa->dma.free) { + if (dma_bo->expire_counter != time) + break; + remove_from_list(dma_bo); + radeon_bo_unref(dma_bo->bo); + FREE(dma_bo); + } + } @@ -271,11 +404,11 @@ void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) struct radeon_dma *dma = &rmesa->dma; - if 
(RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current); + if (RADEON_DEBUG & RADEON_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); dma->flush = NULL; - if (dma->current) { + if (!is_empty_list(&dma->reserved)) { GLuint current_offset = dma->current_used; assert (dma->current_used + @@ -297,19 +430,21 @@ rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) { GLuint bytes = vsize * nverts; void *head; -restart: - if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) { + if (RADEON_DEBUG & RADEON_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + if(is_empty_list(&rmesa->dma.reserved) + ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) { + if (rmesa->dma.flush) { + rmesa->dma.flush(rmesa->glCtx); + } + radeonRefillCurrentDmaRegion(rmesa, bytes); + + return NULL; } if (!rmesa->dma.flush) { - /* make sure we have enough space to use this in cmdbuf */ - rcommonEnsureCmdBufSpace(rmesa, - rmesa->hw.max_state_size + (20*sizeof(int)), - __FUNCTION__); /* if cmdbuf flushed DMA restart */ - if (!rmesa->dma.current) - goto restart; rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; rmesa->dma.flush = rcommon_flush_last_swtcl_prim; } @@ -320,7 +455,7 @@ restart: rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == rmesa->dma.current_vertexptr ); - head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr); + head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr); rmesa->dma.current_vertexptr += bytes; rmesa->swtcl.numverts += nverts; return head; @@ -330,18 +465,17 @@ void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) { radeonContextPtr radeon = RADEON_CONTEXT( ctx ); int i; + if (RADEON_DEBUG & RADEON_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); if (radeon->dma.flush) { radeon->dma.flush(radeon->glCtx); } - if (radeon->tcl.elt_dma_bo) { - radeon_bo_unref(radeon->tcl.elt_dma_bo); - radeon->tcl.elt_dma_bo 
= NULL; - } for (i = 0; i < radeon->tcl.aos_count; i++) { if (radeon->tcl.aos[i].bo) { radeon_bo_unref(radeon->tcl.aos[i].bo); radeon->tcl.aos[i].bo = NULL; + } } } diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h index 06e388fc1d..74e653fd18 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.h +++ b/src/mesa/drivers/dri/radeon/radeon_dma.h @@ -33,20 +33,26 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef RADEON_DMA_H #define RADEON_DMA_H -void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count); -void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count); +void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count); +void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count); +void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count); +void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count); void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, - GLvoid * data, int size, int stride, int count); + const GLvoid * data, int size, int stride, int count); +void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes); void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size); +void radeon_init_dma(radeonContextPtr rmesa); +void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes); void radeonAllocDmaRegion(radeonContextPtr rmesa, struct radeon_bo **pbo, int *poffset, int bytes, int alignment); -void radeonReleaseDmaRegion(radeonContextPtr rmesa); +void radeonReleaseDmaRegions(radeonContextPtr rmesa); void rcommon_flush_last_swtcl_prim(GLcontext *ctx); void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize); +void radeonFreeDmaRegions(radeonContextPtr rmesa); void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index 
f28efa33e9..7ac53ec0ca 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -35,11 +35,12 @@ #include "main/context.h" #include "main/texformat.h" #include "main/texrender.h" +#include "drivers/common/meta.h" #include "radeon_common.h" #include "radeon_mipmap_tree.h" -#define FILE_DEBUG_FLAG DEBUG_TEXTURE +#define FILE_DEBUG_FLAG RADEON_TEXTURE #define DBG(...) do { \ if (RADEON_DEBUG & FILE_DEBUG_FLAG) \ _mesa_printf(__VA_ARGS__); \ @@ -177,30 +178,21 @@ radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, width, height); } else { - uint32_t size = width * height * cpp; + uint32_t size; uint32_t pitch = ((cpp * width + 63) & ~63) / cpp; fprintf(stderr,"Allocating %d x %d radeon RBO (pitch %d)\n", width, height, pitch); + size = pitch * height * cpp; rrb->pitch = pitch * cpp; rrb->cpp = cpp; -#ifdef RADEON_DEBUG_BO - rrb->bo = radeon_bo_open(radeon->radeonScreen->bom, - 0, - size, - 0, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Radeon RBO"); -#else rrb->bo = radeon_bo_open(radeon->radeonScreen->bom, 0, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ rb->Width = width; rb->Height = height; return GL_TRUE; @@ -291,7 +283,7 @@ radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv) rrb->base.RedBits = 8; rrb->base.GreenBits = 8; rrb->base.BlueBits = 8; - rrb->base.AlphaBits = 8; + rrb->base.AlphaBits = 0; rrb->base.DataType = GL_UNSIGNED_BYTE; break; case GL_RGBA8: @@ -407,7 +399,7 @@ restart: rrb->cpp = 2; rrb->base._ActualFormat = GL_RGB5; rrb->base._BaseFormat = GL_RGB; - rrb->base.DataType = GL_UNSIGNED_SHORT; + rrb->base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGB5 texture OK\n"); } else if (texImage->TexFormat == &_mesa_texformat_argb1555) { @@ -581,14 +573,6 @@ radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) { } -static void -radeon_blit_framebuffer(GLcontext *ctx, - GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, - GLint 
dstX0, GLint dstY0, GLint dstX1, GLint dstY1, - GLbitfield mask, GLenum filter) -{ -} - void radeon_fbo_init(struct radeon_context *radeon) { radeon->glCtx->Driver.NewFramebuffer = radeon_new_framebuffer; @@ -599,7 +583,7 @@ void radeon_fbo_init(struct radeon_context *radeon) radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture; radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers; radeon->glCtx->Driver.ValidateFramebuffer = radeon_validate_framebuffer; - radeon->glCtx->Driver.BlitFramebuffer = radeon_blit_framebuffer; + radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer; } diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index a5e4df7941..a0106d00fa 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -113,7 +113,7 @@ void radeonSetUpAtomList( r100ContextPtr rmesa ) insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt); } -void radeonEmitScissor(r100ContextPtr rmesa) +static void radeonEmitScissor(r100ContextPtr rmesa) { BATCH_LOCALS(&rmesa->radeon); if (!rmesa->radeon.radeonScreen->kernel_mm) { @@ -127,8 +127,8 @@ void radeonEmitScissor(r100ContextPtr rmesa) OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) | rmesa->radeon.state.scissor.rect.x1); OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0)); - OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2 - 1) << 16) | - (rmesa->radeon.state.scissor.rect.x2 - 1)); + OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2) << 16) | + (rmesa->radeon.state.scissor.rect.x2)); END_BATCH(); } else { BEGIN_BATCH(2); @@ -200,7 +200,7 @@ void radeonFlushElts( GLcontext *ctx ) uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start); int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw); - if (RADEON_DEBUG & DEBUG_IOCTL) + if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); assert( 
rmesa->radeon.dma.flush == radeonFlushElts ); @@ -236,7 +236,7 @@ void radeonFlushElts( GLcontext *ctx ) END_BATCH(); - if (RADEON_DEBUG & DEBUG_SYNC) { + if (RADEON_DEBUG & RADEON_SYNC) { fprintf(stderr, "%s: Syncing\n", __FUNCTION__); radeonFinish( rmesa->radeon.glCtx ); } @@ -252,7 +252,7 @@ GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, int align_min_nr; BATCH_LOCALS(&rmesa->radeon); - if (RADEON_DEBUG & DEBUG_IOCTL) + if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive); assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); @@ -273,7 +273,7 @@ GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, } else { OUT_BATCH(rmesa->ioctl.vertex_offset); } - OUT_BATCH(0xffff); + OUT_BATCH(rmesa->ioctl.vertex_max); OUT_BATCH(vertex_format); OUT_BATCH(primitive | RADEON_CP_VC_CNTL_PRIM_WALK_IND | @@ -296,7 +296,7 @@ GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset); - if (RADEON_DEBUG & DEBUG_PRIMS) + if (RADEON_DEBUG & RADEON_RENDER) fprintf(stderr, "%s: header prim %x \n", __FUNCTION__, primitive); @@ -318,7 +318,7 @@ void radeonEmitVertexAOS( r100ContextPtr rmesa, #else BATCH_LOCALS(&rmesa->radeon); - if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL)) + if (RADEON_DEBUG & (RADEON_PRIMS|DEBUG_IOCTL)) fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n", __FUNCTION__, vertex_size, offset); @@ -342,6 +342,7 @@ void radeonEmitAOS( r100ContextPtr rmesa, rmesa->ioctl.bo = rmesa->radeon.tcl.aos[0].bo; rmesa->ioctl.vertex_offset = (rmesa->radeon.tcl.aos[0].offset + offset * rmesa->radeon.tcl.aos[0].stride * 4); + rmesa->ioctl.vertex_max = rmesa->radeon.tcl.aos[0].count; #else BATCH_LOCALS(&rmesa->radeon); uint32_t voffset; @@ -349,7 +350,7 @@ void radeonEmitAOS( r100ContextPtr rmesa, int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; int i; - if (RADEON_DEBUG & DEBUG_IOCTL) + if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "%s\n", 
__FUNCTION__); BEGIN_BATCH(sz+2+(nr * 2)); @@ -574,7 +575,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) GLuint color_mask = 0; GLuint orig_mask = mask; - if ( RADEON_DEBUG & DEBUG_IOCTL ) { + if ( RADEON_DEBUG & RADEON_IOCTL ) { fprintf( stderr, "radeonClear\n"); } @@ -610,7 +611,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) } if ( mask ) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask); _swrast_Clear( ctx, mask ); } diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h index 18805d4c57..deb53ae313 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h @@ -132,16 +132,18 @@ static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa, * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts. */ #if RADEON_OLD_PACKETS -#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int)) +#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2))+nr*2) #define VERT_AOS_BUFSZ (0) #define ELTS_BUFSZ(nr) (24 + nr * 2) -#define VBUF_BUFSZ (6 * sizeof(int)) +#define VBUF_BUFSZ (8) #else -#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int)) -#define VERT_AOS_BUFSZ (5 * sizeof(int)) +#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2)) +#define VERT_AOS_BUFSZ (5) #define ELTS_BUFSZ(nr) (16 + nr * 2) -#define VBUF_BUFSZ (4 * sizeof(int)) +#define VBUF_BUFSZ (4) #endif +#define SCISSOR_BUFSZ (8) +#define INDEX_BUFSZ (7) static inline uint32_t cmdpacket3(int cmd_type) diff --git a/src/mesa/drivers/dri/radeon/radeon_lighting.c b/src/mesa/drivers/dri/radeon/radeon_lighting.c index ac3b94e4a6..ba444f2b10 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lighting.c +++ b/src/mesa/drivers/dri/radeon/radeon_lighting.c @@ -195,7 +195,7 @@ void radeonUpdateMaterial( GLcontext *ctx ) if 
(ctx->Light.ColorMaterialEnabled) mask &= ~ctx->Light.ColorMaterialBitmask; - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "%s\n", __FUNCTION__); @@ -234,7 +234,7 @@ void radeonUpdateMaterial( GLcontext *ctx ) check_twoside_fallback( ctx ); update_global_ambient( ctx ); } - else if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_STATE)) + else if (RADEON_DEBUG & (RADEON_PRIMS|DEBUG_STATE)) fprintf(stderr, "%s: Elided noop material call\n", __FUNCTION__); } @@ -624,7 +624,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx ) GLboolean tmp; RADEON_STATECHANGE( rmesa, tcl ); - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "%s %d\n", __FUNCTION__, ctx->_NeedEyeCoords); if (ctx->_NeedEyeCoords) diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index 5774f7ebcf..02de8e5fd1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -85,11 +85,35 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) } rmesa->vtbl.get_lock(rmesa); - - rmesa->lost_context = GL_TRUE; } - -void radeon_lock_hardware(radeonContextPtr radeon) +#ifndef NDEBUG +struct lock_debug { + const char* function; + const char* file; + int line; +}; + +static struct lock_debug ldebug = {0}; +#endif + +#if 0 +/** TODO: use atomic operations for reference counting **/ +/** gcc 4.2 has builtin functios for this **/ +#define ATOMIC_INC_AND_FETCH(atomic) __sync_add_and_fetch(&atomic, 1) +#define ATOMIC_DEC_AND_FETCH(atomic) __sync_sub_and_fetch(&atomic, 1) +#else +#define ATOMIC_INC_AND_FETCH(atomic) (++atomic) +#define ATOMIC_DEC_AND_FETCH(atomic) (--atomic) +#endif + + +void radeon_lock_hardware(radeonContextPtr radeon +#ifndef NDEBUG + ,const char* function + ,const char* file + ,const int line +#endif + ) { char ret = 0; struct radeon_framebuffer *rfb = NULL; @@ -104,16 +128,39 @@ void radeon_lock_hardware(radeonContextPtr radeon) } if 
(!radeon->radeonScreen->driScreen->dri2.enabled) { + if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1) + { +#ifndef NDEBUG + if ( RADEON_DEBUG & RADEON_SANITY ) + fprintf(stderr, "*** %d times of recursive call to %s ***\n" + "Original call was from %s (file: %s line: %d)\n" + "Now call is coming from %s (file: %s line: %d)\n" + , radeon->dri.hwLockCount, __FUNCTION__ + , ldebug.function, ldebug.file, ldebug.line + , function, file, line + ); +#endif + return; + } DRM_CAS(radeon->dri.hwLock, radeon->dri.hwContext, (DRM_LOCK_HELD | radeon->dri.hwContext), ret ); if (ret) radeonGetLock(radeon, 0); +#ifndef NDEBUG + ldebug.function = function; + ldebug.file = file; + ldebug.line = line; +#endif } } void radeon_unlock_hardware(radeonContextPtr radeon) { if (!radeon->radeonScreen->driScreen->dri2.enabled) { + if (ATOMIC_DEC_AND_FETCH(radeon->dri.hwLockCount) > 0) + { + return; + } DRM_UNLOCK( radeon->dri.fd, radeon->dri.hwLock, radeon->dri.hwContext ); diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.h b/src/mesa/drivers/dri/radeon/radeon_lock.h index 2817709eed..da5a5b4371 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.h +++ b/src/mesa/drivers/dri/radeon/radeon_lock.h @@ -48,12 +48,22 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags); -void radeon_lock_hardware(radeonContextPtr rmesa); +void radeon_lock_hardware(radeonContextPtr rmesa +#ifndef NDEBUG + ,const char* function + ,const char* file + ,const int line +#endif + ); void radeon_unlock_hardware(radeonContextPtr rmesa); /* Lock the hardware and validate our state. 
*/ +#ifdef NDEBUG #define LOCK_HARDWARE( rmesa ) radeon_lock_hardware(rmesa) +#else +#define LOCK_HARDWARE( rmesa ) radeon_lock_hardware(rmesa, __FUNCTION__, __FILE__, __LINE__) +#endif #define UNLOCK_HARDWARE( rmesa ) radeon_unlock_hardware(rmesa) #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c index 7c6ea0530e..08e1c5d00d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c +++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c @@ -56,7 +56,7 @@ static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos, int size = 1; radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - if (RADEON_DEBUG & DEBUG_VERTS) + if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d\n", __FUNCTION__, count, stride); @@ -87,7 +87,7 @@ static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos, static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count) { int i; - if (RADEON_DEBUG & DEBUG_VERTS) + if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d\n", __FUNCTION__, count, stride); @@ -103,7 +103,7 @@ static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count) { int i; - if (RADEON_DEBUG & DEBUG_VERTS) + if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d\n", __FUNCTION__, count, stride); @@ -126,7 +126,7 @@ static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos, int emitsize; uint32_t *out; - if (RADEON_DEBUG & DEBUG_VERTS) + if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size); switch (size) { @@ -188,7 +188,7 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) GLuint vtx, unit; #if 0 - if (RADEON_DEBUG & DEBUG_VERTS) + if (RADEON_DEBUG & RADEON_VERTS) _tnl_print_vert_flags( __FUNCTION__, inputs ); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h index 034cda8a65..515783135d 100644 --- 
a/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h +++ b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h @@ -54,8 +54,7 @@ static void TAG(emit)( GLcontext *ctx, union emit_union *v = (union emit_union *)dest; - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s\n", __FUNCTION__); + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __FUNCTION__); coord = (GLuint (*)[4])VB->ObjPtr->data; coord_stride = VB->ObjPtr->stride; diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index f04a07fecd..38db305e2a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -90,16 +90,18 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree GLuint face, GLuint level, GLuint* curOffset) { radeon_mipmap_level *lvl = &mt->levels[level]; - uint32_t row_align = rmesa->texture_row_align - 1; + uint32_t row_align; /* Find image size in bytes */ if (mt->compressed) { /* TODO: Is this correct? Need test cases for compressed textures! */ - lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63; + row_align = rmesa->texture_compressed_row_align - 1; + lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align; lvl->size = radeon_compressed_texture_size(mt->radeon->glCtx, lvl->width, lvl->height, lvl->depth, mt->compressed); } else if (mt->target == GL_TEXTURE_RECTANGLE_NV) { - lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63; + row_align = rmesa->texture_rect_row_align - 1; + lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align; lvl->size = lvl->rowstride * lvl->height; } else if (mt->tilebits & RADEON_TXO_MICRO_TILE) { /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, @@ -108,6 +110,7 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31; lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth; } else { + row_align = rmesa->texture_row_align - 1; lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align; lvl->size = lvl->rowstride * lvl->height * lvl->depth; } @@ -118,7 +121,7 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree lvl->faces[face].offset = *curOffset; *curOffset += lvl->size; - if (RADEON_DEBUG & DEBUG_TEXTURE) + if (RADEON_DEBUG & RADEON_TEXTURE) fprintf(stderr, "level %d, face %d: rs:%d %dx%d at %d\n", level, face, lvl->rowstride, lvl->width, lvl->height, lvl->faces[face].offset); @@ -187,13 +190,14 @@ static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_ * Create a new mipmap tree, calculate its layout and allocate memory. */ radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t, - GLenum target, GLuint firstLevel, GLuint lastLevel, + GLenum target, GLenum internal_format, GLuint firstLevel, GLuint lastLevel, GLuint width0, GLuint height0, GLuint depth0, GLuint bpp, GLuint tilebits, GLuint compressed) { radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree); mt->radeon = rmesa; + mt->internal_format = internal_format; mt->refcount = 1; mt->t = t; mt->target = target; @@ -212,18 +216,10 @@ radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj * else calculate_miptree_layout_r100(rmesa, mt); -#ifdef RADEON_DEBUG_BO - mt->bo = radeon_bo_open(rmesa->radeonScreen->bom, - 0, mt->totalsize, 1024, - RADEON_GEM_DOMAIN_VRAM, - 0, - "MIPMAP TREE"); -#else mt->bo = radeon_bo_open(rmesa->radeonScreen->bom, 0, mt->totalsize, 1024, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ return mt; } @@ -323,7 +319,8 @@ GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, 
if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel) return GL_FALSE; - if (texImage->IsCompressed != mt->compressed) + if (texImage->InternalFormat != mt->internal_format || + texImage->IsCompressed != mt->compressed) return GL_FALSE; if (!texImage->IsCompressed && @@ -363,8 +360,8 @@ GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_textu mt->width0 == firstImage->Width && mt->height0 == firstImage->Height && mt->depth0 == firstImage->Depth && - mt->bpp == firstImage->TexFormat->TexelBytes && - mt->compressed == compressed); + mt->compressed == compressed && + (!mt->compressed ? (mt->bpp == firstImage->TexFormat->TexelBytes) : 1)); } @@ -373,9 +370,9 @@ GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_textu * given image in the given position. */ void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t, - struct gl_texture_image *texImage, GLuint face, GLuint level) + radeon_texture_image *image, GLuint face, GLuint level) { - GLuint compressed = texImage->IsCompressed ? texImage->TexFormat->MesaFormat : 0; + GLuint compressed = image->base.IsCompressed ? 
image->base.TexFormat->MesaFormat : 0; GLuint numfaces = 1; GLuint firstLevel, lastLevel; @@ -389,9 +386,10 @@ void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t, return; t->mt = radeon_miptree_create(rmesa, t, t->base.Target, + image->base.InternalFormat, firstLevel, lastLevel, - texImage->Width, texImage->Height, texImage->Depth, - texImage->TexFormat->TexelBytes, t->tile_bits, compressed); + image->base.Width, image->base.Height, image->base.Depth, + image->base.TexFormat->TexelBytes, t->tile_bits, compressed); } /* Although we use the image_offset[] array to store relative offsets diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h index 7ece688493..db28252da3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h @@ -67,6 +67,7 @@ struct _radeon_mipmap_tree { GLuint totalsize; /** total size of the miptree, in bytes */ GLenum target; /** GL_TEXTURE_xxx */ + GLenum internal_format; GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */ GLuint firstLevel; /** First mip level stored in this mipmap tree */ GLuint lastLevel; /** Last mip level stored in this mipmap tree */ @@ -83,7 +84,7 @@ struct _radeon_mipmap_tree { }; radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t, - GLenum target, GLuint firstLevel, GLuint lastLevel, + GLenum target, GLenum internal_format, GLuint firstLevel, GLuint lastLevel, GLuint width0, GLuint height0, GLuint depth0, GLuint bpp, GLuint tilebits, GLuint compressed); void radeon_miptree_reference(radeon_mipmap_tree *mt); @@ -93,7 +94,7 @@ GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, struct gl_texture_image *texImage, GLuint face, GLuint level); GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj); void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t, - struct gl_texture_image 
*texImage, GLuint face, GLuint level); + radeon_texture_image *texImage, GLuint face, GLuint level); GLuint radeon_miptree_image_offset(radeon_mipmap_tree *mt, GLuint face, GLuint level); void radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets); diff --git a/src/mesa/drivers/dri/radeon/radeon_queryobj.c b/src/mesa/drivers/dri/radeon/radeon_queryobj.c new file mode 100644 index 0000000000..b79d864ba2 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_queryobj.c @@ -0,0 +1,235 @@ +/* + * Copyright © 2008-2009 Maciej Cencora <m.cencora@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Maciej Cencora <m.cencora@gmail.com> + * + */ +#include "radeon_common.h" +#include "radeon_queryobj.h" +#include "radeon_debug.h" + +#include "main/imports.h" +#include "main/simple_list.h" + +static int radeonQueryIsFlushed(GLcontext *ctx, struct gl_query_object *q) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *tmp, *query = (struct radeon_query_object *)q; + + foreach(tmp, &radeon->query.not_flushed_head) { + if (tmp == query) { + return 0; + } + } + + return 1; +} + +static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q) +{ + struct radeon_query_object *query = (struct radeon_query_object *)q; + uint32_t *result; + int i; + + radeon_print(RADEON_STATE, RADEON_VERBOSE, + "%s: query id %d, result %d\n", + __FUNCTION__, query->Base.Id, (int) query->Base.Result); + + radeon_bo_map(query->bo, GL_FALSE); + + result = query->bo->ptr; + + query->Base.Result = 0; + for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) { + query->Base.Result += result[i]; + radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, result[i]); + } + + radeon_bo_unmap(query->bo); +} + +static struct gl_query_object * radeonNewQueryObject(GLcontext *ctx, GLuint id) +{ + struct radeon_query_object *query; + + query = _mesa_calloc(sizeof(struct radeon_query_object)); + + query->Base.Id = id; + query->Base.Result = 0; + query->Base.Active = GL_FALSE; + query->Base.Ready = GL_TRUE; + + radeon_print(RADEON_STATE, RADEON_VERBOSE,"%s: query id %d\n", __FUNCTION__, query->Base.Id); + + return &query->Base; +} + +static void radeonDeleteQuery(GLcontext *ctx, struct gl_query_object *q) +{ + struct radeon_query_object *query = (struct radeon_query_object *)q; + + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id); + + if (query->bo) { + radeon_bo_unref(query->bo); + } + + _mesa_free(query); +} + +static void radeonWaitQuery(GLcontext *ctx, struct gl_query_object *q) +{ + struct 
radeon_query_object *query = (struct radeon_query_object *)q; + + /* If the cmdbuf with packets for this query hasn't been flushed yet, do it now */ + if (!radeonQueryIsFlushed(ctx, q)) + ctx->Driver.Flush(ctx); + + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, query->bo, query->curr_offset); + + radeonQueryGetResult(ctx, q); + + query->Base.Ready = GL_TRUE; +} + + +static void radeonBeginQuery(GLcontext *ctx, struct gl_query_object *q) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *query = (struct radeon_query_object *)q; + + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id); + + assert(radeon->query.current == NULL); + + if (radeon->dma.flush) + radeon->dma.flush(radeon->glCtx); + + if (!query->bo) { + query->bo = radeon_bo_open(radeon->radeonScreen->bom, 0, RADEON_QUERY_PAGE_SIZE, RADEON_QUERY_PAGE_SIZE, RADEON_GEM_DOMAIN_GTT, 0); + } + query->curr_offset = 0; + + radeon->query.current = query; + + radeon->query.queryobj.dirty = GL_TRUE; + radeon->hw.is_dirty = GL_TRUE; + insert_at_tail(&radeon->query.not_flushed_head, query); + +} + +void radeonEmitQueryEnd(GLcontext *ctx) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *query = radeon->query.current; + + if (!query) + return; + + if (query->emitted_begin == GL_FALSE) + return; + + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, query->Base.Id, query->bo, query->curr_offset); + + radeon_cs_space_check_with_bo(radeon->cmdbuf.cs, + query->bo, + 0, RADEON_GEM_DOMAIN_GTT); + + radeon->vtbl.emit_query_finish(radeon); +} + +static void radeonEndQuery(GLcontext *ctx, struct gl_query_object *q) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id); + + if (radeon->dma.flush) + radeon->dma.flush(radeon->glCtx); + 
radeonEmitQueryEnd(ctx); + + radeon->query.current = NULL; +} + +static void radeonCheckQuery(GLcontext *ctx, struct gl_query_object *q) +{ + radeon_print(RADEON_STATE, RADEON_TRACE, "%s: query id %d\n", __FUNCTION__, q->Id); + +#ifdef DRM_RADEON_GEM_BUSY + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + + if (radeon->radeonScreen->kernel_mm) { + struct radeon_query_object *query = (struct radeon_query_object *)q; + uint32_t domain; + + /* Need to perform a flush, as per ARB_occlusion_query spec */ + if (!radeonQueryIsFlushed(ctx, q)) { + ctx->Driver.Flush(ctx); + } + + if (radeon_bo_is_busy(query->bo, &domain) == 0) { + radeonQueryGetResult(ctx, q); + query->Base.Ready = GL_TRUE; + } + } else { + radeonWaitQuery(ctx, q); + } +#else + radeonWaitQuery(ctx, q); +#endif +} + +void radeonInitQueryObjFunctions(struct dd_function_table *functions) +{ + functions->NewQueryObject = radeonNewQueryObject; + functions->DeleteQuery = radeonDeleteQuery; + functions->BeginQuery = radeonBeginQuery; + functions->EndQuery = radeonEndQuery; + functions->CheckQuery = radeonCheckQuery; + functions->WaitQuery = radeonWaitQuery; +} + +int radeon_check_query_active(GLcontext *ctx, struct radeon_state_atom *atom) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *query = radeon->query.current; + + if (!query || query->emitted_begin) + return 0; + return atom->cmd_size; +} + +void radeon_emit_queryobj(GLcontext *ctx, struct radeon_state_atom *atom) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + BATCH_LOCALS(radeon); + int dwords; + + dwords = (*atom->check) (ctx, atom); + + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(atom->cmd, dwords); + END_BATCH(); + + radeon->query.current->emitted_begin = GL_TRUE; +} diff --git a/src/mesa/drivers/dri/radeon/radeon_queryobj.h b/src/mesa/drivers/dri/radeon/radeon_queryobj.h new file mode 100644 index 0000000000..19374dc76b --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_queryobj.h @@ -0,0 +1,55 @@ 
+/* + * Copyright © 2008 Maciej Cencora <m.cencora@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Maciej Cencora <m.cencora@gmail.com> + * + */ + +#include "main/imports.h" +#include "main/simple_list.h" +#include "radeon_common_context.h" + +extern void radeonEmitQueryBegin(GLcontext *ctx); +extern void radeonEmitQueryEnd(GLcontext *ctx); + +extern void radeonInitQueryObjFunctions(struct dd_function_table *functions); + +#define RADEON_QUERY_PAGE_SIZE 4096 + +int radeon_check_query_active(GLcontext *ctx, struct radeon_state_atom *atom); +void radeon_emit_queryobj(GLcontext *ctx, struct radeon_state_atom *atom); + +static inline void radeon_init_query_stateobj(radeonContextPtr radeon, int SZ) +{ + radeon->query.queryobj.cmd_size = (SZ); + radeon->query.queryobj.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t)); + radeon->query.queryobj.name = "queryobj"; + radeon->query.queryobj.idx = 0; + radeon->query.queryobj.check = radeon_check_query_active; + radeon->query.queryobj.dirty = GL_FALSE; + radeon->query.queryobj.emit = radeon_emit_queryobj; + + radeon->hw.max_state_size += (SZ); + insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj); +} + diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.c b/src/mesa/drivers/dri/radeon/radeon_sanity.c index bbed838b59..1ab570f507 100644 --- a/src/mesa/drivers/dri/radeon/radeon_sanity.c +++ b/src/mesa/drivers/dri/radeon/radeon_sanity.c @@ -44,11 +44,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define MORE_VERBOSE 1 #if MORE_VERBOSE -#define VERBOSE (RADEON_DEBUG & DEBUG_VERBOSE) +#define VERBOSE (RADEON_DEBUG & RADEON_VERBOSE) #define NORMAL (1) #else #define VERBOSE 0 -#define NORMAL (RADEON_DEBUG & DEBUG_VERBOSE) +#define NORMAL (RADEON_DEBUG & RADEON_VERBOSE) #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 290ef2394d..573eb6c9c1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -48,18 +48,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "radeon_screen.h" #include "radeon_common.h" #include "radeon_span.h" -#if !RADEON_COMMON +#if defined(RADEON_R100) #include "radeon_context.h" #include "radeon_tex.h" -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#elif defined(RADEON_R200) #include "r200_context.h" #include "r200_ioctl.h" #include "r200_tex.h" -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#elif defined(RADEON_R300) #include "r300_context.h" -#include "r300_fragprog.h" #include "r300_tex.h" -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) #include "r600_context.h" #include "r700_driconf.h" /* +r6/r7 */ #include "r600_tex.h" /* +r6/r7 */ @@ -83,7 +82,7 @@ DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \ DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \ DRI_CONF_OPT_END -#if !RADEON_COMMON /* R100 */ +#if defined(RADEON_R100) /* R100 */ PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE @@ -110,7 +109,7 @@ DRI_CONF_BEGIN DRI_CONF_END; static const GLuint __driNConfigOptions = 15; -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#elif defined(RADEON_R200) PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN @@ -148,7 +147,10 @@ extern const struct dri_extension NV_vp_extension[]; extern const struct dri_extension ATI_fs_extension[]; extern const struct dri_extension point_extensions[]; -#elif RADEON_COMMON && (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600)) +#elif defined(RADEON_R300) || defined(RADEON_R600) + +#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 +#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 /* TODO: integrate these into xmlpool.h! 
*/ #define DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(def,min,max) \ @@ -218,31 +220,7 @@ static const GLuint __driNConfigOptions = 17; extern const struct dri_extension gl_20_extension[]; -#ifndef RADEON_DEBUG - -static const struct dri_debug_control debug_control[] = { - {"fall", DEBUG_FALLBACKS}, - {"tex", DEBUG_TEXTURE}, - {"ioctl", DEBUG_IOCTL}, - {"prim", DEBUG_PRIMS}, - {"vert", DEBUG_VERTS}, - {"state", DEBUG_STATE}, - {"code", DEBUG_CODEGEN}, - {"vfmt", DEBUG_VFMT}, - {"vtxf", DEBUG_VFMT}, - {"verb", DEBUG_VERBOSE}, - {"dri", DEBUG_DRI}, - {"dma", DEBUG_DMA}, - {"san", DEBUG_SANITY}, - {"sync", DEBUG_SYNC}, - {"pix", DEBUG_PIXEL}, - {"mem", DEBUG_MEMORY}, - {"allmsg", ~DEBUG_SYNC}, /* avoid the term "sync" because the parser uses strstr */ - {NULL, 0} -}; -#endif /* RADEON_DEBUG */ - -#endif /* RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) */ +#endif extern const struct dri_extension card_extensions[]; extern const struct dri_extension mm_extensions[]; @@ -257,7 +235,7 @@ radeonGetParam(__DRIscreenPrivate *sPriv, int param, void *value) struct drm_radeon_info info = { 0 }; if (sPriv->drm_version.major >= 2) { - info.value = (uint64_t)value; + info.value = (uint64_t)(uintptr_t)value; switch (param) { case RADEON_PARAM_DEVICE_ID: info.request = RADEON_INFO_DEVICE_ID; @@ -265,6 +243,9 @@ radeonGetParam(__DRIscreenPrivate *sPriv, int param, void *value) case RADEON_PARAM_NUM_GB_PIPES: info.request = RADEON_INFO_NUM_GB_PIPES; break; + case RADEON_PARAM_NUM_Z_PIPES: + info.request = RADEON_INFO_NUM_Z_PIPES; + break; default: return -EINVAL; } @@ -356,7 +337,7 @@ radeonFillInModes( __DRIscreenPrivate *psp, return (const __DRIconfig **) configs; } -#if !RADEON_COMMON +#if defined(RADEON_R100) static const __DRItexOffsetExtension radeonTexOffsetExtension = { { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, radeonSetTexOffset, @@ -369,7 +350,7 @@ static const __DRItexBufferExtension radeonTexBufferExtension = { }; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) 
+#if defined(RADEON_R200) static const __DRIallocateExtension r200AllocateExtension = { { __DRI_ALLOCATE, __DRI_ALLOCATE_VERSION }, r200AllocateMemoryMESA, @@ -389,7 +370,7 @@ static const __DRItexBufferExtension r200TexBufferExtension = { }; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#if defined(RADEON_R300) static const __DRItexOffsetExtension r300texOffsetExtension = { { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, r300SetTexOffset, @@ -402,7 +383,7 @@ static const __DRItexBufferExtension r300TexBufferExtension = { }; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +#if defined(RADEON_R600) static const __DRItexOffsetExtension r600texOffsetExtension = { { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, r600SetTexOffset, /* +r6/r7 */ @@ -851,10 +832,19 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->chip_family = CHIP_FAMILY_RS780; screen->chip_flags = RADEON_CHIPSET_TCL; break; + case PCI_CHIP_RS880_9710: + case PCI_CHIP_RS880_9711: + case PCI_CHIP_RS880_9712: + case PCI_CHIP_RS880_9713: + case PCI_CHIP_RS880_9714: + screen->chip_family = CHIP_FAMILY_RS880; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; case PCI_CHIP_RV770_9440: case PCI_CHIP_RV770_9441: case PCI_CHIP_RV770_9442: + case PCI_CHIP_RV770_9443: case PCI_CHIP_RV770_9444: case PCI_CHIP_RV770_9446: case PCI_CHIP_RV770_944A: @@ -876,11 +866,14 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->chip_flags = RADEON_CHIPSET_TCL; break; + case PCI_CHIP_RV730_9480: case PCI_CHIP_RV730_9487: + case PCI_CHIP_RV730_9488: case PCI_CHIP_RV730_9489: case PCI_CHIP_RV730_948F: case PCI_CHIP_RV730_9490: case PCI_CHIP_RV730_9491: + case PCI_CHIP_RV730_9495: case PCI_CHIP_RV730_9498: case PCI_CHIP_RV730_949C: case PCI_CHIP_RV730_949E: @@ -897,15 +890,19 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_RV710_9552: case PCI_CHIP_RV710_9553: case PCI_CHIP_RV710_9555: + case 
PCI_CHIP_RV710_9557: screen->chip_family = CHIP_FAMILY_RV710; screen->chip_flags = RADEON_CHIPSET_TCL; break; case PCI_CHIP_RV740_94A0: case PCI_CHIP_RV740_94A1: + case PCI_CHIP_RV740_94A3: case PCI_CHIP_RV740_94B1: case PCI_CHIP_RV740_94B3: + case PCI_CHIP_RV740_94B4: case PCI_CHIP_RV740_94B5: + case PCI_CHIP_RV740_94B9: screen->chip_family = CHIP_FAMILY_RV740; screen->chip_flags = RADEON_CHIPSET_TCL; break; @@ -945,9 +942,8 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) return NULL; } -#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) - RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control); -#endif + radeon_init_debug(); + /* parse information in __driConfigOptions */ driParseOptionInfo (&screen->optionCache, __driConfigOptions, __driNConfigOptions); @@ -990,6 +986,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13); screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15); screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25); + screen->drmSupportsOcclusionQueries = (sPriv->drm_version.minor >= 30); } ret = radeon_set_screen_flags(screen, dri_priv->deviceID); @@ -1025,7 +1022,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); else screen->scratch = (__volatile__ uint32_t *) - ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); + ((GLubyte *)screen->status.map + R600_SCRATCH_REG_OFFSET); screen->buffers = drmMapBufs( sPriv->fd ); if ( !screen->buffers ) { @@ -1085,7 +1082,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) /* +r6/r7 */ if(screen->chip_family >= CHIP_FAMILY_R600) { - if (ret) + if (ret) { FREE( screen ); fprintf(stderr, "Unable to get fb location need newer drm\n"); @@ -1098,18 +1095,18 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) } else { - if (ret) + if (ret) { if (screen->chip_family < CHIP_FAMILY_RS600 && !screen->kernel_mm) 
screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16; - else + else { FREE( screen ); fprintf(stderr, "Unable to get fb location need newer drm\n"); return NULL; } - } - else + } + else { screen->fbLocation = (temp & 0xffff) << 16; } @@ -1153,6 +1150,15 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) default: break; } + + if ( sPriv->drm_version.minor >= 31 ) { + ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_Z_PIPES, &temp); + if (ret) + screen->num_z_pipes = 2; + else + screen->num_z_pipes = temp; + } else + screen->num_z_pipes = 2; } if ( sPriv->drm_version.minor >= 10 ) { @@ -1216,22 +1222,22 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->extensions[i++] = &driMediaStreamCounterExtension.base; } -#if !RADEON_COMMON +#if defined(RADEON_R100) screen->extensions[i++] = &radeonTexOffsetExtension.base; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#if defined(RADEON_R200) if (IS_R200_CLASS(screen)) screen->extensions[i++] = &r200AllocateExtension.base; screen->extensions[i++] = &r200texOffsetExtension.base; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#if defined(RADEON_R300) screen->extensions[i++] = &r300texOffsetExtension.base; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +#if defined(RADEON_R600) screen->extensions[i++] = &r600texOffsetExtension.base; #endif @@ -1270,9 +1276,7 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) return NULL; } -#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) - RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control); -#endif + radeon_init_debug(); /* parse information in __driConfigOptions */ driParseOptionInfo (&screen->optionCache, @@ -1289,6 +1293,7 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) screen->drmSupportsPointSprites = 1; screen->drmSupportsCubeMapsR100 = 1; screen->drmSupportsVertexProgram = 1; + screen->drmSupportsOcclusionQueries = 1; screen->irq = 1; ret = radeonGetParam(sPriv, RADEON_PARAM_DEVICE_ID, 
&device_id); @@ -1353,6 +1358,12 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) break; } + ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_Z_PIPES, &temp); + if (ret) + screen->num_z_pipes = 2; + else + screen->num_z_pipes = temp; + } i = 0; @@ -1365,22 +1376,22 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) screen->extensions[i++] = &driMediaStreamCounterExtension.base; } -#if !RADEON_COMMON +#if defined(RADEON_R100) screen->extensions[i++] = &radeonTexBufferExtension.base; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#if defined(RADEON_R200) if (IS_R200_CLASS(screen)) screen->extensions[i++] = &r200AllocateExtension.base; screen->extensions[i++] = &r200TexBufferExtension.base; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#if defined(RADEON_R300) screen->extensions[i++] = &r300TexBufferExtension.base; #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +#if defined(RADEON_R600) screen->extensions[i++] = &r600TexBufferExtension.base; #endif @@ -1567,38 +1578,6 @@ radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } -/** - * Choose the appropriate CreateContext function based on the chipset. - * Eventually, all drivers will go through this process. 
- */ -static GLboolean radeonCreateContext(const __GLcontextModes * glVisual, - __DRIcontextPrivate * driContextPriv, - void *sharedContextPriv) -{ - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) - if (IS_R600_CLASS(screen)) - return r600CreateContext(glVisual, driContextPriv, sharedContextPriv); -#endif - -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) - if (IS_R300_CLASS(screen)) - return r300CreateContext(glVisual, driContextPriv, sharedContextPriv); -#endif - -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) - if (IS_R200_CLASS(screen)) - return r200CreateContext(glVisual, driContextPriv, sharedContextPriv); -#endif - -#if !RADEON_COMMON - (void)screen; - return r100CreateContext(glVisual, driContextPriv, sharedContextPriv); -#endif - return GL_FALSE; -} - /** * This is the driver specific part of the createNewScreen entry point. @@ -1610,22 +1589,22 @@ static GLboolean radeonCreateContext(const __GLcontextModes * glVisual, static const __DRIconfig ** radeonInitScreen(__DRIscreenPrivate *psp) { -#if !RADEON_COMMON +#if defined(RADEON_R100) static const char *driver_name = "Radeon"; static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 1, 6, 0 }; -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#elif defined(RADEON_R200) static const char *driver_name = "R200"; static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 1, 6, 0 }; -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#elif defined(RADEON_R300) static const char *driver_name = "R300"; static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 1, 24, 
0 }; -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +#elif defined(RADEON_R600) static const char *driver_name = "R600"; static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; static const __DRIversion dri_expected = { 4, 0, 0 }; @@ -1651,13 +1630,13 @@ radeonInitScreen(__DRIscreenPrivate *psp) * Hello chicken. Hello egg. How are you two today? */ driInitExtensions( NULL, card_extensions, GL_FALSE ); -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#if defined(RADEON_R200) driInitExtensions( NULL, blend_extensions, GL_FALSE ); driInitSingleExtension( NULL, ARB_vp_extension ); driInitSingleExtension( NULL, NV_vp_extension ); driInitSingleExtension( NULL, ATI_fs_extension ); driInitExtensions( NULL, point_extensions, GL_FALSE ); -#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600)) +#elif (defined(RADEON_R300) || defined(RADEON_R600)) driInitSingleExtension( NULL, gl_20_extension ); #endif @@ -1705,12 +1684,14 @@ __DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp) */ driInitExtensions( NULL, card_extensions, GL_FALSE ); driInitExtensions( NULL, mm_extensions, GL_FALSE ); -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#if defined(RADEON_R200) driInitExtensions( NULL, blend_extensions, GL_FALSE ); driInitSingleExtension( NULL, ARB_vp_extension ); driInitSingleExtension( NULL, NV_vp_extension ); driInitSingleExtension( NULL, ATI_fs_extension ); driInitExtensions( NULL, point_extensions, GL_FALSE ); +#elif (defined(RADEON_R300) || defined(RADEON_R600)) + driInitSingleExtension( NULL, gl_20_extension ); #endif if (!radeonInitDriver(psp)) { @@ -1791,8 +1772,19 @@ getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) const struct __DriverAPIRec driDriverAPI = { .InitScreen = radeonInitScreen, .DestroyScreen = radeonDestroyScreen, - .CreateContext = radeonCreateContext, +#if defined(RADEON_R200) + .CreateContext = r200CreateContext, + .DestroyContext = r200DestroyContext, +#elif defined(RADEON_R600) + 
.CreateContext = r600CreateContext, .DestroyContext = radeonDestroyContext, +#elif defined(RADEON_R300) + .CreateContext = r300CreateContext, + .DestroyContext = radeonDestroyContext, +#else + .CreateContext = r100CreateContext, + .DestroyContext = radeonDestroyContext, +#endif .CreateBuffer = radeonCreateBuffer, .DestroyBuffer = radeonDestroyBuffer, .SwapBuffers = radeonSwapBuffers, diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h index 2a2f6b1b0b..15744e8828 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.h +++ b/src/mesa/drivers/dri/radeon/radeon_screen.h @@ -99,6 +99,7 @@ typedef struct radeon_screen { GLboolean drmSupportsPointSprites; /* need radeon kernel module >= 1.13 */ GLboolean drmSupportsCubeMapsR100; /* need radeon kernel module >= 1.15 */ GLboolean drmSupportsVertexProgram; /* need radeon kernel module >= 1.25 */ + GLboolean drmSupportsOcclusionQueries; /* need radeon kernel module >= 1.30 */ GLboolean depthHasSurface; /* Configuration cache with default values for all contexts */ @@ -107,6 +108,7 @@ typedef struct radeon_screen { const __DRIextension *extensions[16]; int num_gb_pipes; + int num_z_pipes; int kernel_mm; drm_radeon_sarea_t *sarea; /* Private SAREA data */ struct radeon_bo_manager *bom; diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index b2a468b4fd..0c49c3713a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -51,6 +51,196 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); + +/* r200 depth buffer is always tiled - this is the formula + according to the docs unless I typo'ed in it +*/ +#if defined(RADEON_R200) +static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset; + if (rrb->has_surface) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + GLuint b; + offset = 0; + b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6))); + offset += (b >> 1) << 12; + offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; + offset += ((y >> 2) & 0x3) << 9; + offset += ((x >> 3) & 0x1) << 8; + offset += ((x >> 4) & 0x3) << 6; + offset += ((x >> 2) & 0x1) << 5; + offset += ((y >> 1) & 0x1) << 4; + offset += ((x >> 1) & 0x1) << 3; + offset += (y & 0x1) << 2; + offset += (x & 0x1) << 1; + } + return &ptr[offset]; +} + +static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset; + if (rrb->has_surface) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + GLuint b; + offset = 0; + b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5)); + offset += (b >> 1) << 12; + offset += (((rrb->pitch >> 7) & 0x1) ? 
(b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; + offset += ((y >> 2) & 0x3) << 9; + offset += ((x >> 2) & 0x1) << 8; + offset += ((x >> 3) & 0x3) << 6; + offset += ((y >> 1) & 0x1) << 5; + offset += ((x >> 1) & 0x1) << 4; + offset += (y & 0x1) << 3; + offset += (x & 0x1) << 2; + } + return &ptr[offset]; +} +#endif + +/* r600 tiling + * two main types: + * - 1D (akin to macro-linear/micro-tiled on older asics) + * - 2D (akin to macro-tiled/micro-tiled on older asics) + * only 1D tiling is implemented below + */ +#if defined(RADEON_R600) +static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, + GLint x, GLint y, GLint is_depth, GLint is_stencil) +{ + GLint element_bytes = rrb->cpp; + GLint num_samples = 1; + GLint tile_width = 8; + GLint tile_height = 8; + GLint tile_thickness = 1; + GLint pitch_elements = rrb->pitch / element_bytes; + GLint height = rrb->base.Height; + GLint z = 0; + GLint sample_number = 0; + /* */ + GLint tile_bytes; + GLint tiles_per_row; + GLint tiles_per_slice; + GLint slice_offset; + GLint tile_row_index; + GLint tile_column_index; + GLint tile_offset; + GLint pixel_number = 0; + GLint element_offset; + GLint offset = 0; + + tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples; + tiles_per_row = pitch_elements / tile_width; + tiles_per_slice = tiles_per_row * (height / tile_height); + slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes; + tile_row_index = y / tile_height; + tile_column_index = x / tile_width; + tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes; + + if (is_depth) { + GLint pixel_offset = 0; + + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0] + pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1] + pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] + pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + 
switch (element_bytes) { + case 2: + pixel_offset = pixel_number * element_bytes * num_samples; + break; + case 4: + /* stencil and depth data are stored separately within a tile. + * stencil is stored in a contiguous tile before the depth tile. + * stencil element is 1 byte, depth element is 3 bytes. + * stencil tile is 64 bytes. + */ + if (is_stencil) + pixel_offset = pixel_number * 1 * num_samples; + else + pixel_offset = (pixel_number * 3 * num_samples) + 64; + break; + } + element_offset = pixel_offset + (sample_number * element_bytes); + } else { + GLint sample_offset; + + switch (element_bytes) { + case 1: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] + pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] + pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + case 2: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] + pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0] + pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + case 4: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0] + pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2] + pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + } + sample_offset = sample_number * (tile_bytes / num_samples); + element_offset = sample_offset + (pixel_number * element_bytes); + } + offset = slice_offset + tile_offset + element_offset; + return offset; +} + +/* depth buffers */ +static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte 
*ptr = rrb->bo->ptr; + GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0); + return &ptr[offset]; +} + +static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1); + return &ptr[offset]; +} + +static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; + GLint offset; + + if (rrb->has_surface || !(rrb->bo->flags & mask)) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + offset = r600_1d_tile_helper(rrb, x, y, 0, 0); + } + return &ptr[offset]; +} + +#else + /* radeon tiling on r300-r500 has 4 states, macro-linear/micro-linear macro-linear/micro-tiled @@ -61,7 +251,6 @@ static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); 4 byte surface 8/16 byte (unused) */ - static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { @@ -143,7 +332,10 @@ static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb, return &ptr[offset]; } -#ifndef COMPILE_R300 +#endif + +#ifndef RADEON_R300 +#ifndef RADEON_R600 static uint32_t z24s8_to_s8z24(uint32_t val) { @@ -156,6 +348,7 @@ s8z24_to_z24s8(uint32_t val) return (val >> 24) | (val << 8); } #endif +#endif /* * Note that all information needed to access pixels in a renderbuffer @@ -216,7 +409,11 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_RGB565 #define TAG2(x,y) radeon##x##_RGB565##y +#if defined(RADEON_R600) +#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#endif #include "spantmp2.h" /* 16 bit, ARGB1555 color spanline and pixel functions @@ -226,7 +423,11 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_ARGB1555 #define TAG2(x,y) radeon##x##_ARGB1555##y +#if defined(RADEON_R600) 
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#endif #include "spantmp2.h" /* 16 bit, RGBA4 color spanline and pixel functions @@ -236,7 +437,11 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_ARGB4444 #define TAG2(x,y) radeon##x##_ARGB4444##y +#if defined(RADEON_R600) +#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#endif #include "spantmp2.h" /* 32 bit, xRGB8888 color spanline and pixel functions @@ -246,11 +451,19 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_xRGB8888 #define TAG2(x,y) radeon##x##_xRGB8888##y +#if defined(RADEON_R600) +#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000)) +#define PUT_VALUE(_x, _y, d) { \ + GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) +#else #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000)) #define PUT_VALUE(_x, _y, d) { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ *_ptr = d; \ } while (0) +#endif #include "spantmp2.h" /* 32 bit, ARGB8888 color spanline and pixel functions @@ -260,11 +473,19 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_ARGB8888 #define TAG2(x,y) radeon##x##_ARGB8888##y +#if defined(RADEON_R600) +#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))) +#define PUT_VALUE(_x, _y, d) { \ + GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) +#else #define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) #define PUT_VALUE(_x, _y, d) { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ *_ptr = d; \ } while (0) +#endif #include "spantmp2.h" /* 
================================================================ @@ -285,11 +506,27 @@ s8z24_to_z24s8(uint32_t val) */ #define VALUE_TYPE GLushort +#if defined(RADEON_R200) +#define WRITE_DEPTH( _x, _y, d ) \ + *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d +#elif defined(RADEON_R600) +#define WRITE_DEPTH( _x, _y, d ) \ + *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d +#else #define WRITE_DEPTH( _x, _y, d ) \ *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d +#endif +#if defined(RADEON_R200) +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) +#elif defined(RADEON_R600) +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) +#else #define READ_DEPTH( d, _x, _y ) \ d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) +#endif #define TAG(x) radeon##x##_z16 #include "depthtmp.h" @@ -301,7 +538,7 @@ s8z24_to_z24s8(uint32_t val) */ #define VALUE_TYPE GLuint -#ifdef COMPILE_R300 +#if defined(RADEON_R300) #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ @@ -310,6 +547,24 @@ do { \ tmp |= ((d << 8) & 0xffffff00); \ *_ptr = tmp; \ } while (0) +#elif defined(RADEON_R600) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + tmp &= 0xff000000; \ + tmp |= ((d) & 0x00ffffff); \ + *_ptr = tmp; \ +} while (0) +#elif defined(RADEON_R200) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + tmp &= 0xff000000; \ + tmp |= ((d) & 0x00ffffff); \ + *_ptr = tmp; \ +} while (0) #else #define WRITE_DEPTH( _x, _y, d ) \ do { \ @@ -321,19 +576,26 @@ do { \ } while (0) #endif -#ifdef COMPILE_R300 +#if defined(RADEON_R300) #define READ_DEPTH( d, _x, _y ) \ do { \ d = 
(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \ }while(0) +#elif defined(RADEON_R600) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \ + }while(0) +#elif defined(RADEON_R200) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \ + }while(0) #else #define READ_DEPTH( d, _x, _y ) \ d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; #endif -/* - fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\ - d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff; -*/ + #define TAG(x) radeon##x##_z24 #include "depthtmp.h" @@ -345,12 +607,33 @@ do { \ */ #define VALUE_TYPE GLuint -#ifdef COMPILE_R300 +#if defined(RADEON_R300) #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ *_ptr = d; \ } while (0) +#elif defined(RADEON_R600) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + tmp &= 0xff000000; \ + tmp |= (((d) >> 8) & 0x00ffffff); \ + *_ptr = tmp; \ + _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \ + tmp = *_ptr; \ + tmp &= 0xffffff00; \ + tmp |= (d) & 0xff; \ + *_ptr = tmp; \ +} while (0) +#elif defined(RADEON_R200) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = z24s8_to_s8z24(d); \ + *_ptr = tmp; \ +} while (0) #else #define WRITE_DEPTH( _x, _y, d ) \ do { \ @@ -360,20 +643,28 @@ do { \ } while (0) #endif -#ifdef COMPILE_R300 +#if defined(RADEON_R300) #define READ_DEPTH( d, _x, _y ) \ do { \ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \ }while(0) +#elif defined(RADEON_R600) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + 
y_off))) << 8) & 0xffffff00; \ + d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff; \ + }while(0) +#elif defined(RADEON_R200) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \ + }while(0) #else #define READ_DEPTH( d, _x, _y ) do { \ d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off ))); \ } while (0) #endif -/* - fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\ - d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff; -*/ + #define TAG(x) radeon##x##_z24_s8 #include "depthtmp.h" @@ -383,7 +674,7 @@ do { \ /* 24 bit depth, 8 bit stencil depthbuffer functions */ -#ifdef COMPILE_R300 +#ifdef RADEON_R300 #define WRITE_STENCIL( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \ @@ -392,6 +683,24 @@ do { \ tmp |= (d) & 0xff; \ *_ptr = tmp; \ } while (0) +#elif defined(RADEON_R600) +#define WRITE_STENCIL( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \ + GLuint tmp = *_ptr; \ + tmp &= 0xffffff00; \ + tmp |= (d) & 0xff; \ + *_ptr = tmp; \ +} while (0) +#elif defined(RADEON_R200) +#define WRITE_STENCIL( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \ + GLuint tmp = *_ptr; \ + tmp &= 0x00ffffff; \ + tmp |= (((d) & 0xff) << 24); \ + *_ptr = tmp; \ +} while (0) #else #define WRITE_STENCIL( _x, _y, d ) \ do { \ @@ -403,13 +712,27 @@ do { \ } while (0) #endif -#ifdef COMPILE_R300 +#ifdef RADEON_R300 #define READ_STENCIL( d, _x, _y ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ GLuint tmp = *_ptr; \ d = tmp & 0x000000ff; \ } while (0) +#elif defined(RADEON_R600) +#define READ_STENCIL( d, _x, _y ) \ +do { \ + GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + d = tmp & 0x000000ff; \ +} while (0) +#elif 
defined(RADEON_R200) +#define READ_STENCIL( d, _x, _y ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + d = (tmp & 0xff000000) >> 24; \ +} while (0) #else #define READ_STENCIL( d, _x, _y ) \ do { \ diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index 0d1728b747..4d0d35ee0c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/state.h" #include "main/context.h" #include "main/framebuffer.h" +#include "main/simple_list.h" #include "vbo/vbo.h" #include "tnl/tnl.h" @@ -549,31 +550,6 @@ static void radeonPolygonOffset( GLcontext *ctx, rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32; } -static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask ) -{ - r100ContextPtr rmesa = R100_CONTEXT(ctx); - GLuint i; - drm_radeon_stipple_t stipple; - - /* Must flip pattern upside down. - */ - for ( i = 0 ; i < 32 ; i++ ) { - rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i]; - } - - /* TODO: push this into cmd mechanism - */ - radeon_firevertices(&rmesa->radeon); - LOCK_HARDWARE( &rmesa->radeon ); - - /* FIXME: Use window x,y offsets into stipple RAM. 
- */ - stipple.mask = rmesa->state.stipple.mask; - drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, - &stipple, sizeof(drm_radeon_stipple_t) ); - UNLOCK_HARDWARE( &rmesa->radeon ); -} - static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); @@ -838,7 +814,7 @@ void radeonUpdateMaterial( GLcontext *ctx ) if (ctx->Light.ColorMaterialEnabled) mask &= ~ctx->Light.ColorMaterialBitmask; - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "%s\n", __FUNCTION__); @@ -1572,7 +1548,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint p, flag; - if ( RADEON_DEBUG & DEBUG_STATE ) + if ( RADEON_DEBUG & RADEON_STATE ) fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr( cap ), state ? "GL_TRUE" : "GL_FALSE" ); @@ -1866,7 +1842,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx ) GLboolean tmp; RADEON_STATECHANGE( rmesa, tcl ); - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]); @@ -1881,7 +1857,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx ) rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS; } - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & RADEON_STATE) fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]); } @@ -2099,7 +2075,7 @@ static GLboolean r100ValidateBuffers(GLcontext *ctx) RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); } - ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); if (ret) return GL_FALSE; return GL_TRUE; @@ -2221,12 +2197,28 @@ static 
void radeonWrapRunPipeline( GLcontext *ctx ) } } +static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask ) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + GLint i; + + radeon_firevertices(&r100->radeon); + + RADEON_STATECHANGE(r100, stp); + + /* Must flip pattern upside down. + */ + for ( i = 31 ; i >= 0; i--) { + r100->hw.stp.cmd[3 + i] = ((GLuint *) mask)[i]; + } +} + /* Initialize the driver's state functions. * Many of the ctx->Driver functions might have been initialized to * software defaults in the earlier _mesa_init_driver_functions() call. */ -void radeonInitStateFuncs( GLcontext *ctx ) +void radeonInitStateFuncs( GLcontext *ctx , GLboolean dri2 ) { ctx->Driver.UpdateState = radeonInvalidateState; ctx->Driver.LightingSpaceChange = radeonLightingSpaceChange; @@ -2259,7 +2251,10 @@ void radeonInitStateFuncs( GLcontext *ctx ) ctx->Driver.LogicOpcode = radeonLogicOpCode; ctx->Driver.PolygonMode = radeonPolygonMode; ctx->Driver.PolygonOffset = radeonPolygonOffset; - ctx->Driver.PolygonStipple = radeonPolygonStipple; + if (dri2) + ctx->Driver.PolygonStipple = radeonPolygonStipple; + else + ctx->Driver.PolygonStipple = radeonPolygonStipplePreKMS; ctx->Driver.RenderMode = radeonRenderMode; ctx->Driver.Scissor = radeonScissor; ctx->Driver.ShadeModel = radeonShadeModel; diff --git a/src/mesa/drivers/dri/radeon/radeon_state.h b/src/mesa/drivers/dri/radeon/radeon_state.h index a7c8eef32a..c780cff0cf 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.h +++ b/src/mesa/drivers/dri/radeon/radeon_state.h @@ -40,7 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "radeon_context.h" extern void radeonInitState( r100ContextPtr rmesa ); -extern void radeonInitStateFuncs( GLcontext *ctx ); +extern void radeonInitStateFuncs( GLcontext *ctx , GLboolean dri2); extern void radeonUpdateMaterial( GLcontext *ctx ); diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c index 57aa7f1ca4..f3ad0dd17a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state_init.c +++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c @@ -44,6 +44,7 @@ #include "radeon_tcl.h" #include "radeon_tex.h" #include "radeon_swtcl.h" +#include "radeon_queryobj.h" #include "../r200/r200_reg.h" @@ -197,55 +198,85 @@ static int cmdscl( int offset, int stride, int count ) return h.i; } -#define CHECK( NM, FLAG ) \ +#define CHECK( NM, FLAG, ADD ) \ static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ { \ - return FLAG ? atom->cmd_size : 0; \ + return FLAG ? atom->cmd_size + (ADD) : 0; \ } -#define TCL_CHECK( NM, FLAG ) \ +#define TCL_CHECK( NM, FLAG, ADD ) \ static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ { \ r100ContextPtr rmesa = R100_CONTEXT(ctx); \ - return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0; \ + return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size + (ADD) : 0; \ } -CHECK( always, GL_TRUE ) -CHECK( never, GL_FALSE ) -CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled ) -CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled ) +CHECK( always, GL_TRUE, 0 ) +CHECK( always_add2, GL_TRUE, 2 ) +CHECK( always_add4, GL_TRUE, 4 ) +CHECK( never, GL_FALSE, 0 ) +CHECK( tex0_mm, ctx->Texture.Unit[0]._ReallyEnabled, 3 ) +CHECK( tex1_mm, ctx->Texture.Unit[1]._ReallyEnabled, 3 ) /* need this for the cubic_map on disabled unit 2 bug, maybe r100 only? 
*/ -CHECK( tex2, ctx->Texture._EnabledUnits ) -CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT)) -CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT)) -CHECK( cube2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT)) -CHECK( fog, ctx->Fog.Enabled ) -TCL_CHECK( tcl, GL_TRUE ) -TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled ) -TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled ) -TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled ) -TCL_CHECK( tcl_lighting, ctx->Light.Enabled ) -TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled ) -TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled ) -TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled ) -TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled ) -TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled ) -TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled ) -TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled ) -TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled ) -TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled ) -TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1) ) -TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2) ) -TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4) ) -TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8) ) -TCL_CHECK( tcl_ucp4, (ctx->Transform.ClipPlanesEnabled & 0x10) ) -TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20) ) -TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled ) - -CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT)) -CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT)) -CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT)) +CHECK( tex2_mm, ctx->Texture._EnabledUnits, 3 ) +CHECK( tex0, 
ctx->Texture.Unit[0]._ReallyEnabled, 2 ) +CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled, 2 ) +CHECK( tex2, ctx->Texture._EnabledUnits, 2 ) +CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE ) +CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE ) +CHECK( cube2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE ) +CHECK( cube0_mm, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE ) +CHECK( cube1_mm, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE ) +CHECK( cube2_mm, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE ) +CHECK( fog, ctx->Fog.Enabled, 0 ) +CHECK( fog_add4, ctx->Fog.Enabled, 4 ) +TCL_CHECK( tcl, GL_TRUE, 0 ) +TCL_CHECK( tcl_add4, GL_TRUE, 4 ) +TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled, 0 ) +TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled, 0 ) +TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled, 0 ) +TCL_CHECK( tcl_tex0_add4, ctx->Texture.Unit[0]._ReallyEnabled, 4 ) +TCL_CHECK( tcl_tex1_add4, ctx->Texture.Unit[1]._ReallyEnabled, 4 ) +TCL_CHECK( tcl_tex2_add4, ctx->Texture.Unit[2]._ReallyEnabled, 4 ) +TCL_CHECK( tcl_lighting, ctx->Light.Enabled, 0 ) +TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 ) +TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled, 0 ) +TCL_CHECK( tcl_eyespace_or_lighting_add4, ctx->_NeedEyeCoords || ctx->Light.Enabled, 4 ) +TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 0 ) +TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 0 ) +TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 0 ) +TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 0 ) +TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 0 ) +TCL_CHECK( tcl_lit5, ctx->Light.Enabled && 
ctx->Light.Light[5].Enabled, 0 ) +TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 0 ) +TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 0 ) +TCL_CHECK( tcl_lit0_add6, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 6 ) +TCL_CHECK( tcl_lit1_add6, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 6 ) +TCL_CHECK( tcl_lit2_add6, ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 6 ) +TCL_CHECK( tcl_lit3_add6, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 6 ) +TCL_CHECK( tcl_lit4_add6, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 6 ) +TCL_CHECK( tcl_lit5_add6, ctx->Light.Enabled && ctx->Light.Light[5].Enabled, 6 ) +TCL_CHECK( tcl_lit6_add6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 6 ) +TCL_CHECK( tcl_lit7_add6, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 6 ) +TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1), 0 ) +TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2), 0 ) +TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4), 0 ) +TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8), 0 ) +TCL_CHECK( tcl_ucp4, (ctx->Transform.ClipPlanesEnabled & 0x10), 0 ) +TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20), 0 ) +TCL_CHECK( tcl_ucp0_add4, (ctx->Transform.ClipPlanesEnabled & 0x1), 4 ) +TCL_CHECK( tcl_ucp1_add4, (ctx->Transform.ClipPlanesEnabled & 0x2), 4 ) +TCL_CHECK( tcl_ucp2_add4, (ctx->Transform.ClipPlanesEnabled & 0x4), 4 ) +TCL_CHECK( tcl_ucp3_add4, (ctx->Transform.ClipPlanesEnabled & 0x8), 4 ) +TCL_CHECK( tcl_ucp4_add4, (ctx->Transform.ClipPlanesEnabled & 0x10), 4 ) +TCL_CHECK( tcl_ucp5_add4, (ctx->Transform.ClipPlanesEnabled & 0x20), 4 ) +TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled, 0 ) +TCL_CHECK( tcl_eyespace_or_fog_add4, ctx->_NeedEyeCoords || ctx->Fog.Enabled, 4 ) + +CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT), 0 ) +CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & 
TEXTURE_RECT_BIT), 0 ) +CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT), 0 ) #define OUT_VEC(hdr, data) do { \ drm_radeon_cmd_header_t h; \ @@ -271,9 +302,8 @@ static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r100ContextPtr r100 = R100_CONTEXT(ctx); BATCH_LOCALS(&r100->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 2; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_SCL(atom->cmd[0], atom->cmd+1); END_BATCH(); @@ -284,9 +314,8 @@ static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r100ContextPtr r100 = R100_CONTEXT(ctx); BATCH_LOCALS(&r100->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 4; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_VEC(atom->cmd[0], atom->cmd+1); END_BATCH(); @@ -297,9 +326,8 @@ static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r100ContextPtr r100 = R100_CONTEXT(ctx); BATCH_LOCALS(&r100->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 6; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1); OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1); @@ -313,10 +341,10 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom) struct radeon_renderbuffer *rrb; uint32_t cbpitch; uint32_t zbpitch, depth_fmt; - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); /* output the first 7 bytes of context */ - BEGIN_BATCH_NO_AUTOSTATE(dwords + 4); + BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_BATCH_TABLE(atom->cmd, 5); rrb = radeon_get_depthbuffer(&r100->radeon); @@ -371,6 +399,28 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom) END_BATCH(); } +static int check_always_ctx( GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + struct radeon_renderbuffer *rrb, *drb; + uint32_t dwords; + + rrb = 
radeon_get_colorbuffer(&r100->radeon); + if (!rrb || !rrb->bo) { + return 0; + } + + drb = radeon_get_depthbuffer(&r100->radeon); + + dwords = 10; + if (drb) + dwords += 6; + if (rrb) + dwords += 8; + + return dwords; +} + static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) { r100ContextPtr r100 = R100_CONTEXT(ctx); @@ -378,7 +428,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) struct radeon_renderbuffer *rrb, *drb; uint32_t cbpitch = 0; uint32_t zbpitch = 0; - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); uint32_t depth_fmt; rrb = radeon_get_colorbuffer(&r100->radeon); @@ -418,12 +468,6 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) } - /* output the first 7 bytes of context */ - dwords = 10; - if (drb) - dwords += 6; - if (rrb) - dwords += 8; BEGIN_BATCH_NO_AUTOSTATE(dwords); /* In the CS case we need to split this up */ @@ -474,7 +518,7 @@ static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r100ContextPtr r100 = R100_CONTEXT(ctx); BATCH_LOCALS(&r100->radeon); - uint32_t dwords = 3; + uint32_t dwords = atom->check(ctx, atom); int i = atom->idx, j; radeonTexObj *t = r100->state.texture.unit[i].texobj; radeon_mipmap_level *lvl; @@ -488,7 +532,7 @@ static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom) if (!t->mt) return; - BEGIN_BATCH_NO_AUTOSTATE(dwords + (5 * 3)); + BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_BATCH_TABLE(atom->cmd, 3); lvl = &t->mt->levels[0]; for (j = 0; j < 5; j++) { @@ -502,7 +546,7 @@ static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) { r100ContextPtr r100 = R100_CONTEXT(ctx); BATCH_LOCALS(&r100->radeon); - uint32_t dwords = 2; + uint32_t dwords = atom->check(ctx, atom); int i = atom->idx, j; radeonTexObj *t = r100->state.texture.unit[i].texobj; radeon_mipmap_level *lvl; @@ -523,13 +567,13 @@ static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) default: case 0: 
base_reg = RADEON_PP_CUBIC_OFFSET_T0_0; break; }; - BEGIN_BATCH_NO_AUTOSTATE(dwords + (5 * 4)); + BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_BATCH_TABLE(atom->cmd, 2); lvl = &t->mt->levels[0]; for (j = 0; j < 5; j++) { OUT_BATCH(CP_PACKET0(base_reg + (4 * j), 0)); OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset, - RADEON_GEM_DOMAIN_VRAM, 0, 0); + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); } END_BATCH(); } @@ -665,10 +709,11 @@ void radeonInitState( r100ContextPtr rmesa ) /* Allocate state buffers: */ - ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 ); - if (rmesa->radeon.radeonScreen->kernel_mm) + ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE, "CTX/context", 0 ); + if (rmesa->radeon.radeonScreen->kernel_mm) { rmesa->hw.ctx.emit = ctx_emit_cs; - else + rmesa->hw.ctx.check = check_always_ctx; + } else rmesa->hw.ctx.emit = ctx_emit; ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 ); ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 ); @@ -678,14 +723,68 @@ void radeonInitState( r100ContextPtr rmesa ) ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 ); ALLOC_STATE( tcl, always, TCL_STATE_SIZE, "TCL/tcl", 1 ); ALLOC_STATE( mtl, tcl_lighting, MTL_STATE_SIZE, "MTL/material", 1 ); - ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 ); - ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 ); - ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 ); - ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 ); - ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0); - ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1); - ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2); + if (rmesa->radeon.radeonScreen->kernel_mm) { + ALLOC_STATE( grd, always_add2, GRD_STATE_SIZE, "GRD/guard-band", 1 ); + ALLOC_STATE( fog, fog_add4, FOG_STATE_SIZE, "FOG/fog", 1 ); + ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 1 ); + 
ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 1 ); + ALLOC_STATE_IDX( tex[0], tex0_mm, TEX_STATE_SIZE, "TEX/tex-0", 0, 0); + ALLOC_STATE_IDX( tex[1], tex1_mm, TEX_STATE_SIZE, "TEX/tex-1", 0, 1); + ALLOC_STATE_IDX( tex[2], tex2_mm, TEX_STATE_SIZE, "TEX/tex-2", 0, 2); + ALLOC_STATE( mat[0], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 1 ); + ALLOC_STATE( mat[1], tcl_eyespace_or_fog_add4, MAT_STATE_SIZE, "MAT/modelview", 1 ); + ALLOC_STATE( mat[2], tcl_eyespace_or_lighting_add4, MAT_STATE_SIZE, "MAT/it-modelview", 1 ); + ALLOC_STATE( mat[3], tcl_tex0_add4, MAT_STATE_SIZE, "MAT/texmat0", 1 ); + ALLOC_STATE( mat[4], tcl_tex1_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 ); + ALLOC_STATE( mat[5], tcl_tex2_add4, MAT_STATE_SIZE, "MAT/texmat2", 1 ); + ALLOC_STATE( lit[0], tcl_lit0_add6, LIT_STATE_SIZE, "LIT/light-0", 1 ); + ALLOC_STATE( lit[1], tcl_lit1_add6, LIT_STATE_SIZE, "LIT/light-1", 1 ); + ALLOC_STATE( lit[2], tcl_lit2_add6, LIT_STATE_SIZE, "LIT/light-2", 1 ); + ALLOC_STATE( lit[3], tcl_lit3_add6, LIT_STATE_SIZE, "LIT/light-3", 1 ); + ALLOC_STATE( lit[4], tcl_lit4_add6, LIT_STATE_SIZE, "LIT/light-4", 1 ); + ALLOC_STATE( lit[5], tcl_lit5_add6, LIT_STATE_SIZE, "LIT/light-5", 1 ); + ALLOC_STATE( lit[6], tcl_lit6_add6, LIT_STATE_SIZE, "LIT/light-6", 1 ); + ALLOC_STATE( lit[7], tcl_lit7_add6, LIT_STATE_SIZE, "LIT/light-7", 1 ); + ALLOC_STATE( ucp[0], tcl_ucp0_add4, UCP_STATE_SIZE, "UCP/userclip-0", 1 ); + ALLOC_STATE( ucp[1], tcl_ucp1_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); + ALLOC_STATE( ucp[2], tcl_ucp2_add4, UCP_STATE_SIZE, "UCP/userclip-2", 1 ); + ALLOC_STATE( ucp[3], tcl_ucp3_add4, UCP_STATE_SIZE, "UCP/userclip-3", 1 ); + ALLOC_STATE( ucp[4], tcl_ucp4_add4, UCP_STATE_SIZE, "UCP/userclip-4", 1 ); + ALLOC_STATE( ucp[5], tcl_ucp5_add4, UCP_STATE_SIZE, "UCP/userclip-5", 1 ); + } else { + ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 ); + ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 ); + ALLOC_STATE( glt, tcl_lighting, 
GLT_STATE_SIZE, "GLT/light-global", 1 ); + ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 ); + ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0); + ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1); + ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2); + ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 ); + ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 ); + ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 ); + ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 ); + ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 ); + ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 ); + ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 ); + ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 ); + ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 ); + ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 ); + ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 ); + ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 ); + ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 ); + ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 ); + ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 ); + ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); + ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 ); + ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 ); + ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 ); + ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 ); + } + if (rmesa->radeon.radeonScreen->kernel_mm) { + ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 ); + } + for (i = 0; i < 3; i++) { if (rmesa->radeon.radeonScreen->kernel_mm) rmesa->hw.tex[i].emit = tex_emit_cs; @@ -694,14 +793,19 @@ 
void radeonInitState( r100ContextPtr rmesa ) } if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100) { - ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 ); - ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 ); - ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 ); - for (i = 0; i < 3; i++) - if (rmesa->radeon.radeonScreen->kernel_mm) - rmesa->hw.cube[i].emit = cube_emit_cs; - else - rmesa->hw.cube[i].emit = cube_emit; + if (rmesa->radeon.radeonScreen->kernel_mm) { + ALLOC_STATE_IDX( cube[0], cube0_mm, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 ); + ALLOC_STATE_IDX( cube[1], cube1_mm, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 ); + ALLOC_STATE_IDX( cube[2], cube2_mm, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 ); + for (i = 0; i < 3; i++) + rmesa->hw.cube[i].emit = cube_emit_cs; + } else { + ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 ); + ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 ); + ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 ); + for (i = 0; i < 3; i++) + rmesa->hw.cube[i].emit = cube_emit; + } } else { @@ -709,26 +813,6 @@ void radeonInitState( r100ContextPtr rmesa ) ALLOC_STATE_IDX( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 ); ALLOC_STATE_IDX( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 ); } - ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 ); - ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 ); - ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 ); - ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 ); - ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 ); - ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 ); - ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 ); - ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); - ALLOC_STATE( ucp[2], tcl_ucp2, 
UCP_STATE_SIZE, "UCP/userclip-2", 1 ); - ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 ); - ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 ); - ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 ); - ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 ); - ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 ); - ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 ); - ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 ); - ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 ); - ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 ); - ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 ); - ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 ); ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 ); ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 ); ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 ); @@ -793,12 +877,16 @@ void radeonInitState( r100ContextPtr rmesa ) } if (rmesa->radeon.radeonScreen->kernel_mm) { + rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0); + rmesa->hw.stp.cmd[STP_DATA_0] = 0; + rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31); + rmesa->hw.grd.emit = scl_emit; rmesa->hw.fog.emit = vec_emit; rmesa->hw.glt.emit = vec_emit; rmesa->hw.eye.emit = vec_emit; - for (i = 0; i <= 6; i++) + for (i = 0; i < 6; i++) rmesa->hw.mat[i].emit = vec_emit; for (i = 0; i < 8; i++) @@ -1063,7 +1151,13 @@ void radeonInitState( r100ContextPtr rmesa ) rmesa->hw.eye.cmd[EYE_Y] = 0; rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE; rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE; - + + if (rmesa->radeon.radeonScreen->kernel_mm) { + radeon_init_query_stateobj(&rmesa->radeon, R100_QUERYOBJ_CMDSIZE); + rmesa->radeon.query.queryobj.cmd[R100_QUERYOBJ_CMD_0] = CP_PACKET0(RADEON_RB3D_ZPASS_DATA, 0); + 
rmesa->radeon.query.queryobj.cmd[R100_QUERYOBJ_DATA_0] = 0; + } + rmesa->radeon.hw.all_dirty = GL_TRUE; rcommonInitCmdBuf(&rmesa->radeon); diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index e31f045991..e61f59eaea 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/enums.h" #include "main/imports.h" #include "main/macros.h" +#include "main/simple_list.h" #include "swrast_setup/swrast_setup.h" #include "math/m_translate.h" @@ -50,6 +51,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_state.h" #include "radeon_swtcl.h" #include "radeon_tcl.h" +#include "radeon_debug.h" /* R100: xyzw, c0, c1/fog, stq[0..2] = 4+1+1+3*3 = 15 right? */ @@ -213,22 +215,41 @@ static void radeonSetVertexFormat( GLcontext *ctx ) NULL, 0 ); rmesa->radeon.swtcl.vertex_size /= 4; RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset ); - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf( stderr, "%s: vertex_size= %d floats\n", - __FUNCTION__, rmesa->radeon.swtcl.vertex_size); + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, + "%s: vertex_size= %d floats\n", __FUNCTION__, rmesa->radeon.swtcl.vertex_size); } } +static void radeon_predict_emit_size( r100ContextPtr rmesa ) +{ + + if (!rmesa->radeon.swtcl.emit_prediction) { + const int state_size = radeonCountStateEmitSize( &rmesa->radeon ); + const int scissor_size = 8; + const int prims_size = 8; + const int vertex_size = 7; + + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, + state_size + + (scissor_size + prims_size + vertex_size), + __FUNCTION__)) + rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize( &rmesa->radeon ); + else + rmesa->radeon.swtcl.emit_prediction = state_size; + rmesa->radeon.swtcl.emit_prediction += scissor_size + prims_size + vertex_size + + rmesa->radeon.cmdbuf.cs->cdw; + } +} 
static void radeonRenderStart( GLcontext *ctx ) { - r100ContextPtr rmesa = R100_CONTEXT( ctx ); + r100ContextPtr rmesa = R100_CONTEXT( ctx ); - radeonSetVertexFormat( ctx ); - - if (rmesa->radeon.dma.flush != 0 && - rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim) - rmesa->radeon.dma.flush( ctx ); + radeonSetVertexFormat( ctx ); + + if (rmesa->radeon.dma.flush != 0 && + rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim) + rmesa->radeon.dma.flush( ctx ); } @@ -283,15 +304,12 @@ void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset) { r100ContextPtr rmesa = R100_CONTEXT(ctx); - rcommonEnsureCmdBufSpace(&rmesa->radeon, - rmesa->radeon.hw.max_state_size + (12*sizeof(int)), - __FUNCTION__); radeonEmitState(&rmesa->radeon); radeonEmitVertexAOS( rmesa, rmesa->radeon.swtcl.vertex_size, - rmesa->radeon.dma.current, + first_elem(&rmesa->radeon.dma.reserved)->bo, current_offset); @@ -299,6 +317,13 @@ void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset) rmesa->swtcl.vertex_format, rmesa->radeon.swtcl.hw_primitive, rmesa->radeon.swtcl.numverts); + if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." 
+ " We might overflow command buffer.\n", + rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction ); + + + rmesa->radeon.swtcl.emit_prediction = 0; } @@ -341,6 +366,16 @@ radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim ) // assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start); } +static void* radeon_alloc_verts( r100ContextPtr rmesa , GLuint nr, GLuint size ) +{ + void *rv; + do { + radeon_predict_emit_size( rmesa ); + rv = rcommonAllocDmaLowVerts( &rmesa->radeon, nr, size ); + } while (!rv); + return rv; +} + #define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx) #define INIT( prim ) radeonDmaPrimitive( rmesa, prim ) #define FLUSH() RADEON_NEWPRIM( rmesa ) @@ -348,8 +383,7 @@ radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim ) // (((int)rmesa->radeon.dma.current.end - (int)rmesa->radeon.dma.current.ptr) / (rmesa->radeon.swtcl.vertex_size*4)) #define GET_SUBSEQUENT_VB_MAX_VERTS() \ ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4)) -#define ALLOC_VERTS( nr ) \ - rcommonAllocDmaLowVerts( &rmesa->radeon, nr, rmesa->radeon.swtcl.vertex_size * 4 ) +#define ALLOC_VERTS( nr ) radeon_alloc_verts( rmesa, nr, rmesa->radeon.swtcl.vertex_size * 4 ) #define EMIT_VERTS( ctx, j, nr, buf ) \ _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf) @@ -386,8 +420,8 @@ static GLboolean radeon_run_render( GLcontext *ctx, if (!length) continue; - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "radeon_render.c: prim %s %d..%d\n", + radeon_print(RADEON_SWRENDER, RADEON_NORMAL, + "radeon_render.c: prim %s %d..%d\n", _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK), start, start+length); @@ -442,7 +476,7 @@ static void radeonResetLineStipple( GLcontext *ctx ); #undef ALLOC_VERTS #define CTX_ARG r100ContextPtr rmesa #define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size -#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, (size) * 4 ) +#define ALLOC_VERTS( n, size ) radeon_alloc_verts( rmesa, n, (size) * 4 ) 
#undef LOCAL_VARS #define LOCAL_VARS \ r100ContextPtr rmesa = R100_CONTEXT(ctx); \ @@ -551,7 +585,7 @@ do { \ #define LOCAL_VARS(n) \ r100ContextPtr rmesa = R100_CONTEXT(ctx); \ - GLuint color[n], spec[n]; \ + GLuint color[n] = {0}, spec[n] = {0}; \ GLuint coloroffset = rmesa->swtcl.coloroffset; \ GLuint specoffset = rmesa->swtcl.specoffset; \ (void) color; (void) spec; (void) coloroffset; (void) specoffset; @@ -750,7 +784,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE ); _swsetup_Wakeup( ctx ); rmesa->radeon.swtcl.RenderIndex = ~0; - if (RADEON_DEBUG & DEBUG_FALLBACKS) { + if (RADEON_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n", bit, getFallbackString(bit)); } @@ -781,7 +815,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) radeonChooseVertexState( ctx ); radeonChooseRenderState( ctx ); } - if (RADEON_DEBUG & DEBUG_FALLBACKS) { + if (RADEON_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "Radeon end rasterization fallback: 0x%x %s\n", bit, getFallbackString(bit)); } @@ -804,6 +838,7 @@ void radeonInitSwtcl( GLcontext *ctx ) init_rast_tab(); firsttime = 0; } + rmesa->radeon.swtcl.emit_prediction = 0; tnl->Driver.Render.Start = radeonRenderStart; tnl->Driver.Render.Finish = radeonRenderFinish; diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index df6708f05e..b334ea05e5 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -50,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "radeon_tcl.h" #include "radeon_swtcl.h" #include "radeon_maos.h" +#include "radeon_common_context.h" @@ -149,9 +150,6 @@ static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) if (rmesa->radeon.dma.flush) rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); - rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->radeon.hw.max_state_size + ELTS_BUFSZ(nr) + - AOS_BUFSZ(rmesa->radeon.tcl.aos_count), __FUNCTION__); - radeonEmitAOS( rmesa, rmesa->radeon.tcl.aos_count, 0 ); @@ -176,10 +174,6 @@ static void radeonEmitPrim( GLcontext *ctx, r100ContextPtr rmesa = R100_CONTEXT( ctx ); radeonTclPrimitive( ctx, prim, hwprim ); - rcommonEnsureCmdBufSpace( &rmesa->radeon, - AOS_BUFSZ(rmesa->radeon.tcl.aos_count) + - rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ ); - radeonEmitAOS( rmesa, rmesa->radeon.tcl.aos_count, start ); @@ -196,6 +190,8 @@ static void radeonEmitPrim( GLcontext *ctx, radeonEmitPrim( ctx, prim, hwprim, start, count ); \ (void) rmesa; } while (0) +#define MAX_CONVERSION_SIZE 40 + /* Try & join small primitives */ #if 0 @@ -360,6 +356,73 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) } } +/** + * Predict total emit size for next rendering operation so there is no flush in middle of rendering + * Prediction has to aim towards the best possible value that is worse than worst case scenario + */ +static GLuint radeonEnsureEmitSize( GLcontext * ctx , GLuint inputs ) +{ + r100ContextPtr rmesa = R100_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint space_required; + GLuint state_size; + GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */ + int i; + /* list of flags that are allocating aos object */ + const GLuint flags_to_check[] = { + VERT_BIT_NORMAL, + VERT_BIT_COLOR0, + VERT_BIT_COLOR1, + VERT_BIT_FOG + }; + /* predict number of aos to emit */ + for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i) + { + if (inputs & flags_to_check[i]) + 
++nr_aos; + } + for (i = 0; i < ctx->Const.MaxTextureUnits; ++i) + { + if (inputs & VERT_BIT_TEX(i)) + ++nr_aos; + } + + { + /* count the prediction for state size */ + space_required = 0; + state_size = radeonCountStateEmitSize( &rmesa->radeon ); + /* tcl may be changed in radeonEmitArrays so account for it if not dirty */ + if (!rmesa->hw.tcl.dirty) + state_size += rmesa->hw.tcl.check( rmesa->radeon.glCtx, &rmesa->hw.tcl ); + /* predict size for elements */ + for (i = 0; i < VB->PrimitiveCount; ++i) + { + if (!VB->Primitive[i].count) + continue; + /* If primitive.count is less than MAX_CONVERSION_SIZE + rendering code may decide convert to elts. + In that case we have to make pessimistic prediction. + and use larger of 2 paths. */ + const GLuint elts = ELTS_BUFSZ(nr_aos); + const GLuint index = INDEX_BUFSZ; + const GLuint vbuf = VBUF_BUFSZ; + if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE) + || vbuf > index + elts) + space_required += vbuf; + else + space_required += index + elts; + space_required += AOS_BUFSZ(nr_aos); + } + space_required += SCISSOR_BUFSZ; + } + /* flush the buffer in case we need more than is left. 
*/ + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__)) + return space_required + radeonCountStateEmitSize( &rmesa->radeon ); + else + return space_required + state_size; +} + /**********************************************************************/ /* Render pipeline stage */ /**********************************************************************/ @@ -410,6 +473,8 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, } radeonReleaseArrays( ctx, ~0 ); + GLuint emit_end = radeonEnsureEmitSize( ctx, inputs ) + + rmesa->radeon.cmdbuf.cs->cdw; radeonEmitArrays( ctx, inputs ); rmesa->tcl.Elts = VB->Elts; @@ -429,6 +494,10 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, radeonEmitPrimitive( ctx, start, start+length, prim ); } + if (emit_end < rmesa->radeon.cmdbuf.cs->cdw) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end); + return GL_FALSE; /* finished the pipe */ } @@ -518,7 +587,7 @@ static void transition_to_hwtnl( GLcontext *ctx ) // radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, // __FUNCTION__ ); - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "Radeon end tcl fallback\n"); } @@ -555,7 +624,7 @@ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) if (mode) { rmesa->radeon.TclFallback |= bit; if (oldfallback == 0) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "Radeon begin tcl fallback %s\n", getFallbackString( bit )); transition_to_swtnl( ctx ); @@ -564,7 +633,7 @@ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) else { rmesa->radeon.TclFallback &= ~bit; if (oldfallback == bit) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "Radeon end tcl fallback %s\n", getFallbackString( bit )); transition_to_hwtnl( ctx ); diff --git 
a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 2549d5cb5c..99865fff27 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -263,7 +263,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target, GLuint unit = ctx->Texture.CurrentUnit; struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - if ( RADEON_DEBUG & DEBUG_STATE ) { + if ( RADEON_DEBUG & RADEON_STATE ) { fprintf( stderr, "%s( %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) ); } @@ -325,10 +325,8 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, { radeonTexObj* t = radeon_tex_obj(texObj); - if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { - fprintf( stderr, "%s( %s )\n", __FUNCTION__, + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, "%s( %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) ); - } switch ( pname ) { case GL_TEXTURE_MIN_FILTER: @@ -376,10 +374,9 @@ static void radeonDeleteTexture( GLcontext *ctx, radeonTexObj* t = radeon_tex_obj(texObj); int i; - if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { - fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, + radeon_print(RADEON_TEXTURE, RADEON_NORMAL, + "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, _mesa_lookup_enum_by_nr( texObj->Target ) ); - } if ( rmesa ) { radeon_firevertices(&rmesa->radeon); diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index c29105d7b8..ae41b90efe 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -277,7 +277,7 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit ) assert( (texUnit->_ReallyEnabled == 0) || (texUnit->_Current != NULL) ); - if ( RADEON_DEBUG & DEBUG_TEXTURE ) { + if ( RADEON_DEBUG & RADEON_TEXTURE ) { fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit ); } @@ -833,11 +833,14 @@ static void 
import_tex_obj_state( r100ContextPtr rmesa, cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK; cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color; - if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) { - GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] ); + if (texobj->pp_txformat & RADEON_TXFORMAT_NON_POWER2) { + uint32_t *txr_cmd = &rmesa->hw.txr[unit].cmd[TXR_CMD_0]; txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */ txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */ - RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.txr[unit] ); + RADEON_STATECHANGE( rmesa, txr[unit] ); + } + + if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) { se_coord_fmt |= RADEON_VTX_ST0_NONPARAMETRIC << unit; } else { @@ -933,7 +936,7 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit ) (texUnit->GenS.Mode != texUnit->GenQ.Mode)) ) { /* Mixed modes, fallback: */ - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "fallback mixed texgen\n"); return GL_FALSE; } @@ -941,7 +944,7 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit ) } else { /* some texgen mode not including both S and T bits */ - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "fallback mixed texgen/nontexgen\n"); return GL_FALSE; } @@ -991,7 +994,7 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit ) default: /* Unsupported mode, fallback: */ - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "fallback GL_SPHERE_MAP\n"); return GL_FALSE; } @@ -1114,7 +1117,6 @@ static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_objec RADEON_STATECHANGE( rmesa, ctx ); rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit; - RADEON_STATECHANGE( rmesa, tcl ); rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit); diff --git 
a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index ad501c454c..049284ef8c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -579,7 +579,7 @@ static void radeon_teximage( } if (!t->mt) - radeon_try_alloc_miptree(rmesa, t, texImage, face, level); + radeon_try_alloc_miptree(rmesa, t, image, face, level); if (t->mt && radeon_miptree_matches_image(t->mt, texImage, face, level)) { radeon_mipmap_level *lvl; image->mt = t->mt; @@ -610,9 +610,17 @@ static void radeon_teximage( if (pixels) { radeon_teximage_map(image, GL_TRUE); - if (compressed) { - memcpy(texImage->Data, pixels, imageSize); + if (image->mt) { + uint32_t srcRowStride, bytesPerRow, rows; + srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); + bytesPerRow = srcRowStride; + rows = (height + 3) / 4; + copy_rows(texImage->Data, image->mt->levels[level].rowstride, + pixels, srcRowStride, rows, bytesPerRow); + } else { + memcpy(texImage->Data, pixels, imageSize); + } } else { GLuint dstRowStride; GLuint *dstImageOffsets; @@ -651,11 +659,6 @@ static void radeon_teximage( if (dims == 3) _mesa_free(dstImageOffsets); } - - /* SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - radeon_generate_mipmap(ctx, target, texObj); - } } _mesa_unmap_teximage_pbo(ctx, packing); @@ -756,14 +759,23 @@ static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int leve } if (compressed) { - uint32_t srcRowStride, bytesPerRow, rows; - dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width); + uint32_t srcRowStride, bytesPerRow, rows; + GLubyte *img_start; + if (!image->mt) { + dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width); + img_start = _mesa_compressed_image_address(xoffset, yoffset, 0, + texImage->TexFormat->MesaFormat, + texImage->Width, texImage->Data); + } + 
else { + uint32_t blocks_x = dstRowStride / (image->mt->bpp * 4); + img_start = texImage->Data + image->mt->bpp * 4 * (blocks_x * (yoffset / 4) + xoffset / 4); + } srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); bytesPerRow = srcRowStride; - rows = height / 4; + rows = (height + 3) / 4; - copy_rows(texImage->Data, dstRowStride, image->base.Data, srcRowStride, rows, - bytesPerRow); + copy_rows(img_start, dstRowStride, pixels, srcRowStride, rows, bytesPerRow); } else { if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, @@ -775,11 +787,6 @@ static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int leve format, type, pixels, packing)) _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage"); } - - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - radeon_generate_mipmap(ctx, target, texObj); - } } radeon_teximage_unmap(image); @@ -884,8 +891,8 @@ static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_texture_imag uint32_t height; /* need to confirm this value is correct */ if (mt->compressed) { - height = image->base.Height / 4; - srcrowstride = image->base.RowStride * mt->bpp; + height = (image->base.Height + 3) / 4; + srcrowstride = _mesa_compressed_row_stride(image->base.TexFormat->MesaFormat, image->base.Width); } else { height = image->base.Height * image->base.Depth; srcrowstride = image->base.Width * image->base.TexFormat->TexelBytes; @@ -919,7 +926,7 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t if (t->validated || t->image_override) return GL_TRUE; - if (RADEON_DEBUG & DEBUG_TEXTURE) + if (RADEON_DEBUG & RADEON_TEXTURE) fprintf(stderr, "%s: Validating texture %p now\n", __FUNCTION__, texObj); if (baseimage->base.Border > 0) @@ -947,11 +954,11 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t } if (!t->mt) { - if (RADEON_DEBUG & DEBUG_TEXTURE) + if (RADEON_DEBUG & 
RADEON_TEXTURE) fprintf(stderr, " Allocate new miptree\n"); - radeon_try_alloc_miptree(rmesa, t, &baseimage->base, 0, texObj->BaseLevel); + radeon_try_alloc_miptree(rmesa, t, baseimage, 0, texObj->BaseLevel); if (!t->mt) { - _mesa_problem(ctx, "r300_validate_texture failed to alloc miptree"); + _mesa_problem(ctx, "radeon_validate_texture failed to alloc miptree"); return GL_FALSE; } } @@ -960,16 +967,16 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t for(face = 0; face < t->mt->faces; ++face) { for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) { radeon_texture_image *image = get_radeon_texture_image(texObj->Image[face][level]); - if (RADEON_DEBUG & DEBUG_TEXTURE) + if (RADEON_DEBUG & RADEON_TEXTURE) fprintf(stderr, " face %i, level %i... %p vs %p ", face, level, t->mt, image->mt); if (t->mt == image->mt) { - if (RADEON_DEBUG & DEBUG_TEXTURE) + if (RADEON_DEBUG & RADEON_TEXTURE) fprintf(stderr, "OK\n"); continue; } - if (RADEON_DEBUG & DEBUG_TEXTURE) + if (RADEON_DEBUG & RADEON_TEXTURE) fprintf(stderr, "migrating\n"); migrate_image_to_miptree(t->mt, image, face, level); } @@ -1000,6 +1007,8 @@ radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level, } if (compressed) { + /* FIXME: this can't work for small textures (mips) which + use different hw stride */ _mesa_get_compressed_teximage(ctx, target, level, pixels, texObj, texImage); } else { diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h index 866807462a..e81d7fdcd0 100644 --- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h +++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h @@ -1601,6 +1601,8 @@ # define RADEON_STENCIL_VALUE_MASK (0xff << 16) # define RADEON_STENCIL_WRITEMASK_SHIFT 24 # define RADEON_STENCIL_WRITE_MASK (0xff << 24) +#define RADEON_RB3D_ZPASS_DATA 0x3290 +#define RADEON_RB3D_ZPASS_ADDR 0x3294 #define RADEON_RB3D_ZSTENCILCNTL 0x1c2c # define RADEON_DEPTH_FORMAT_MASK (0xf << 
0) # define RADEON_DEPTH_FORMAT_16BIT_INT_Z (0 << 0) @@ -1661,6 +1663,9 @@ # define RADEON_FORCE_Z_DIRTY (1 << 29) # define RADEON_Z_WRITE_ENABLE (1 << 30) # define RADEON_Z_DECOMPRESSION_ENABLE (1 << 31) + +#define RADEON_RE_STIPPLE_ADDR 0x1cc8 +#define RADEON_RE_STIPPLE_DATA 0x1ccc #define RADEON_RE_LINE_PATTERN 0x1cd0 # define RADEON_LINE_PATTERN_MASK 0x0000ffff # define RADEON_LINE_REPEAT_COUNT_SHIFT 16 diff --git a/src/mesa/drivers/dri/s3v/s3v_state.c b/src/mesa/drivers/dri/s3v/s3v_state.c index c71c89a3e1..561f42c705 100644 --- a/src/mesa/drivers/dri/s3v/s3v_state.c +++ b/src/mesa/drivers/dri/s3v/s3v_state.c @@ -2,7 +2,6 @@ * Author: Max Lingua <sunmax@libero.it> */ -#include <X11/Xlibint.h> #include "s3v_context.h" #include "s3v_macros.h" #include "s3v_dri.h" @@ -24,7 +23,7 @@ static void s3vUpdateAlphaMode( GLcontext *ctx ) { s3vContextPtr vmesa = S3V_CONTEXT(ctx); - CARD32 cmd = vmesa->CMD; + uint32_t cmd = vmesa->CMD; cmd &= ~ALPHA_BLEND_MASK; if ( ctx->Color.BlendEnabled ) { @@ -173,7 +172,7 @@ static void s3vDDClear( GLcontext *ctx, GLbitfield mask ) static void s3vUpdateZMode( GLcontext *ctx ) { s3vContextPtr vmesa = S3V_CONTEXT(ctx); - CARD32 cmd = vmesa->CMD; + uint32_t cmd = vmesa->CMD; DEBUG(("Depth.Test = %i\n", ctx->Depth.Test)); DEBUG(("CMD was = 0x%x ", cmd)); diff --git a/src/mesa/drivers/dri/s3v/s3v_tex.c b/src/mesa/drivers/dri/s3v/s3v_tex.c index 9b92519862..ec1182f34f 100644 --- a/src/mesa/drivers/dri/s3v/s3v_tex.c +++ b/src/mesa/drivers/dri/s3v/s3v_tex.c @@ -536,24 +536,13 @@ void s3vInitTextureFuncs( GLcontext *ctx ) #endif ctx->Driver.TexEnv = s3vTexEnv; - ctx->Driver.ChooseTextureFormat = _mesa_choose_tex_format; - ctx->Driver.TexImage1D = _mesa_store_teximage1d; ctx->Driver.TexImage2D = s3vTexImage2D; - ctx->Driver.TexImage3D = _mesa_store_teximage3d; - ctx->Driver.TexSubImage1D = _mesa_store_texsubimage1d; ctx->Driver.TexSubImage2D = s3vTexSubImage2D; - ctx->Driver.TexSubImage3D = _mesa_store_texsubimage3d; - 
ctx->Driver.CopyTexImage1D = _swrast_copy_teximage1d; - ctx->Driver.CopyTexImage2D = _swrast_copy_teximage2d; - ctx->Driver.CopyTexSubImage1D = _swrast_copy_texsubimage1d; - ctx->Driver.CopyTexSubImage2D = _swrast_copy_texsubimage2d; - ctx->Driver.CopyTexSubImage3D = _swrast_copy_texsubimage3d; ctx->Driver.BindTexture = s3vBindTexture; ctx->Driver.DeleteTexture = s3vDeleteTexture; ctx->Driver.TexParameter = s3vTexParameter; ctx->Driver.UpdateTexturePalette = 0; ctx->Driver.IsTextureResident = s3vIsTextureResident; - ctx->Driver.TestProxyTexImage = _mesa_test_proxy_teximage; s3vInitTextureObjects( ctx ); } diff --git a/src/mesa/drivers/dri/savage/savage_xmesa.c b/src/mesa/drivers/dri/savage/savage_xmesa.c index a94f1c076c..931ceff0a8 100644 --- a/src/mesa/drivers/dri/savage/savage_xmesa.c +++ b/src/mesa/drivers/dri/savage/savage_xmesa.c @@ -23,7 +23,6 @@ */ -#include <X11/Xlibint.h> #include <stdio.h> #include "main/context.h" @@ -180,7 +179,7 @@ savageInitDriver(__DRIscreenPrivate *sPriv) } /* Allocate the private area */ - savageScreen = (savageScreenPrivate *)Xmalloc(sizeof(savageScreenPrivate)); + savageScreen = (savageScreenPrivate *)_mesa_malloc(sizeof(savageScreenPrivate)); if (!savageScreen) return GL_FALSE; @@ -227,7 +226,7 @@ savageInitDriver(__DRIscreenPrivate *sPriv) savageScreen->agpTextures.handle, savageScreen->agpTextures.size, (drmAddress *)&(savageScreen->agpTextures.map)) != 0) { - Xfree(savageScreen); + _mesa_free(savageScreen); sPriv->private = NULL; return GL_FALSE; } @@ -247,7 +246,7 @@ savageInitDriver(__DRIscreenPrivate *sPriv) savageScreen->aperture.size, (drmAddress *)&savageScreen->aperture.map) != 0) { - Xfree(savageScreen); + _mesa_free(savageScreen); sPriv->private = NULL; return GL_FALSE; } @@ -283,7 +282,7 @@ savageDestroyScreen(__DRIscreenPrivate *sPriv) /* free all option information */ driDestroyOptionInfo (&savageScreen->optionCache); - Xfree(savageScreen); + _mesa_free(savageScreen); sPriv->private = NULL; } @@ -301,7 +300,7 @@ 
savageCreateContext( const __GLcontextModes *mesaVis, savageScreen->sarea_priv_offset); int textureSize[SAVAGE_NR_TEX_HEAPS]; int i; - imesa = (savageContextPtr)Xcalloc(sizeof(savageContext), 1); + imesa = (savageContextPtr)_mesa_calloc(sizeof(savageContext)); if (!imesa) { return GL_FALSE; } @@ -318,7 +317,7 @@ savageCreateContext( const __GLcontextModes *mesaVis, shareCtx = NULL; ctx = _mesa_create_context(mesaVis, shareCtx, &functions, imesa); if (!ctx) { - Xfree(imesa); + _mesa_free(imesa); return GL_FALSE; } driContextPriv->driverPrivate = imesa; diff --git a/src/mesa/drivers/dri/savage/savagecontext.h b/src/mesa/drivers/dri/savage/savagecontext.h index fd6399d6a6..53a37db1cb 100644 --- a/src/mesa/drivers/dri/savage/savagecontext.h +++ b/src/mesa/drivers/dri/savage/savagecontext.h @@ -31,7 +31,6 @@ typedef struct savage_context_t savageContext; typedef struct savage_context_t *savageContextPtr; typedef struct savage_texture_object_t *savageTextureObjectPtr; -#include <X11/Xlibint.h> #include "dri_util.h" #include "main/mtypes.h" #include "xf86drm.h" diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index fbfa49c99d..cd499cd5d2 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -43,6 +43,7 @@ #include "tnl/t_pipeline.h" #include "vbo/vbo.h" #include "drivers/common/driverfuncs.h" +#include "drivers/common/meta.h" #include "utils.h" #include "swrast_priv.h" @@ -62,8 +63,11 @@ #define need_GL_SGI_color_table /* sw extensions not associated with some GL version */ +#define need_GL_ARB_draw_elements_base_vertex #define need_GL_ARB_shader_objects +#define need_GL_ARB_vertex_array_object #define need_GL_ARB_vertex_program +#define need_GL_ARB_sync #define need_GL_APPLE_vertex_array_object #define need_GL_ATI_fragment_shader #define need_GL_ATI_separate_stencil @@ -93,8 +97,12 @@ const struct dri_extension card_extensions[] = { "GL_EXT_histogram", GL_EXT_histogram_functions }, { 
"GL_SGI_color_table", GL_SGI_color_table_functions }, + { "GL_ARB_depth_clamp", NULL }, + { "GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions }, { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, + { "GL_ARB_vertex_array_object", GL_ARB_vertex_array_object_functions }, { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, + { "GL_ARB_sync", GL_ARB_sync_functions }, { "GL_APPLE_vertex_array_object", GL_APPLE_vertex_array_object_functions }, { "GL_ATI_fragment_shader", GL_ATI_fragment_shader_functions }, { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions }, @@ -105,6 +113,7 @@ const struct dri_extension card_extensions[] = { "GL_EXT_paletted_texture", GL_EXT_paletted_texture_functions }, { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions }, { "GL_MESA_resize_buffers", GL_MESA_resize_buffers_functions }, + { "GL_NV_depth_clamp", NULL }, { "GL_NV_vertex_program", GL_NV_vertex_program_functions }, { "GL_NV_fragment_program", GL_NV_fragment_program_functions }, { NULL, NULL } @@ -641,6 +650,8 @@ driCreateNewContext(__DRIscreen *screen, const __DRIconfig *config, _mesa_enable_2_0_extensions(mesaCtx); _mesa_enable_2_1_extensions(mesaCtx); + _mesa_meta_init(mesaCtx); + return ctx; } @@ -652,6 +663,7 @@ driDestroyContext(__DRIcontext *ctx) if (ctx) { mesaCtx = &ctx->Base; + _mesa_meta_free(mesaCtx); _swsetup_DestroyContext( mesaCtx ); _swrast_DestroyContext( mesaCtx ); _tnl_DestroyContext( mesaCtx ); diff --git a/src/mesa/drivers/dri/tdfx/tdfx_tex.c b/src/mesa/drivers/dri/tdfx/tdfx_tex.c index 1f7257eaea..f6a48b3ae1 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_tex.c +++ b/src/mesa/drivers/dri/tdfx/tdfx_tex.c @@ -176,6 +176,54 @@ logbase2(int n) } +static void +tdfxGenerateMipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + GLint mipWidth, mipHeight; + tdfxMipMapLevel *mip; + struct gl_texture_image *mipImage; /* the new/next image */ + struct gl_texture_image *texImage; + const 
GLint maxLevels = _mesa_max_texture_levels(ctx, texObj->Target); + GLint level = texObj->BaseLevel; + GLsizei width, height, texelBytes; + const tdfxMipMapLevel *mml; + + texImage = _mesa_get_tex_image(ctx, texObj, target, level); + assert(!texImage->IsCompressed); + + mml = TDFX_TEXIMAGE_DATA(texImage); + + width = texImage->Width; + height = texImage->Height; + while (level < texObj->MaxLevel && level < maxLevels - 1) { + mipWidth = width / 2; + if (!mipWidth) { + mipWidth = 1; + } + mipHeight = height / 2; + if (!mipHeight) { + mipHeight = 1; + } + if ((mipWidth == width) && (mipHeight == height)) { + break; + } + ++level; + mipImage = _mesa_select_tex_image(ctx, texObj, target, level); + mip = TDFX_TEXIMAGE_DATA(mipImage); + _mesa_halve2x2_teximage2d(ctx, + texImage, + texelBytes, + mml->width, mml->height, + texImage->Data, mipImage->Data); + texImage = mipImage; + mml = mip; + width = mipWidth; + height = mipHeight; + } +} + + /* * Compute various texture image parameters. * Input: w, h - source texture width and height @@ -1397,45 +1445,6 @@ tdfxTexImage2D(GLcontext *ctx, GLenum target, GLint level, width, height, 1, format, type, pixels, packing); } - - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - GLint mipWidth, mipHeight; - tdfxMipMapLevel *mip; - struct gl_texture_image *mipImage; - const GLint maxLevels = _mesa_max_texture_levels(ctx, texObj->Target); - - assert(!texImage->IsCompressed); - - while (level < texObj->MaxLevel && level < maxLevels - 1) { - mipWidth = width / 2; - if (!mipWidth) { - mipWidth = 1; - } - mipHeight = height / 2; - if (!mipHeight) { - mipHeight = 1; - } - if ((mipWidth == width) && (mipHeight == height)) { - break; - } - _mesa_TexImage2D(target, ++level, internalFormat, - mipWidth, mipHeight, border, - format, type, - NULL); - mipImage = _mesa_select_tex_image(ctx, texObj, target, level); - mip = TDFX_TEXIMAGE_DATA(mipImage); - _mesa_halve2x2_teximage2d(ctx, - texImage, - 
texelBytes, - mml->width, mml->height, - texImage->Data, mipImage->Data); - texImage = mipImage; - mml = mip; - width = mipWidth; - height = mipHeight; - } - } } RevalidateTexture(ctx, texObj); @@ -1507,44 +1516,6 @@ tdfxTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, format, type, pixels, packing); } - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - GLint mipWidth, mipHeight; - tdfxMipMapLevel *mip; - struct gl_texture_image *mipImage; - const GLint maxLevels = _mesa_max_texture_levels(ctx, texObj->Target); - - assert(!texImage->IsCompressed); - - width = texImage->Width; - height = texImage->Height; - while (level < texObj->MaxLevel && level < maxLevels - 1) { - mipWidth = width / 2; - if (!mipWidth) { - mipWidth = 1; - } - mipHeight = height / 2; - if (!mipHeight) { - mipHeight = 1; - } - if ((mipWidth == width) && (mipHeight == height)) { - break; - } - ++level; - mipImage = _mesa_select_tex_image(ctx, texObj, target, level); - mip = TDFX_TEXIMAGE_DATA(mipImage); - _mesa_halve2x2_teximage2d(ctx, - texImage, - texelBytes, - mml->width, mml->height, - texImage->Data, mipImage->Data); - texImage = mipImage; - mml = mip; - width = mipWidth; - height = mipHeight; - } - } - ti->reloadImages = GL_TRUE; /* signal the image needs to be reloaded */ fxMesa->new_state |= TDFX_NEW_TEXTURE; /* XXX this might be a bit much */ } @@ -1703,11 +1674,6 @@ tdfxCompressedTexImage2D (GLcontext *ctx, GLenum target, MEMCPY(texImage->Data, data, texImage->CompressedSize); } - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - assert(!texImage->IsCompressed); - } - RevalidateTexture(ctx, texObj); ti->reloadImages = GL_TRUE; @@ -1770,11 +1736,6 @@ tdfxCompressedTexSubImage2D( GLcontext *ctx, GLenum target, texImage->Data); } - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - assert(!texImage->IsCompressed); - } - RevalidateTexture(ctx, texObj); 
ti->reloadImages = GL_TRUE; @@ -1914,4 +1875,5 @@ void tdfxInitTextureFuncs( struct dd_function_table *functions ) functions->CompressedTexImage2D = tdfxCompressedTexImage2D; functions->CompressedTexSubImage2D = tdfxCompressedTexSubImage2D; functions->UpdateTexturePalette = tdfxUpdateTexturePalette; + functions->GenerateMipmap = tdfxGenerateMipmap; } diff --git a/src/mesa/drivers/dri/unichrome/via_tex.c b/src/mesa/drivers/dri/unichrome/via_tex.c index d2010f0907..54073e7691 100644 --- a/src/mesa/drivers/dri/unichrome/via_tex.c +++ b/src/mesa/drivers/dri/unichrome/via_tex.c @@ -818,11 +818,6 @@ static void viaTexImage(GLcontext *ctx, } } - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, texObj); - } - _mesa_unmap_teximage_pbo(ctx, packing); } diff --git a/src/mesa/drivers/osmesa/osmesa.c b/src/mesa/drivers/osmesa/osmesa.c index 904659e345..692657a5df 100644 --- a/src/mesa/drivers/osmesa/osmesa.c +++ b/src/mesa/drivers/osmesa/osmesa.c @@ -50,6 +50,7 @@ #include "tnl/t_context.h" #include "tnl/t_pipeline.h" #include "drivers/common/driverfuncs.h" +#include "drivers/common/meta.h" #include "vbo/vbo.h" @@ -1258,6 +1259,8 @@ OSMesaCreateContextExt( GLenum format, GLint depthBits, GLint stencilBits, osmesa->bInd = bind; osmesa->aInd = aind; + _mesa_meta_init(&osmesa->mesa); + /* Initialize the software rasterizer and helper modules. 
*/ { GLcontext *ctx = &osmesa->mesa; @@ -1304,6 +1307,8 @@ OSMesaDestroyContext( OSMesaContext osmesa ) if (osmesa->rb) _mesa_reference_renderbuffer(&osmesa->rb, NULL); + _mesa_meta_free( &osmesa->mesa ); + _swsetup_DestroyContext( &osmesa->mesa ); _tnl_DestroyContext( &osmesa->mesa ); _vbo_DestroyContext( &osmesa->mesa ); diff --git a/src/mesa/drivers/windows/gdi/mesa.def b/src/mesa/drivers/windows/gdi/mesa.def index ede43ef4c0..5abcd1d927 100644 --- a/src/mesa/drivers/windows/gdi/mesa.def +++ b/src/mesa/drivers/windows/gdi/mesa.def @@ -902,7 +902,6 @@ EXPORTS _mesa_generate_mipmap _mesa_get_compressed_teximage _mesa_get_current_context - _mesa_get_program_register _mesa_get_teximage _mesa_init_driver_functions _mesa_init_glsl_driver_functions @@ -944,6 +943,15 @@ EXPORTS _mesa_update_framebuffer_visual _mesa_use_program _mesa_Viewport + _mesa_meta_CopyColorSubTable + _mesa_meta_CopyColorTable + _mesa_meta_CopyConvolutionFilter1D + _mesa_meta_CopyConvolutionFilter2D + _mesa_meta_CopyTexImage1D + _mesa_meta_CopyTexImage2D + _mesa_meta_CopyTexSubImage1D + _mesa_meta_CopyTexSubImage2D + _mesa_meta_CopyTexSubImage3D _mesa_wait_query _swrast_Accum _swrast_Bitmap @@ -954,15 +962,6 @@ EXPORTS _swrast_Clear _swrast_choose_line _swrast_choose_triangle - _swrast_CopyColorSubTable - _swrast_CopyColorTable - _swrast_CopyConvolutionFilter1D - _swrast_CopyConvolutionFilter2D - _swrast_copy_teximage1d - _swrast_copy_teximage2d - _swrast_copy_texsubimage1d - _swrast_copy_texsubimage2d - _swrast_copy_texsubimage3d _swrast_CreateContext _swrast_DestroyContext _swrast_exec_fragment_program diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c index e1971db693..8929b22af1 100644 --- a/src/mesa/drivers/windows/gdi/wmesa.c +++ b/src/mesa/drivers/windows/gdi/wmesa.c @@ -12,6 +12,7 @@ #include "framebuffer.h" #include "renderbuffer.h" #include "drivers/common/driverfuncs.h" +#include "drivers/common/meta.h" #include "vbo/vbo.h" #include 
"swrast/swrast.h" #include "swrast_setup/swrast_setup.h" @@ -1515,6 +1516,8 @@ WMesaContext WMesaCreateContext(HDC hDC, _mesa_enable_2_0_extensions(ctx); _mesa_enable_2_1_extensions(ctx); + _mesa_meta_init(ctx); + /* Initialize the software rasterizer and helper modules. */ if (!_swrast_CreateContext(ctx) || !_vbo_CreateContext(ctx) || @@ -1558,6 +1561,8 @@ void WMesaDestroyContext( WMesaContext pwc ) DeleteObject(pwc->clearPen); DeleteObject(pwc->clearBrush); + _mesa_meta_free(ctx); + _swsetup_DestroyContext(ctx); _tnl_DestroyContext(ctx); _vbo_DestroyContext(ctx); diff --git a/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c b/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c index 342a742867..7ac425a109 100644 --- a/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c +++ b/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c @@ -1346,6 +1346,8 @@ static void wmesa_update_state_first_time( struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference( ctx ); TNLcontext *tnl = TNL_CONTEXT(ctx); + _mesa_init_driver_functions(&ctx->Driver); + /* * XXX these function pointers could be initialized just once during * context creation since they don't depend on any state changes. 
@@ -1362,8 +1364,6 @@ static void wmesa_update_state_first_time( ctx->Driver.Viewport = wmesa_viewport; - ctx->Driver.Accum = _swrast_Accum; - ctx->Driver.Bitmap = _swrast_Bitmap; ctx->Driver.Clear = clear; ctx->Driver.Flush = flush; @@ -1371,28 +1371,6 @@ static void wmesa_update_state_first_time( ctx->Driver.ClearColor = clear_color; ctx->Driver.Enable = enable; - ctx->Driver.CopyPixels = _swrast_CopyPixels; - ctx->Driver.DrawPixels = _swrast_DrawPixels; - ctx->Driver.ReadPixels = _swrast_ReadPixels; - - ctx->Driver.ChooseTextureFormat = _mesa_choose_tex_format; - ctx->Driver.TexImage1D = _mesa_store_teximage1d; - ctx->Driver.TexImage2D = _mesa_store_teximage2d; - ctx->Driver.TexImage3D = _mesa_store_teximage3d; - ctx->Driver.TexSubImage1D = _mesa_store_texsubimage1d; - ctx->Driver.TexSubImage2D = _mesa_store_texsubimage2d; - ctx->Driver.TexSubImage3D = _mesa_store_texsubimage3d; - ctx->Driver.TestProxyTexImage = _mesa_test_proxy_teximage; - - ctx->Driver.CopyTexImage1D = _swrast_copy_teximage1d; - ctx->Driver.CopyTexImage2D = _swrast_copy_teximage2d; - ctx->Driver.CopyTexSubImage1D = _swrast_copy_texsubimage1d; - ctx->Driver.CopyTexSubImage2D = _swrast_copy_texsubimage2d; - ctx->Driver.CopyTexSubImage3D = _swrast_copy_texsubimage3d; - ctx->Driver.CopyColorTable = _swrast_CopyColorTable; - ctx->Driver.CopyColorSubTable = _swrast_CopyColorSubTable; - ctx->Driver.CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D; - ctx->Driver.CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D; // Does not apply for Mesa 5.x //ctx->Driver.BaseCompressedTexFormat = _mesa_base_compressed_texformat; diff --git a/src/mesa/drivers/windows/icd/mesa.def b/src/mesa/drivers/windows/icd/mesa.def index 465b380a0c..25ac08a2f0 100644 --- a/src/mesa/drivers/windows/icd/mesa.def +++ b/src/mesa/drivers/windows/icd/mesa.def @@ -75,6 +75,15 @@ EXPORTS _mesa_strcmp _mesa_test_proxy_teximage _mesa_Viewport + _mesa_meta_CopyColorSubTable + _mesa_meta_CopyColorTable + 
_mesa_meta_CopyConvolutionFilter1D + _mesa_meta_CopyConvolutionFilter2D + _mesa_meta_CopyTexImage1D + _mesa_meta_CopyTexImage2D + _mesa_meta_CopyTexSubImage1D + _mesa_meta_CopyTexSubImage2D + _mesa_meta_CopyTexSubImage3D _swrast_Accum _swrast_Bitmap _swrast_CopyPixels @@ -84,15 +93,6 @@ EXPORTS _swrast_Clear _swrast_choose_line _swrast_choose_triangle - _swrast_CopyColorSubTable - _swrast_CopyColorTable - _swrast_CopyConvolutionFilter1D - _swrast_CopyConvolutionFilter2D - _swrast_copy_teximage1d - _swrast_copy_teximage2d - _swrast_copy_texsubimage1d - _swrast_copy_texsubimage2d - _swrast_copy_texsubimage3d _swrast_CreateContext _swrast_DestroyContext _swrast_InvalidateState diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index 6ae5b6fd58..79b058634c 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -79,6 +79,7 @@ #include "tnl/t_context.h" #include "tnl/t_pipeline.h" #include "drivers/common/driverfuncs.h" +#include "drivers/common/meta.h" /** * Global X driver lock @@ -1316,7 +1317,9 @@ xmesa_convert_from_x_visual_type( int visualType ) #define need_GL_SGI_color_table /* sw extensions not associated with some GL version */ +#define need_GL_ARB_draw_elements_base_vertex #define need_GL_ARB_shader_objects +#define need_GL_ARB_sync #define need_GL_ARB_vertex_program #define need_GL_APPLE_vertex_array_object #define need_GL_ATI_fragment_shader @@ -1345,7 +1348,10 @@ const struct dri_extension card_extensions[] = { "GL_EXT_histogram", GL_EXT_histogram_functions }, { "GL_SGI_color_table", GL_SGI_color_table_functions }, + { "GL_ARB_depth_clamp", NULL }, + { "GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions }, { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, + { "GL_ARB_sync", GL_ARB_sync_functions }, { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, { "GL_APPLE_vertex_array_object", GL_APPLE_vertex_array_object_functions }, { "GL_ATI_fragment_shader", 
GL_ATI_fragment_shader_functions }, @@ -1355,6 +1361,7 @@ const struct dri_extension card_extensions[] = { "GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions }, { "GL_EXT_paletted_texture", GL_EXT_paletted_texture_functions }, { "GL_MESA_resize_buffers", GL_MESA_resize_buffers_functions }, + { "GL_NV_depth_clamp", NULL }, { "GL_NV_vertex_program", GL_NV_vertex_program_functions }, { "GL_NV_fragment_program", GL_NV_fragment_program_functions }, { NULL, NULL } @@ -1641,6 +1648,8 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) xmesa_register_swrast_functions( mesaCtx ); _swsetup_Wakeup(mesaCtx); + _mesa_meta_init(mesaCtx); + return c; } @@ -1655,6 +1664,8 @@ void XMesaDestroyContext( XMesaContext c ) FXdestroyContext( XMESA_BUFFER(mesaCtx->DrawBuffer) ); #endif + _mesa_meta_free( mesaCtx ); + _swsetup_DestroyContext( mesaCtx ); _swrast_DestroyContext( mesaCtx ); _tnl_DestroyContext( mesaCtx ); diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index 9a01465bdf..e2d4aa9b2d 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -51,6 +51,7 @@ #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" #include "tnl/t_context.h" +#include "drivers/common/meta.h" #include "xmesaP.h" @@ -1147,19 +1148,28 @@ xmesa_init_driver_functions( XMesaVisual xmvisual, driver->IndexMask = index_mask; driver->ColorMask = color_mask; driver->Enable = enable; - driver->Clear = clear_buffers; driver->Viewport = xmesa_viewport; -#ifndef XFree86Server - driver->CopyPixels = xmesa_CopyPixels; - if (xmvisual->undithered_pf == PF_8R8G8B && - xmvisual->dithered_pf == PF_8R8G8B && - xmvisual->BitsPerPixel == 32) { - driver->DrawPixels = xmesa_DrawPixels_8R8G8B; - } - else if (xmvisual->undithered_pf == PF_5R6G5B) { - driver->DrawPixels = xmesa_DrawPixels_5R6G5B; + if (TEST_META_FUNCS) { + driver->Clear = _mesa_meta_Clear; + driver->CopyPixels = _mesa_meta_CopyPixels; + driver->BlitFramebuffer = 
_mesa_meta_BlitFramebuffer; + driver->DrawPixels = _mesa_meta_DrawPixels; + driver->Bitmap = _mesa_meta_Bitmap; } + else { + driver->Clear = clear_buffers; +#ifndef XFree86Server + driver->CopyPixels = xmesa_CopyPixels; + if (xmvisual->undithered_pf == PF_8R8G8B && + xmvisual->dithered_pf == PF_8R8G8B && + xmvisual->BitsPerPixel == 32) { + driver->DrawPixels = xmesa_DrawPixels_8R8G8B; + } + else if (xmvisual->undithered_pf == PF_5R6G5B) { + driver->DrawPixels = xmesa_DrawPixels_5R6G5B; + } #endif + } driver->TestProxyTexImage = test_proxy_teximage; #if ENABLE_EXT_texure_compression_s3tc driver->ChooseTextureFormat = choose_tex_format; diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h index 65e747d7b9..25db55862e 100644 --- a/src/mesa/drivers/x11/xmesaP.h +++ b/src/mesa/drivers/x11/xmesaP.h @@ -581,4 +581,8 @@ extern void xmesa_register_swrast_functions( GLcontext *ctx ); #define ENABLE_EXT_timer_query 0 /* may not have 64-bit GLuint64EXT */ #endif + +#define TEST_META_FUNCS 0 + + #endif |