diff options
Diffstat (limited to 'src/mesa')
116 files changed, 8367 insertions, 3074 deletions
diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 633bfb19a3..08d723553e 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -24,25 +24,38 @@ GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) # Figure out what to make here -default: +default: depend @for driver in $(DRIVER_DIRS) ; do \ case "$$driver" in \ - x11) $(MAKE) stand-alone ;; \ - dri) $(MAKE) linux-solo ;; \ - osmesa) $(MAKE) osmesa-only ;; \ - beos) $(MAKE) beos ;; \ - directfb) $(MAKE) directfb ;; \ - fbdev) $(MAKE) fbdev ;; \ + x11) $(MAKE) stand-alone || exit 1 ;; \ + dri) $(MAKE) linux-solo || exit 1 ;; \ + osmesa) $(MAKE) osmesa-only || exit 1 ;; \ + beos) $(MAKE) beos || exit 1 ;; \ + directfb) $(MAKE) directfb || exit 1 ;; \ + fbdev) $(MAKE) fbdev || exit 1 ;; \ *) echo "$$driver is invalid in DRIVER_DIRS" >&2; exit 1;; \ esac ; \ done +install: default + @for driver in $(DRIVER_DIRS) ; do \ + case "$$driver" in \ + osmesa) if [ "$(DRIVER_DIRS)" = osmesa ]; then \ + $(MAKE) install-headers install-osmesa || exit 1 ; \ + else \ + $(MAKE) install-osmesa || exit 1 ; \ + fi ;; \ + dri) $(MAKE) install-libgl install-dri || exit 1 ;; \ + *) $(MAKE) install-libgl || exit 1 ;; \ + esac ; \ + done + ###################################################################### # BeOS driver target beos: depend subdirs libmesa.a - cd drivers/beos; $(MAKE) + cd drivers/beos && $(MAKE) ###################################################################### @@ -56,7 +69,7 @@ libmesa.a: $(SOLO_OBJECTS) fi linux-solo: depend subdirs libmesa.a - cd drivers/dri ; $(MAKE) + cd drivers/dri && $(MAKE) ##################################################################### @@ -69,7 +82,7 @@ libgl-core: $(CORE_OBJECTS) $(GL_LIB_DEPS) directfb: depend subdirs libgl-core - cd drivers/directfb ; $(MAKE) + cd drivers/directfb && $(MAKE) ##################################################################### @@ -145,11 +158,11 @@ depend: $(ALL_SOURCES) subdirs: @ if echo "$(ASM_FLAGS)" | grep -q USE_X86_ASM ; then \ - (cd 
x86 ; $(MAKE)) ; \ + (cd x86 && $(MAKE)) || exit 1 ; \ fi @ if echo "$(ASM_FLAGS)" | grep -q USE_X86_64_ASM ; then \ - (cd x86 ; $(MAKE)) ; \ - (cd x86-64 ; $(MAKE)) ; \ + (cd x86 && $(MAKE)) || exit 1 ; \ + (cd x86-64 && $(MAKE)) || exit 1 ; \ fi pcedit = sed \ @@ -160,31 +173,25 @@ pcedit = sed \ gl.pc: gl.pc.in $(pcedit) $< > $@ -install-libgl: gl.pc +install-headers: $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/include/GL + $(INSTALL) -m 644 $(TOP)/include/GL/*.h \ + $(DESTDIR)$(INSTALL_DIR)/include/GL + +install-libgl: default gl.pc install-headers $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR) $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR)/pkgconfig - $(INSTALL) -m 644 $(TOP)/include/GL/*.h $(DESTDIR)$(INSTALL_DIR)/include/GL - @if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \ - $(INSTALL) $(TOP)/$(LIB_DIR)/libGL* \ - $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR); \ - fi + $(INSTALL) $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME)* \ + $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR) $(INSTALL) -m 644 gl.pc $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR)/pkgconfig -install-osmesa: - @if [ -e $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME) ]; then \ - $(INSTALL) $(TOP)/$(LIB_DIR)/libOSMesa* \ - $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR); \ - fi - -install-drivers: - @for target in $(DRIVER_DIRS); do \ - case "$$target" in \ - dri) cd drivers/dri ; $(MAKE) install ;; \ - esac; \ - done +install-osmesa: default + $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR) + $(INSTALL) $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME)* \ + $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR) -install: default install-libgl install-osmesa install-drivers +install-dri: + cd drivers/dri && $(MAKE) install ## NOT INSTALLED YET: ## $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/include/GLES @@ -200,9 +207,10 @@ clean: -rm -f */*/*.o -rm -f depend depend.bak libmesa.a -rm -f drivers/*/*.o - (cd drivers/dri && $(MAKE) clean) - (cd x86 && $(MAKE) clean) - (cd x86-64 && $(MAKE) clean) + -@cd drivers/dri && $(MAKE) clean + -@cd drivers/xorg && $(MAKE) clean + -@cd x86 && $(MAKE) clean 
+ -@cd x86-64 && $(MAKE) clean -include depend diff --git a/src/mesa/drivers/beos/Makefile b/src/mesa/drivers/beos/Makefile index 9c7d6affc3..0448650a8c 100644 --- a/src/mesa/drivers/beos/Makefile +++ b/src/mesa/drivers/beos/Makefile @@ -170,10 +170,10 @@ OBJECTS := $(DRIVER_OBJECTS:.cpp=.o) default: depend $(TOP)/$(LIB_DIR) $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) $(MESA_MODULES): - cd $(TOP)/src/mesa; $(MAKE) mesa.a ; + cd $(TOP)/src/mesa && $(MAKE) mesa.a ; $(GLU_MODULES): - cd $(GLU_DIR); $(MAKE) $(subst $(GLU_DIR)/,,$(GLU_MODULES)) ; + cd $(GLU_DIR) && $(MAKE) $(subst $(GLU_DIR)/,,$(GLU_MODULES)) ; $(TOP)/$(LIB_DIR): mkdir $(TOP)/$(LIB_DIR) @@ -184,13 +184,13 @@ $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) $(MESA_MODULES) $(GLU_MODULES) $(MKLIB_OPTIONS) $(GL_LIB_DEPS) $(OBJECTS) $(MESA_MODULES) $(GLU_MODULES) # $(GLU_OBJECTS): -# cd $(GLU_DIR); $(MAKE) $< ; +# cd $(GLU_DIR) && $(MAKE) $< ; depend: $(DRIVER_SOURCES) $(GLU_SOURCES) touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(DRIVER_SOURCES) $(GLU_SOURCES) > /dev/null clean: - rm -f depend $(OBJECTS) + -rm -f depend depend.bak $(OBJECTS) include depend diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 03fbab69e3..e61b9f59cf 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -29,6 +29,7 @@ #include "buffers.h" #include "context.h" #include "framebuffer.h" +#include "mipmap.h" #include "queryobj.h" #include "renderbuffer.h" #include "texcompress.h" @@ -98,6 +99,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->CopyTexSubImage1D = _swrast_copy_texsubimage1d; driver->CopyTexSubImage2D = _swrast_copy_texsubimage2d; driver->CopyTexSubImage3D = _swrast_copy_texsubimage3d; + driver->GenerateMipmap = _mesa_generate_mipmap; driver->TestProxyTexImage = _mesa_test_proxy_teximage; driver->CompressedTexImage1D = _mesa_store_compressed_teximage1d; driver->CompressedTexImage2D = 
_mesa_store_compressed_teximage2d; @@ -260,43 +262,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) /** - * Plug in Mesa's GLSL functions. - */ -void -_mesa_init_glsl_driver_functions(struct dd_function_table *driver) -{ - driver->AttachShader = _mesa_attach_shader; - driver->BindAttribLocation = _mesa_bind_attrib_location; - driver->CompileShader = _mesa_compile_shader; - driver->CreateProgram = _mesa_create_program; - driver->CreateShader = _mesa_create_shader; - driver->DeleteProgram2 = _mesa_delete_program2; - driver->DeleteShader = _mesa_delete_shader; - driver->DetachShader = _mesa_detach_shader; - driver->GetActiveAttrib = _mesa_get_active_attrib; - driver->GetActiveUniform = _mesa_get_active_uniform; - driver->GetAttachedShaders = _mesa_get_attached_shaders; - driver->GetAttribLocation = _mesa_get_attrib_location; - driver->GetHandle = _mesa_get_handle; - driver->GetProgramiv = _mesa_get_programiv; - driver->GetProgramInfoLog = _mesa_get_program_info_log; - driver->GetShaderiv = _mesa_get_shaderiv; - driver->GetShaderInfoLog = _mesa_get_shader_info_log; - driver->GetShaderSource = _mesa_get_shader_source; - driver->GetUniformfv = _mesa_get_uniformfv; - driver->GetUniformLocation = _mesa_get_uniform_location; - driver->IsProgram = _mesa_is_program; - driver->IsShader = _mesa_is_shader; - driver->LinkProgram = _mesa_link_program; - driver->ShaderSource = _mesa_shader_source; - driver->Uniform = _mesa_uniform; - driver->UniformMatrix = _mesa_uniform_matrix; - driver->UseProgram = _mesa_use_program; - driver->ValidateProgram = _mesa_validate_program; -} - - -/** * Call the ctx->Driver.* state functions with current values to initialize * driver state. * Only the Intel drivers use this so far. 
diff --git a/src/mesa/drivers/common/driverfuncs.h b/src/mesa/drivers/common/driverfuncs.h index 6ed23c4520..4c90ed12f6 100644 --- a/src/mesa/drivers/common/driverfuncs.h +++ b/src/mesa/drivers/common/driverfuncs.h @@ -31,10 +31,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver); extern void -_mesa_init_glsl_driver_functions(struct dd_function_table *driver); - - -extern void _mesa_init_driver_state(GLcontext *ctx); diff --git a/src/mesa/drivers/directfb/Makefile b/src/mesa/drivers/directfb/Makefile index c515785b2a..dc71b385ca 100644 --- a/src/mesa/drivers/directfb/Makefile +++ b/src/mesa/drivers/directfb/Makefile @@ -50,5 +50,5 @@ install: clean: - rm -f *.o *.so + -rm -f *.o *.so diff --git a/src/mesa/drivers/dri/Makefile b/src/mesa/drivers/dri/Makefile index 69a8c55394..41dfc67e0c 100644 --- a/src/mesa/drivers/dri/Makefile +++ b/src/mesa/drivers/dri/Makefile @@ -27,13 +27,13 @@ install: (cd $$dir && $(MAKE) install) || exit 1 ; \ fi \ done - $(TOP)/bin/minstall -d $(DESTDIR)$(INSTALL_DIR)/include/GL/internal - $(TOP)/bin/minstall -m 0644 $(TOP)/include/GL/internal/dri_interface.h $(DESTDIR)$(INSTALL_DIR)/include/GL/internal - $(TOP)/bin/minstall -m 0644 $(TOP)/include/GL/internal/dri_sarea.h $(DESTDIR)$(INSTALL_DIR)/include/GL/internal + $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/include/GL/internal + $(INSTALL) -m 0644 $(TOP)/include/GL/internal/dri_interface.h $(DESTDIR)$(INSTALL_DIR)/include/GL/internal + $(INSTALL) -m 0644 $(TOP)/include/GL/internal/dri_sarea.h $(DESTDIR)$(INSTALL_DIR)/include/GL/internal clean: - @for dir in $(DRI_DIRS) ; do \ + -@for dir in $(DRI_DIRS) ; do \ if [ -d $$dir ] ; then \ (cd $$dir && $(MAKE) clean) ; \ fi \ diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index b7718f50fd..6efdf4312a 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -31,6 +31,7 @@ #include "dri_util.h" #include "drm_sarea.h" +#include "utils.h" #ifndef 
GLX_OML_sync_control typedef GLboolean ( * PFNGLXGETMSCRATEOMLPROC) (__DRIdrawable *drawable, int32_t *numerator, int32_t *denominator); @@ -66,6 +67,18 @@ __driUtilMessage(const char *f, ...) } } +GLint +driIntersectArea( drm_clip_rect_t rect1, drm_clip_rect_t rect2 ) +{ + if (rect2.x1 > rect1.x1) rect1.x1 = rect2.x1; + if (rect2.x2 < rect1.x2) rect1.x2 = rect2.x2; + if (rect2.y1 > rect1.y1) rect1.y1 = rect2.y1; + if (rect2.y2 < rect1.y2) rect1.y2 = rect2.y2; + + if (rect1.x1 > rect1.x2 || rect1.y1 > rect1.y2) return 0; + + return (rect1.x2 - rect1.x1) * (rect1.y2 - rect1.y1); +} /*****************************************************************/ /** \name Context (un)binding functions */ @@ -497,9 +510,9 @@ static void driCopySubBuffer(__DRIdrawable *dPriv, dPriv->driScreenPriv->DriverAPI.CopySubBuffer(dPriv, x, y, w, h); rect.x1 = x; - rect.y1 = y; + rect.y1 = dPriv->h - y - h; rect.x2 = x + w; - rect.y2 = y + w; + rect.y2 = rect.y1 + h; driReportDamage(dPriv, &rect, 1); } @@ -758,8 +771,10 @@ static void driDestroyScreen(__DRIscreen *psp) (*psp->DriverAPI.DestroyScreen)(psp); if (psp->dri2.enabled) { +#ifdef TTM_API drmBOUnmap(psp->fd, &psp->dri2.sareaBO); drmBOUnreference(psp->fd, &psp->dri2.sareaBO); +#endif } else { (void)drmUnmap((drmAddress)psp->pSAREA, SAREA_MAX); (void)drmUnmap((drmAddress)psp->pFB, psp->fbSize); @@ -891,6 +906,7 @@ dri2CreateNewScreen(int scrn, int fd, unsigned int sarea_handle, const __DRIextension **extensions, const __DRIconfig ***driver_configs, void *data) { +#ifdef TTM_API static const __DRIextension *emptyExtensionList[] = { NULL }; __DRIscreen *psp; unsigned int *p; @@ -958,6 +974,9 @@ dri2CreateNewScreen(int scrn, int fd, unsigned int sarea_handle, psp->DriverAPI = driDriverAPI; return psp; +#else + return NULL; +#endif } static const __DRIextension **driGetExtensions(__DRIscreen *psp) @@ -965,117 +984,6 @@ static const __DRIextension **driGetExtensions(__DRIscreen *psp) return psp->extensions; } -#define __ATTRIB(attrib, 
field) \ - { attrib, offsetof(__GLcontextModes, field) } - -static const struct { unsigned int attrib, offset; } attribMap[] = { - __ATTRIB(__DRI_ATTRIB_BUFFER_SIZE, rgbBits), - __ATTRIB(__DRI_ATTRIB_LEVEL, level), - __ATTRIB(__DRI_ATTRIB_RED_SIZE, redBits), - __ATTRIB(__DRI_ATTRIB_GREEN_SIZE, greenBits), - __ATTRIB(__DRI_ATTRIB_BLUE_SIZE, blueBits), - __ATTRIB(__DRI_ATTRIB_ALPHA_SIZE, alphaBits), - __ATTRIB(__DRI_ATTRIB_DEPTH_SIZE, depthBits), - __ATTRIB(__DRI_ATTRIB_STENCIL_SIZE, stencilBits), - __ATTRIB(__DRI_ATTRIB_ACCUM_RED_SIZE, accumRedBits), - __ATTRIB(__DRI_ATTRIB_ACCUM_GREEN_SIZE, accumGreenBits), - __ATTRIB(__DRI_ATTRIB_ACCUM_BLUE_SIZE, accumBlueBits), - __ATTRIB(__DRI_ATTRIB_ACCUM_ALPHA_SIZE, accumAlphaBits), - __ATTRIB(__DRI_ATTRIB_SAMPLE_BUFFERS, sampleBuffers), - __ATTRIB(__DRI_ATTRIB_SAMPLES, samples), - __ATTRIB(__DRI_ATTRIB_DOUBLE_BUFFER, doubleBufferMode), - __ATTRIB(__DRI_ATTRIB_STEREO, stereoMode), - __ATTRIB(__DRI_ATTRIB_AUX_BUFFERS, numAuxBuffers), - __ATTRIB(__DRI_ATTRIB_TRANSPARENT_TYPE, transparentPixel), - __ATTRIB(__DRI_ATTRIB_TRANSPARENT_INDEX_VALUE, transparentPixel), - __ATTRIB(__DRI_ATTRIB_TRANSPARENT_RED_VALUE, transparentRed), - __ATTRIB(__DRI_ATTRIB_TRANSPARENT_GREEN_VALUE, transparentGreen), - __ATTRIB(__DRI_ATTRIB_TRANSPARENT_BLUE_VALUE, transparentBlue), - __ATTRIB(__DRI_ATTRIB_TRANSPARENT_ALPHA_VALUE, transparentAlpha), - __ATTRIB(__DRI_ATTRIB_FLOAT_MODE, floatMode), - __ATTRIB(__DRI_ATTRIB_RED_MASK, redMask), - __ATTRIB(__DRI_ATTRIB_GREEN_MASK, greenMask), - __ATTRIB(__DRI_ATTRIB_BLUE_MASK, blueMask), - __ATTRIB(__DRI_ATTRIB_ALPHA_MASK, alphaMask), - __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_WIDTH, maxPbufferWidth), - __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_HEIGHT, maxPbufferHeight), - __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_PIXELS, maxPbufferPixels), - __ATTRIB(__DRI_ATTRIB_OPTIMAL_PBUFFER_WIDTH, optimalPbufferWidth), - __ATTRIB(__DRI_ATTRIB_OPTIMAL_PBUFFER_HEIGHT, optimalPbufferHeight), - __ATTRIB(__DRI_ATTRIB_SWAP_METHOD, swapMethod), - 
__ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_RGB, bindToTextureRgb), - __ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_RGBA, bindToTextureRgba), - __ATTRIB(__DRI_ATTRIB_BIND_TO_MIPMAP_TEXTURE, bindToMipmapTexture), - __ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_TARGETS, bindToTextureTargets), - __ATTRIB(__DRI_ATTRIB_YINVERTED, yInverted), - - /* The struct field doesn't matter here, these are handled by the - * switch in driGetConfigAttribIndex. We need them in the array - * so the iterator includes them though.*/ - __ATTRIB(__DRI_ATTRIB_RENDER_TYPE, level), - __ATTRIB(__DRI_ATTRIB_CONFIG_CAVEAT, level), - __ATTRIB(__DRI_ATTRIB_SWAP_METHOD, level) -}; - -#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0])) - -static int -driGetConfigAttribIndex(const __DRIconfig *config, - unsigned int index, unsigned int *value) -{ - switch (attribMap[index].attrib) { - case __DRI_ATTRIB_RENDER_TYPE: - if (config->modes.rgbMode) - *value = __DRI_ATTRIB_RGBA_BIT; - else - *value = __DRI_ATTRIB_COLOR_INDEX_BIT; - break; - case __DRI_ATTRIB_CONFIG_CAVEAT: - if (config->modes.visualRating == GLX_NON_CONFORMANT_CONFIG) - *value = __DRI_ATTRIB_NON_CONFORMANT_CONFIG; - else if (config->modes.visualRating == GLX_SLOW_CONFIG) - *value = __DRI_ATTRIB_SLOW_BIT; - else - *value = 0; - break; - case __DRI_ATTRIB_SWAP_METHOD: - break; - - default: - *value = *(unsigned int *) - ((char *) &config->modes + attribMap[index].offset); - - break; - } - - return GL_TRUE; -} - -static int -driGetConfigAttrib(const __DRIconfig *config, - unsigned int attrib, unsigned int *value) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(attribMap); i++) - if (attribMap[i].attrib == attrib) - return driGetConfigAttribIndex(config, i, value); - - return GL_FALSE; -} - -static int -driIndexConfigAttrib(const __DRIconfig *config, int index, - unsigned int *attrib, unsigned int *value) -{ - if (index >= 0 && index < ARRAY_SIZE(attribMap)) { - *attrib = attribMap[index].attrib; - return driGetConfigAttribIndex(config, index, value); - } - - return 
GL_FALSE; -} - const __DRIlegacyExtension driLegacyExtension = { { __DRI_LEGACY, __DRI_LEGACY_VERSION }, driCreateNewScreen, diff --git a/src/mesa/drivers/dri/common/dri_util.h b/src/mesa/drivers/dri/common/dri_util.h index 06e1d20a3c..203479e326 100644 --- a/src/mesa/drivers/dri/common/dri_util.h +++ b/src/mesa/drivers/dri/common/dri_util.h @@ -53,7 +53,6 @@ #include <drm.h> #include <drm_sarea.h> #include <xf86drm.h> -#include <xf86mm.h> #include "GL/internal/glcore.h" #include "GL/internal/dri_interface.h" #include "GL/internal/dri_sarea.h" @@ -61,7 +60,6 @@ #define GLX_BAD_CONTEXT 5 typedef struct __DRIswapInfoRec __DRIswapInfo; -typedef struct __DRIutilversionRec2 __DRIutilversion2; /* Typedefs to avoid rewriting the world. */ typedef struct __DRIscreenRec __DRIscreenPrivate; @@ -525,7 +523,9 @@ struct __DRIscreenRec { /* Flag to indicate that this is a DRI2 screen. Many of the above * fields will not be valid or initializaed in that case. */ int enabled; +#ifdef TTM_API drmBO sareaBO; +#endif void *sarea; __DRIEventBuffer *buffer; __DRILock *lock; @@ -536,22 +536,6 @@ struct __DRIscreenRec { drmLock *lock; }; -struct __DRIconfigRec { - __GLcontextModes modes; -}; - -/** - * Used to store a version which includes a major range instead of a single - * major version number. - */ -struct __DRIutilversionRec2 { - int major_min; /** min allowed Major version number. */ - int major_max; /** max allowed Major version number. */ - int minor; /**< Minor version number. */ - int patch; /**< Patch-level. 
*/ -}; - - extern void __driUtilMessage(const char *f, ...); @@ -566,4 +550,7 @@ extern float driCalculateSwapUsage( __DRIdrawable *dPriv, int64_t last_swap_ust, int64_t current_ust ); +extern GLint +driIntersectArea( drm_clip_rect_t rect1, drm_clip_rect_t rect2 ); + #endif /* _DRI_UTIL_H_ */ diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index 237d51cf22..7fbe0d855d 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -419,21 +419,6 @@ driCheckDriDdxDrmVersions2(const char * driver_name, drmActual, drmExpected); } - - -GLint -driIntersectArea( drm_clip_rect_t rect1, drm_clip_rect_t rect2 ) -{ - if (rect2.x1 > rect1.x1) rect1.x1 = rect2.x1; - if (rect2.x2 < rect1.x2) rect1.x2 = rect2.x2; - if (rect2.y1 > rect1.y1) rect1.y1 = rect2.y1; - if (rect2.y2 < rect1.y2) rect1.y2 = rect2.y2; - - if (rect1.x1 > rect1.x2 || rect1.y1 > rect1.y2) return 0; - - return (rect1.x2 - rect1.x1) * (rect1.y2 - rect1.y1); -} - GLboolean driClipRectToFramebuffer( const GLframebuffer *buffer, GLint *x, GLint *y, GLsizei *width, GLsizei *height ) @@ -540,68 +525,62 @@ driCreateConfigs(GLenum fb_format, GLenum fb_type, unsigned num_depth_stencil_bits, const GLenum * db_modes, unsigned num_db_modes) { - static const u_int8_t bits_table[3][4] = { + static const u_int8_t bits_table[4][4] = { /* R G B A */ + { 3, 3, 2, 0 }, /* Any GL_UNSIGNED_BYTE_3_3_2 */ { 5, 6, 5, 0 }, /* Any GL_UNSIGNED_SHORT_5_6_5 */ { 8, 8, 8, 0 }, /* Any RGB with any GL_UNSIGNED_INT_8_8_8_8 */ { 8, 8, 8, 8 } /* Any RGBA with any GL_UNSIGNED_INT_8_8_8_8 */ }; - /* The following arrays are all indexed by the fb_type masked with 0x07. - * Given the four supported fb_type values, this results in valid array - * indices of 3, 4, 5, and 7. 
- */ - static const u_int32_t masks_table_rgb[8][4] = { - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + static const u_int32_t masks_table_rgb[6][4] = { + { 0x000000E0, 0x0000001C, 0x00000003, 0x00000000 }, /* 3_3_2 */ + { 0x00000007, 0x00000038, 0x000000C0, 0x00000000 }, /* 2_3_3_REV */ { 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 }, /* 5_6_5 */ { 0x0000001F, 0x000007E0, 0x0000F800, 0x00000000 }, /* 5_6_5_REV */ { 0xFF000000, 0x00FF0000, 0x0000FF00, 0x00000000 }, /* 8_8_8_8 */ - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, { 0x000000FF, 0x0000FF00, 0x00FF0000, 0x00000000 } /* 8_8_8_8_REV */ }; - static const u_int32_t masks_table_rgba[8][4] = { - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + static const u_int32_t masks_table_rgba[6][4] = { + { 0x000000E0, 0x0000001C, 0x00000003, 0x00000000 }, /* 3_3_2 */ + { 0x00000007, 0x00000038, 0x000000C0, 0x00000000 }, /* 2_3_3_REV */ { 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 }, /* 5_6_5 */ { 0x0000001F, 0x000007E0, 0x0000F800, 0x00000000 }, /* 5_6_5_REV */ { 0xFF000000, 0x00FF0000, 0x0000FF00, 0x000000FF }, /* 8_8_8_8 */ - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 }, /* 8_8_8_8_REV */ }; - static const u_int32_t masks_table_bgr[8][4] = { - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + static const u_int32_t masks_table_bgr[6][4] = { + { 0x00000007, 0x00000038, 0x000000C0, 0x00000000 }, /* 3_3_2 */ + { 0x000000E0, 0x0000001C, 0x00000003, 0x00000000 }, /* 2_3_3_REV */ { 0x0000001F, 0x000007E0, 0x0000F800, 0x00000000 }, /* 5_6_5 */ { 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 }, /* 5_6_5_REV */ { 
0x0000FF00, 0x00FF0000, 0xFF000000, 0x00000000 }, /* 8_8_8_8 */ - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, { 0x00FF0000, 0x0000FF00, 0x000000FF, 0x00000000 }, /* 8_8_8_8_REV */ }; - static const u_int32_t masks_table_bgra[8][4] = { - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + static const u_int32_t masks_table_bgra[6][4] = { + { 0x00000007, 0x00000038, 0x000000C0, 0x00000000 }, /* 3_3_2 */ + { 0x000000E0, 0x0000001C, 0x00000003, 0x00000000 }, /* 2_3_3_REV */ { 0x0000001F, 0x000007E0, 0x0000F800, 0x00000000 }, /* 5_6_5 */ { 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 }, /* 5_6_5_REV */ { 0x0000FF00, 0x00FF0000, 0xFF000000, 0x000000FF }, /* 8_8_8_8 */ - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, { 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000 }, /* 8_8_8_8_REV */ }; - static const u_int8_t bytes_per_pixel[8] = { - 0, 0, 0, 2, 2, 4, 0, 4 + static const u_int8_t bytes_per_pixel[6] = { + 1, /* 3_3_2 */ + 1, /* 2_3_3_REV */ + 2, /* 5_6_5 */ + 2, /* 5_6_5_REV */ + 4, /* 8_8_8_8 */ + 4 /* 8_8_8_8_REV */ }; const u_int8_t * bits; const u_int32_t * masks; - const int index = fb_type & 0x07; + int index; __DRIconfig **configs, **c; __GLcontextModes *modes; unsigned i; @@ -610,10 +589,29 @@ driCreateConfigs(GLenum fb_format, GLenum fb_type, unsigned num_modes; unsigned num_accum_bits = 2; - if ( bytes_per_pixel[ index ] == 0 ) { - fprintf( stderr, "[%s:%u] Framebuffer type 0x%04x has 0 bytes per pixel.\n", - __FUNCTION__, __LINE__, fb_type ); - return NULL; + switch ( fb_type ) { + case GL_UNSIGNED_BYTE_3_3_2: + index = 0; + break; + case GL_UNSIGNED_BYTE_2_3_3_REV: + index = 1; + break; + case GL_UNSIGNED_SHORT_5_6_5: + index = 2; + break; + case GL_UNSIGNED_SHORT_5_6_5_REV: + index = 3; + break; + case GL_UNSIGNED_INT_8_8_8_8: + index = 4; + break; + case GL_UNSIGNED_INT_8_8_8_8_REV: + index = 5; + break; + default: + fprintf( stderr, 
"[%s:%u] Unknown framebuffer type 0x%04x.\n", + __FUNCTION__, __LINE__, fb_type ); + return NULL; } @@ -625,35 +623,41 @@ driCreateConfigs(GLenum fb_format, GLenum fb_type, switch ( fb_format ) { case GL_RGB: - bits = (bytes_per_pixel[ index ] == 2) - ? bits_table[0] : bits_table[1]; masks = masks_table_rgb[ index ]; break; case GL_RGBA: - bits = (bytes_per_pixel[ index ] == 2) - ? bits_table[0] : bits_table[2]; masks = masks_table_rgba[ index ]; break; case GL_BGR: - bits = (bytes_per_pixel[ index ] == 2) - ? bits_table[0] : bits_table[1]; masks = masks_table_bgr[ index ]; break; case GL_BGRA: - bits = (bytes_per_pixel[ index ] == 2) - ? bits_table[0] : bits_table[2]; masks = masks_table_bgra[ index ]; break; default: - fprintf( stderr, "[%s:%u] Framebuffer format 0x%04x is not GL_RGB, GL_RGBA, GL_BGR, or GL_BGRA.\n", - __FUNCTION__, __LINE__, fb_format ); + fprintf( stderr, "[%s:%u] Unknown framebuffer format 0x%04x.\n", + __FUNCTION__, __LINE__, fb_format ); return NULL; } + switch ( bytes_per_pixel[ index ] ) { + case 1: + bits = bits_table[0]; + break; + case 2: + bits = bits_table[1]; + break; + default: + bits = ((fb_format == GL_RGB) || (fb_format == GL_BGR)) + ? 
bits_table[2] + : bits_table[3]; + break; + } + num_modes = num_depth_stencil_bits * num_db_modes * num_accum_bits; configs = _mesa_calloc((num_modes + 1) * sizeof *configs); if (configs == NULL) @@ -755,3 +759,114 @@ const __DRIconfig **driConcatConfigs(__DRIconfig **a, __DRIconfig **b) return all; } + +#define __ATTRIB(attrib, field) \ + { attrib, offsetof(__GLcontextModes, field) } + +static const struct { unsigned int attrib, offset; } attribMap[] = { + __ATTRIB(__DRI_ATTRIB_BUFFER_SIZE, rgbBits), + __ATTRIB(__DRI_ATTRIB_LEVEL, level), + __ATTRIB(__DRI_ATTRIB_RED_SIZE, redBits), + __ATTRIB(__DRI_ATTRIB_GREEN_SIZE, greenBits), + __ATTRIB(__DRI_ATTRIB_BLUE_SIZE, blueBits), + __ATTRIB(__DRI_ATTRIB_ALPHA_SIZE, alphaBits), + __ATTRIB(__DRI_ATTRIB_DEPTH_SIZE, depthBits), + __ATTRIB(__DRI_ATTRIB_STENCIL_SIZE, stencilBits), + __ATTRIB(__DRI_ATTRIB_ACCUM_RED_SIZE, accumRedBits), + __ATTRIB(__DRI_ATTRIB_ACCUM_GREEN_SIZE, accumGreenBits), + __ATTRIB(__DRI_ATTRIB_ACCUM_BLUE_SIZE, accumBlueBits), + __ATTRIB(__DRI_ATTRIB_ACCUM_ALPHA_SIZE, accumAlphaBits), + __ATTRIB(__DRI_ATTRIB_SAMPLE_BUFFERS, sampleBuffers), + __ATTRIB(__DRI_ATTRIB_SAMPLES, samples), + __ATTRIB(__DRI_ATTRIB_DOUBLE_BUFFER, doubleBufferMode), + __ATTRIB(__DRI_ATTRIB_STEREO, stereoMode), + __ATTRIB(__DRI_ATTRIB_AUX_BUFFERS, numAuxBuffers), + __ATTRIB(__DRI_ATTRIB_TRANSPARENT_TYPE, transparentPixel), + __ATTRIB(__DRI_ATTRIB_TRANSPARENT_INDEX_VALUE, transparentPixel), + __ATTRIB(__DRI_ATTRIB_TRANSPARENT_RED_VALUE, transparentRed), + __ATTRIB(__DRI_ATTRIB_TRANSPARENT_GREEN_VALUE, transparentGreen), + __ATTRIB(__DRI_ATTRIB_TRANSPARENT_BLUE_VALUE, transparentBlue), + __ATTRIB(__DRI_ATTRIB_TRANSPARENT_ALPHA_VALUE, transparentAlpha), + __ATTRIB(__DRI_ATTRIB_FLOAT_MODE, floatMode), + __ATTRIB(__DRI_ATTRIB_RED_MASK, redMask), + __ATTRIB(__DRI_ATTRIB_GREEN_MASK, greenMask), + __ATTRIB(__DRI_ATTRIB_BLUE_MASK, blueMask), + __ATTRIB(__DRI_ATTRIB_ALPHA_MASK, alphaMask), + __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_WIDTH, 
maxPbufferWidth), + __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_HEIGHT, maxPbufferHeight), + __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_PIXELS, maxPbufferPixels), + __ATTRIB(__DRI_ATTRIB_OPTIMAL_PBUFFER_WIDTH, optimalPbufferWidth), + __ATTRIB(__DRI_ATTRIB_OPTIMAL_PBUFFER_HEIGHT, optimalPbufferHeight), + __ATTRIB(__DRI_ATTRIB_SWAP_METHOD, swapMethod), + __ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_RGB, bindToTextureRgb), + __ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_RGBA, bindToTextureRgba), + __ATTRIB(__DRI_ATTRIB_BIND_TO_MIPMAP_TEXTURE, bindToMipmapTexture), + __ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_TARGETS, bindToTextureTargets), + __ATTRIB(__DRI_ATTRIB_YINVERTED, yInverted), + + /* The struct field doesn't matter here, these are handled by the + * switch in driGetConfigAttribIndex. We need them in the array + * so the iterator includes them though.*/ + __ATTRIB(__DRI_ATTRIB_RENDER_TYPE, level), + __ATTRIB(__DRI_ATTRIB_CONFIG_CAVEAT, level), + __ATTRIB(__DRI_ATTRIB_SWAP_METHOD, level) +}; + +#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0])) + +static int +driGetConfigAttribIndex(const __DRIconfig *config, + unsigned int index, unsigned int *value) +{ + switch (attribMap[index].attrib) { + case __DRI_ATTRIB_RENDER_TYPE: + if (config->modes.rgbMode) + *value = __DRI_ATTRIB_RGBA_BIT; + else + *value = __DRI_ATTRIB_COLOR_INDEX_BIT; + break; + case __DRI_ATTRIB_CONFIG_CAVEAT: + if (config->modes.visualRating == GLX_NON_CONFORMANT_CONFIG) + *value = __DRI_ATTRIB_NON_CONFORMANT_CONFIG; + else if (config->modes.visualRating == GLX_SLOW_CONFIG) + *value = __DRI_ATTRIB_SLOW_BIT; + else + *value = 0; + break; + case __DRI_ATTRIB_SWAP_METHOD: + break; + + default: + *value = *(unsigned int *) + ((char *) &config->modes + attribMap[index].offset); + + break; + } + + return GL_TRUE; +} + +int +driGetConfigAttrib(const __DRIconfig *config, + unsigned int attrib, unsigned int *value) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(attribMap); i++) + if (attribMap[i].attrib == attrib) + return 
driGetConfigAttribIndex(config, i, value); + + return GL_FALSE; +} + +int +driIndexConfigAttrib(const __DRIconfig *config, int index, + unsigned int *attrib, unsigned int *value) +{ + if (index >= 0 && index < ARRAY_SIZE(attribMap)) { + *attrib = attribMap[index].attrib; + return driGetConfigAttribIndex(config, index, value); + } + + return GL_FALSE; +} diff --git a/src/mesa/drivers/dri/common/utils.h b/src/mesa/drivers/dri/common/utils.h index 45a78e5ca5..0c09a7e68f 100644 --- a/src/mesa/drivers/dri/common/utils.h +++ b/src/mesa/drivers/dri/common/utils.h @@ -29,8 +29,11 @@ #ifndef DRI_DEBUG_H #define DRI_DEBUG_H +#include <GL/gl.h> +#include <GL/internal/dri_interface.h> #include "context.h" -#include "dri_util.h" + +typedef struct __DRIutilversionRec2 __DRIutilversion2; struct dri_debug_control { const char * string; @@ -84,6 +87,17 @@ struct dri_extension { const struct dri_extension_function * functions; }; +/** + * Used to store a version which includes a major range instead of a single + * major version number. + */ +struct __DRIutilversionRec2 { + int major_min; /** min allowed Major version number. */ + int major_max; /** max allowed Major version number. */ + int minor; /**< Minor version number. */ + int patch; /**< Patch-level. 
*/ +}; + extern unsigned driParseDebugString( const char * debug, const struct dri_debug_control * control ); @@ -106,12 +120,14 @@ extern GLboolean driCheckDriDdxDrmVersions3(const char * driver_name, const __DRIversion * ddxActual, const __DRIutilversion2 * ddxExpected, const __DRIversion * drmActual, const __DRIversion * drmExpected); -extern GLint driIntersectArea( drm_clip_rect_t rect1, drm_clip_rect_t rect2 ); - extern GLboolean driClipRectToFramebuffer( const GLframebuffer *buffer, GLint *x, GLint *y, GLsizei *width, GLsizei *height ); +struct __DRIconfigRec { + __GLcontextModes modes; +}; + extern __DRIconfig ** driCreateConfigs(GLenum fb_format, GLenum fb_type, const u_int8_t * depth_bits, const u_int8_t * stencil_bits, @@ -120,4 +136,11 @@ driCreateConfigs(GLenum fb_format, GLenum fb_type, const __DRIconfig **driConcatConfigs(__DRIconfig **a, __DRIconfig **b); +int +driGetConfigAttrib(const __DRIconfig *config, + unsigned int attrib, unsigned int *value); +int +driIndexConfigAttrib(const __DRIconfig *config, int index, + unsigned int *attrib, unsigned int *value); + #endif /* DRI_DEBUG_H */ diff --git a/src/mesa/drivers/dri/common/vblank.c b/src/mesa/drivers/dri/common/vblank.c index e81cc6886f..dc2cbcc84d 100644 --- a/src/mesa/drivers/dri/common/vblank.c +++ b/src/mesa/drivers/dri/common/vblank.c @@ -260,8 +260,8 @@ static int do_wait( drmVBlank * vbl, GLuint * vbl_seq, int fd ) if ( first_time ) { fprintf(stderr, "%s: drmWaitVBlank returned %d, IRQs don't seem to be" - " working correctly.\nTry running with LIBGL_THROTTLE_REFRESH" - " and LIBL_SYNC_REFRESH unset.\n", __FUNCTION__, ret); + " working correctly.\nTry adjusting the vblank_mode" + " configuration parameter.\n", __FUNCTION__, ret); first_time = GL_FALSE; } diff --git a/src/mesa/drivers/dri/common/xmlpool/Makefile b/src/mesa/drivers/dri/common/xmlpool/Makefile index b077809cd1..62ec919ea6 100644 --- a/src/mesa/drivers/dri/common/xmlpool/Makefile +++ 
b/src/mesa/drivers/dri/common/xmlpool/Makefile @@ -57,8 +57,8 @@ all: options.h # Only intermediate files are cleaned up. options.h is not deleted because # it's in CVS. clean: - rm -f $(POT) *~ - rm -rf $(LANGS) + -rm -f $(POT) *~ + -rm -rf $(LANGS) # Default target options.h options.h: t_options.h mo diff --git a/src/mesa/drivers/dri/glcore/Makefile b/src/mesa/drivers/dri/glcore/Makefile deleted file mode 100644 index 968190acfc..0000000000 --- a/src/mesa/drivers/dri/glcore/Makefile +++ /dev/null @@ -1,84 +0,0 @@ -# src/mesa/drivers/dri/glcore/Makefile - -TOP = ../../../../.. -include $(TOP)/configs/current - -LIBNAME = glcore_dri.so - -DRIVER_SOURCES = glcore_driver.c \ - $(TOP)/src/mesa/drivers/common/driverfuncs.c \ - ../common/dri_util.c - -C_SOURCES = \ - $(DRIVER_SOURCES) \ - $(DRI_SOURCES) - - -# Include directories -INCLUDE_DIRS = \ - -I. \ - -I../common \ - -I../dri_client \ - -I../dri_client/imports \ - -Iserver \ - -I$(TOP)/include \ - -I$(DRM_SOURCE_PATH)/shared-core \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main \ - -I$(TOP)/src/mesa/glapi \ - -I$(TOP)/src/mesa/math \ - -I$(TOP)/src/mesa/transform \ - -I$(TOP)/src/mesa/shader \ - -I$(TOP)/src/mesa/swrast \ - -I$(TOP)/src/mesa/swrast_setup - -# Core Mesa objects -MESA_MODULES = $(TOP)/src/mesa/libmesa.a - -# Libraries that the driver shared lib depends on -LIB_DEPS = -lm -lpthread -lc -# LIB_DEPS = -lGL -lm -lpthread -lc - - -ASM_SOURCES = - -OBJECTS = $(C_SOURCES:.c=.o) \ - $(ASM_SOURCES:.S=.o) - - -##### RULES ##### - -.c.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(DEFINES) $< -o $@ - -.S.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(DEFINES) $< -o $@ - - -##### TARGETS ##### - -default: depend $(TOP)/$(LIB_DIR)/$(LIBNAME) - - -$(TOP)/$(LIB_DIR)/$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile - CC="$(CC)" CXX="$(CXX)" $(TOP)/bin/mklib -o $(LIBNAME) -noprefix \ - -ldflags '$(LDFLAGS)' -install $(TOP)/$(LIB_DIR) \ - $(OBJECTS) $(WINLIB) $(LIB_DEPS) $(WINOBJ) $(MESA_MODULES) - - -depend: 
$(C_SOURCES) $(ASM_SOURCES) - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(C_SOURCES) $(ASM_SOURCES) \ - > /dev/null - - -# Emacs tags -tags: - etags `find . -name \*.[ch]` `find ../include` - - -clean: - -rm -f *.o server/*.o - - -include depend diff --git a/src/mesa/drivers/dri/glcore/glcore_driver.c b/src/mesa/drivers/dri/glcore/glcore_driver.c deleted file mode 100644 index 2577816041..0000000000 --- a/src/mesa/drivers/dri/glcore/glcore_driver.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright 2006 Red Hat, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER - * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * This implements a software-only "DRI" driver. It doesn't actually speak - * any DRI protocol or talk to the DRM, it just looks enough like a DRI driver - * that libglx in the server can load it for software rendering in the - * unaccelerated case. 
- */ - -static GLboolean -glcoreInitDriver(__DRIscreenPrivate *driScreenPriv) -{ -} - -static void -glcoreDestroyScreen(__DRIScreenPrivate *driScreenPriv) -{ -} - -static GLboolean -glcoreCreateContext(const __GLcontextModes *glVisual, - __DRIcontextPrivate *driContextPriv, - void *shared_context) -{ -} - -static void -glcoreDestroyContext(__DRIcontextPrivate *driContextPriv) -{ -} - -static GLboolean -glcoreCreateBuffer(__DRIscreenPrivate *driScreenPriv, - __DRIdrawablePrivate *driDrawablePriv, - const __GLcontextModes *mesaVisual, - GLboolean isPixmap) -{ -} - -static void -glcoreDestroyBuffer(__DRIdrawablePrivate *driDrawablePriv) -{ -} - -static void -glcoreSwapBuffers(__DRIdrawablePrivate *driDrawablePriv) -{ -} - -static GLboolean -glcoreMakeCurrent(__DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawablePriv, - __DRIdrawablePrivate *driReadablePriv) -{ -} - -static GLboolean -glcoreUnbindContext(__DRIcontextPrivate *driContextPriv) -{ -} - -static struct __DriverAPIRec glcore_api = { - .InitDriver = glcoreInitDriver, - .DestroyScreen = glcoreDestroyScreen, - .CreateContext = glcoreCreateContext, - .DestroyContext = glcoreDestroyContext, - .CreateBuffer = glcoreCreateBuffer, - .DestroyBuffer = glcoreDestroyBuffer, - .SwapBuffers = glcoreSwapBuffers, - .MakeCurrent = glcoreMakeCurrent, - .UnbindContext = glcoreUnbindContext, -}; - -static __GLcontextModes * -glcoreFillInModes(unsigned pixel_bits) -{ -} - -PUBLIC void * -__driCreateNewScreen_20050727(__DRInativeDisplay *dpy, int scrn, - __DRIscreen *psc, const __GLcontextModes *modes, - const __DRIversion *ddx_version, - const __DRIversion *dri_version, - const __DRIversion *drm_version, - const __DRIframebuffer *fb, drmAddress pSarea, - int fd, int internal_api_version, - const ___DRIinterfaceMethods *interface, - __GLcontextModes **driver_modes) -{ - __DRIscreenPrivate *driScreenPriv; - glcoreDriverPrivate *glcoreDriverPriv; - - /* would normally check ddx/dri/drm versions here */ - - 
driScreenPriv = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, ddx_version, - dri_version, drm_version, fb, - internal_api_version, &glcore_api); - if (!driScreenPriv) - return NULL; - - glcoreDriverPriv = driScreenPriv->pDrvPriv; - - *driver_modes = glcoreFillInModes(glcoreDriverPriv->bpp); - - driInitExtensions(NULL, NULL, GL_FALSE); - - return driScreenPriv; -} diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 6124ab6b0f..f90c5f7b08 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -257,10 +257,12 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, struct intel_context *intel = intel_context(ctx); struct brw_context *brw = brw_context(ctx); GLboolean retval = GL_FALSE; - GLuint i, ret; + GLuint i; GLuint ib_offset; dri_bo *ib_bo; GLboolean force_flush = GL_FALSE; + int ret; + if (ctx->NewState) _mesa_update_state( ctx ); @@ -316,6 +318,14 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, goto flush; } + /* Various fallback checks: + */ + if (brw->intel.Fallback) + goto out; + + if (check_fallbacks( brw, prim, nr_prims )) + goto out; + /* need to account for index buffer and vertex buffer */ if (ib) { ret = brw_prepare_indices( brw, ib , &ib_bo, &ib_offset); @@ -333,16 +343,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, force_flush = GL_TRUE; goto flush; } - - - - /* Various fallback checks: - */ - if (brw->intel.Fallback) - goto out; - - if (check_fallbacks( brw, prim, nr_prims )) - goto out; /* Upload index, vertex data: */ diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 026c8ed898..18ba02423d 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -401,6 +401,7 @@ int brw_prepare_vertices( struct brw_context *brw, */ copy_array_to_vbo_array(brw, upload[0], interleave); + ret |= dri_bufmgr_check_aperture_space(upload[0]->bo); for (i = 1; 
i < nr_uploads; i++) { /* Then, just point upload[i] at upload[0]'s buffer. */ upload[i]->stride = interleave; @@ -413,13 +414,13 @@ int brw_prepare_vertices( struct brw_context *brw, else { /* Upload non-interleaved arrays */ for (i = 0; i < nr_uploads; i++) { - copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); + copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); + if (upload[i]->bo) { + ret |= dri_bufmgr_check_aperture_space(upload[i]->bo); + } } } - if (brw->vb.upload.bo) { - ret |= dri_bufmgr_check_aperture_space(brw->vb.upload.bo); - } if (ret) return 1; diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index 58c81766cd..c0a18fa225 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -150,6 +150,7 @@ intel_batch_ioctl(struct intel_context *intel, return 0; } +#ifdef TTM_API int intel_exec_ioctl(struct intel_context *intel, GLuint used, @@ -183,3 +184,13 @@ intel_exec_ioctl(struct intel_context *intel, return 0; } +#else /* !TTM_API: stub with the same signature that always returns -EINVAL */ +int +intel_exec_ioctl(struct intel_context *intel, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock, + struct drm_i915_gem_execbuffer *execbuf) +{ + return -EINVAL; +} +#endif /* TTM_API */ diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 329af0d1b0..f1d6a6dbfc 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -172,14 +172,13 @@ timed_memcpy(void *dest, const void *src, size_t n) */ void intel_generate_mipmap(GLcontext *ctx, GLenum target, - const struct gl_texture_unit *texUnit, struct gl_texture_object *texObj) { struct intel_texture_object *intelObj = intel_texture_object(texObj); GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ?
6 : 1; int face, i; - _mesa_generate_mipmap(ctx, target, texUnit, texObj); + _mesa_generate_mipmap(ctx, target, texObj); /* Update the level information in our private data in the new images, since * it didn't get set as part of a normal TexImage path. @@ -198,6 +197,15 @@ intel_generate_mipmap(GLcontext *ctx, GLenum target, } } +static void intelGenerateMipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + + intel_tex_map_images(intel, intelObj); + intel_generate_mipmap(ctx, target, texObj); + intel_tex_unmap_images(intel, intelObj); +} void intelInitTextureFuncs(struct dd_function_table *functions) @@ -221,6 +229,7 @@ intelInitTextureFuncs(struct dd_function_table *functions) functions->CopyTexSubImage2D = _swrast_copy_texsubimage2d; #endif functions->GetTexImage = intelGetTexImage; + functions->GenerateMipmap = intelGenerateMipmap; /* compressed texture functions */ functions->CompressedTexImage2D = intelCompressedTexImage2D; diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index 3a87137cc9..60ab8203e5 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -151,7 +151,6 @@ void intel_tex_unmap_images(struct intel_context *intel, int intel_compressed_num_bytes(GLuint mesaFormat); void intel_generate_mipmap(GLcontext *ctx, GLenum target, - const struct gl_texture_unit *texUnit, struct gl_texture_object *texObj); #endif diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 7facc469f4..1add7c6188 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -161,9 +161,7 @@ do_copy_texsubimage(struct intel_context *intel, /* GL_SGIS_generate_mipmap */ if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) { - 
intel_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + intel_generate_mipmap(ctx, target, texObj); } return GL_TRUE; diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index bcb65835c6..95ddbd5920 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -522,9 +522,7 @@ intelTexImage(GLcontext * ctx, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + intel_generate_mipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, unpack); diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index 78621814c3..5428a1d068 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -103,9 +103,7 @@ intelTexSubimage(GLcontext * ctx, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + intel_generate_mipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index 7008832965..20482a4124 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -477,16 +477,18 @@ void r200CopyBuffer( __DRIdrawablePrivate *dPriv, if (rect->y2 < b->y2) b->y2 = rect->y2; - if (b->x1 < b->x2 && b->y1 < b->y2) - b++; + if (b->x1 >= b->x2 || b->y1 >= b->y2) + continue; } - else - b++; + b++; n++; } rmesa->sarea->nbox = n; + if (!n) + continue; + ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); if ( ret ) { diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 44248964fd..5b2bd0bc2b 
100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -39,6 +39,7 @@ DRIVER_SOURCES = \ r300_texstate.c \ r300_vertprog.c \ r300_fragprog.c \ + r500_fragprog.c \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 3497738eac..8d4d604ba9 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -164,7 +164,7 @@ static inline void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) r300->cmdbuf.count_used++; /* Emit cache flush */ - *dest = cmdpacket0(R300_TX_CNTL, 1); + *dest = cmdpacket0(R300_TX_INVALTAGS, 1); dest++; r300->cmdbuf.count_used++; @@ -242,6 +242,7 @@ void r300EmitState(r300ContextPtr r300) #define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count) #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) +#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) static int check_always(r300ContextPtr r300, struct r300_state_atom *atom) { @@ -262,6 +263,20 @@ static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom) return cnt ? (cnt * 4) + 1 : 0; } +static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = r500fp_count(atom->cmd); + return cnt ? (cnt * 6) + 1 : 0; +} + +static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = r500fp_count(atom->cmd); + return cnt ? 
(cnt * 4) + 1 : 0; +} + #define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \ do { \ r300->hw.ATOM.cmd_size = (SZ); \ @@ -281,10 +296,15 @@ void r300InitCmdBuf(r300ContextPtr r300) { int size, mtu; int has_tcl = 1; + int is_r500 = 0; + int i; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + r300->hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ mtu = r300->radeon.glCtx->Const.MaxTextureUnits; @@ -299,8 +319,15 @@ void r300InitCmdBuf(r300ContextPtr r300) /* Initialize state atoms */ ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0); r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6); - ALLOC_STATE(vap_cntl, always, 2, 0); - r300->hw.vap_cntl.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1); + ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0); + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(R300_VAP_PVS_STATE_FLUSH_REG, 1); + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0; + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(R300_VAP_CNTL, 1); + if (is_r500) { + ALLOC_STATE(vap_index_offset, always, 2, 0); + r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1); + r300->hw.vap_index_offset.cmd[1] = 0; + } ALLOC_STATE(vte, always, 3, 0); r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2); ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0); @@ -309,12 +336,12 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1); ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0); r300->hw.vir[0].cmd[R300_VIR_CMD_0] = - cmdpacket0(R300_VAP_INPUT_ROUTE_0_0, 1); + cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1); ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1); r300->hw.vir[1].cmd[R300_VIR_CMD_0] = - cmdpacket0(R300_VAP_INPUT_ROUTE_1_0, 1); + cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1); ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0); - 
r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_INPUT_CNTL_0, 2); + r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2); ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0); r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE); @@ -322,7 +349,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(vap_clip_cntl, always, 2, 0); r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1); ALLOC_STATE(vap_clip, always, 5, 0); - r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_CLIP_X_0, 4); + r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_GB_VERT_CLIP_ADJ, 4); ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0); r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(VAP_PVS_VTX_TIMEOUT_REG, 1); } @@ -334,7 +361,7 @@ void r300InitCmdBuf(r300ContextPtr r300) if (has_tcl) { ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0); r300->hw.pvs.cmd[R300_PVS_CMD_0] = - cmdpacket0(R300_VAP_PVS_CNTL_1, 3); + cmdpacket0(R300_VAP_PVS_CODE_CNTL_0, 3); } ALLOC_STATE(gb_enable, always, 2, 0); @@ -344,69 +371,99 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0); r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1); ALLOC_STATE(ga_point_s0, always, 5, 0); - r300->hw.ga_point_s0.cmd[0] = cmdpacket0(GA_POINT_S0, 4); + r300->hw.ga_point_s0.cmd[0] = cmdpacket0(R300_GA_POINT_S0, 4); ALLOC_STATE(ga_triangle_stipple, always, 2, 0); - r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(GA_TRIANGLE_STIPPLE, 1); + r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(R300_GA_TRIANGLE_STIPPLE, 1); ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0); r300->hw.ps.cmd[0] = cmdpacket0(R300_GA_POINT_SIZE, 1); ALLOC_STATE(ga_point_minmax, always, 4, 0); r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(R300_GA_POINT_MINMAX, 3); ALLOC_STATE(lcntl, always, 2, 0); - r300->hw.lcntl.cmd[0] = cmdpacket0(GA_LINE_CNTL, 1); + r300->hw.lcntl.cmd[0] = cmdpacket0(R300_GA_LINE_CNTL, 1); 
ALLOC_STATE(ga_line_stipple, always, 4, 0); r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(R300_GA_LINE_STIPPLE_VALUE, 3); ALLOC_STATE(shade, always, 5, 0); - r300->hw.shade.cmd[0] = cmdpacket0(GA_ENHANCE, 4); + r300->hw.shade.cmd[0] = cmdpacket0(R300_GA_ENHANCE, 4); ALLOC_STATE(polygon_mode, always, 4, 0); - r300->hw.polygon_mode.cmd[0] = cmdpacket0(GA_POLY_MODE, 3); + r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3); ALLOC_STATE(fogp, always, 3, 0); - r300->hw.fogp.cmd[0] = cmdpacket0(R300_RE_FOG_SCALE, 2); + r300->hw.fogp.cmd[0] = cmdpacket0(R300_GA_FOG_SCALE, 2); ALLOC_STATE(zbias_cntl, always, 2, 0); - r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_RE_ZBIAS_CNTL, 1); + r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_SU_TEX_WRAP, 1); ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0); r300->hw.zbs.cmd[R300_ZBS_CMD_0] = - cmdpacket0(R300_RE_ZBIAS_T_FACTOR, 4); + cmdpacket0(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); ALLOC_STATE(occlusion_cntl, always, 2, 0); - r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_RE_OCCLUSION_CNTL, 1); + r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_SU_POLY_OFFSET_ENABLE, 1); ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0); - r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_RE_CULL_CNTL, 1); + r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_SU_CULL_MODE, 1); ALLOC_STATE(su_depth_scale, always, 3, 0); r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2); ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2); - ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); - r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8); - ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1); + if (is_r500) { + ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16); + for (i = 0; i < 8; i++) { + r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] = + (R500_RS_IP_PTR_K0 << 
R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); + } + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1); + } else { + ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1); + } ALLOC_STATE(sc_hyperz, always, 3, 0); r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2); ALLOC_STATE(sc_screendoor, always, 2, 0); r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1); - ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); - r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); - r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_PFS_NODE_0, 4); - ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); - r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); ALLOC_STATE(us_out_fmt, always, 6, 0); - r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); - ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); - r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); - ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); - r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, 1); - ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); - r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); - ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); - r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R300_US_OUT_FMT, 5); + + if (is_r500) { + ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0); + r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2); + r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO; + r300->hw.fp.cmd[R500_FP_CMD_1] = 
cmdpacket0(R500_US_CODE_ADDR, 3); + r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1); + r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */ + + ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); + ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0); + r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0); + } else { + ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); + r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_US_CONFIG, 3); + r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_US_CODE_ADDR_0, 4); + ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); + r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_US_TEX_INST_0, 0); + + ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); + r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, 1); + ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); + r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, 1); + ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); + r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, 1); + ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); + r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, 1); + ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); + r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); + } ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); - r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(FG_FOG_BLEND, 1); + r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_FG_FOG_BLEND, 1); ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0); - r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(FG_FOG_COLOR_R, 3); + r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FG_FOG_COLOR_R, 3); ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0); - r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(FG_ALPHA_FUNC, 2); + r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_FG_ALPHA_FUNC, 2); 
ALLOC_STATE(fg_depth_src, always, 2, 0); r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1); - ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); - r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); ALLOC_STATE(rb3d_cctl, always, 2, 0); r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1); ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0); @@ -421,45 +478,61 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9); ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); - r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(RB3D_AARESOLVE_CTL, 1); + r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(R300_RB3D_AARESOLVE_CTL, 1); ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); r300->hw.zs.cmd[R300_ZS_CMD_0] = - cmdpacket0(R300_RB3D_ZSTENCIL_CNTL_0, 3); + cmdpacket0(R300_ZB_CNTL, 3); ALLOC_STATE(zstencil_format, always, 5, 0); r300->hw.zstencil_format.cmd[0] = - cmdpacket0(ZB_FORMAT, 4); + cmdpacket0(R300_ZB_FORMAT, 4); ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); - r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(ZB_DEPTHOFFSET, 2); + r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2); ALLOC_STATE(zb_depthclearvalue, always, 2, 0); - r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(ZB_DEPTHCLEARVALUE, 1); + r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1); ALLOC_STATE(unk4F30, always, 3, 0); r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2); ALLOC_STATE(zb_hiz_offset, always, 2, 0); - r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(ZB_HIZ_OFFSET, 1); + r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1); 
ALLOC_STATE(zb_hiz_pitch, always, 2, 0); - r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(ZB_HIZ_PITCH, 1); + r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(R300_ZB_HIZ_PITCH, 1); /* VPU only on TCL */ if (has_tcl) { int i; ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); r300->hw.vpi.cmd[R300_VPI_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_PROGRAM, 0); + cmdvpu(R300_PVS_CODE_START, 0); - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); - r300->hw.vpp.cmd[R300_VPP_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_PARAMETERS, 0); + if (is_r500) { + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[R300_VPP_CMD_0] = + cmdvpu(R500_PVS_CONST_START, 0); - ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); - r300->hw.vps.cmd[R300_VPS_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_POINTSIZE, 1); + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[R300_VPS_CMD_0] = + cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1); - for (i = 0; i < 6; i++) { - ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); - r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1); + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = + cmdvpu(R500_PVS_UCP_START + i, 1); + } + } else { + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[R300_VPP_CMD_0] = + cmdvpu(R300_PVS_CONST_START, 0); + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[R300_VPS_CMD_0] = + cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1); + + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = + cmdvpu(R300_PVS_UCP_START + i, 1); + } } } diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index c56a762289..31cc00a081 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -278,6 +278,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxTextureCoordUnits); 
ctx->Const.MaxTextureMaxAnisotropy = 16.0; + if (screen->chip_family >= CHIP_FAMILY_RV515) { + ctx->Const.MaxTextureLevels = 13; + ctx->Const.MaxTextureRectSize = 4096; + } + ctx->Const.MinPointSize = 1.0; ctx->Const.MinPointSizeAA = 1.0; ctx->Const.MaxPointSize = R300_POINTSIZE_MAX; diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 780d9aa5d2..a5ec5ee46e 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -74,6 +74,7 @@ typedef struct r300_context *r300ContextPtr; #include "r300_vertprog.h" #include "r300_fragprog.h" +#include "r500_fragprog.h" /** * This function takes a float and packs it into a uint32_t @@ -330,6 +331,8 @@ struct r300_state_atom { #define R300_RI_INTERP_7 8 #define R300_RI_CMDSIZE 9 +#define R500_RI_CMDSIZE 17 + #define R300_RR_CMD_0 0 /* rr is variable size (at least 1) */ #define R300_RR_INST_0 1 #define R300_RR_INST_1 2 @@ -352,6 +355,17 @@ struct r300_state_atom { #define R300_FP_NODE3 8 #define R300_FP_CMDSIZE 9 +#define R500_FP_CMD_0 0 +#define R500_FP_CNTL 1 +#define R500_FP_PIXSIZE 2 +#define R500_FP_CMD_1 3 +#define R500_FP_CODE_ADDR 4 +#define R500_FP_CODE_RANGE 5 +#define R500_FP_CODE_OFFSET 6 +#define R500_FP_CMD_2 7 +#define R500_FP_FC_CNTL 8 +#define R500_FP_CMDSIZE 9 + #define R300_FPT_CMD_0 0 #define R300_FPT_INSTR_0 1 #define R300_FPT_CMDSIZE 65 @@ -359,10 +373,14 @@ struct r300_state_atom { #define R300_FPI_CMD_0 0 #define R300_FPI_INSTR_0 1 #define R300_FPI_CMDSIZE 65 +/* R500 has space for 512 instructions - 6 dwords per instruction */ +#define R500_FPI_CMDSIZE (512*6+1) #define R300_FPP_CMD_0 0 #define R300_FPP_PARAM_0 1 #define R300_FPP_CMDSIZE (32*4+1) +/* R500 has space for 256 constants - 4 dwords per constant */ +#define R500_FPP_CMDSIZE (256*4+1) #define R300_FOGS_CMD_0 0 #define R300_FOGS_STATE 1 @@ -410,6 +428,12 @@ struct r300_state_atom { #define R300_ZB_PITCH 2 #define R300_ZB_CMDSIZE 3 +#define
R300_VAP_CNTL_FLUSH 0 +#define R300_VAP_CNTL_FLUSH_1 1 +#define R300_VAP_CNTL_CMD 2 +#define R300_VAP_CNTL_INSTR 3 +#define R300_VAP_CNTL_SIZE 4 + #define R300_VPI_CMD_0 0 #define R300_VPI_INSTR_0 1 #define R300_VPI_CMDSIZE 1025 /* 256 16 byte instructions */ @@ -451,6 +475,7 @@ struct r300_hw_state { struct r300_state_atom vpt; /* viewport (1D98) */ struct r300_state_atom vap_cntl; + struct r300_state_atom vap_index_offset; /* 0x208c r5xx only */ struct r300_state_atom vof; /* VAP output format register 0x2090 */ struct r300_state_atom vte; /* (20B0) */ struct r300_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */ @@ -473,7 +498,7 @@ struct r300_hw_state { struct r300_state_atom shade; struct r300_state_atom polygon_mode; struct r300_state_atom fogp; /* fog parameters (4294) */ - struct r300_state_atom unk429C; /* (429C) */ + struct r300_state_atom ga_soft_reset; /* (429C) */ struct r300_state_atom zbias_cntl; struct r300_state_atom zbs; /* zbias (42A4) */ struct r300_state_atom occlusion_cntl; @@ -487,6 +512,8 @@ struct r300_hw_state { struct r300_state_atom fp; /* fragment program cntl + nodes (4600) */ struct r300_state_atom fpt; /* texi - (4620) */ struct r300_state_atom us_out_fmt; /* (46A4) */ + struct r300_state_atom r500fp; /* r500 fp instructions */ + struct r300_state_atom r500fp_const; /* r500 fp constants */ struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ struct r300_state_atom fogs; /* fog state (4BC0) */ struct r300_state_atom fogc; /* fog color (4BC8) */ @@ -767,6 +794,47 @@ struct r300_fragment_program { int max_temp_idx; + GLboolean WritesDepth; + GLuint optimization; +}; + +struct r500_fragment_program { + struct gl_fragment_program mesa_program; + + GLcontext *ctx; + GLboolean translated; + GLboolean error; + struct r300_pfs_compile_state *cs; + + struct { + GLuint inst0; + GLuint inst1; + GLuint inst2; + GLuint inst3; + GLuint inst4; + GLuint inst5; + } inst[512]; + /* TODO: This is magic! 
*/ + + int temp_reg_offset; + + int inst_offset; + int inst_end; + + /* Hardware constants. + * Contains a pointer to the value. The destination of the pointer + * is supposed to be updated when GL state changes. + * Typically, this is either a pointer into + * gl_program_parameter_list::ParameterValues, or a pointer to a + * global constant (e.g. for sin/cos-approximation) + */ + const GLfloat *constant[PFS_NUM_CONST_REGS]; + int const_nr; + + int max_temp_idx; + + GLboolean writes_depth; + GLuint optimization; }; @@ -804,7 +872,7 @@ struct r300_state { */ struct r300_swtcl_info { GLuint RenderIndex; - + /** * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is * installed in the Mesa state vector. diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index e7371133d3..2ea17ad0a7 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -207,7 +207,10 @@ static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb, } } -static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, +#define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \ + (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT) + +GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, int *inputs, GLint * tab, GLuint nr) { GLuint i, dw; @@ -216,14 +219,15 @@ static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, for (i = 0; i < nr; i += 2) { /* make sure input is valid, would lockup the gpu */ assert(inputs[tab[i]] != -1); - dw = R300_INPUT_ROUTE_FLOAT | (inputs[tab[i]] << 8) | (attribptr[tab[i]]->size - 1); + dw = (R300_SIGNED | DW_SIZE(i)); if (i + 1 == nr) { - dw |= R300_VAP_INPUT_ROUTE_END; + dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; } else { assert(inputs[tab[i + 1]] != -1); - dw |= (R300_INPUT_ROUTE_FLOAT | (inputs[tab[i + 1]] << 8) | (attribptr[tab[i + 1]]->size - 1)) << 16; + dw |= (R300_SIGNED | + DW_SIZE(i + 1)) << 
R300_DATA_TYPE_1_SHIFT; if (i + 2 == nr) { - dw |= (R300_VAP_INPUT_ROUTE_END << 16); + dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; } } dst[i >> 1] = dw; @@ -234,10 +238,10 @@ static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, static GLuint r300VAPInputRoute1Swizzle(int swizzle[4]) { - return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) | - (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) | - (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) | - (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT); + return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | + (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | + (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | + (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT); } GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) @@ -245,9 +249,13 @@ GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) GLuint i, dw; for (i = 0; i < nr; i += 2) { - dw = r300VAPInputRoute1Swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE; + dw = (r300VAPInputRoute1Swizzle(swizzle[i]) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | + R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT; if (i + 1 < nr) { - dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) << 16; + dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | + R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; } dst[i >> 1] = dw; } @@ -542,10 +550,10 @@ void r300EmitCacheFlush(r300ContextPtr rmesa) drm_radeon_cmd_header_t *cmd = NULL; reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | - RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); - reg_start(ZB_ZCACHE_CTLSTAT, 0); - e32(ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + reg_start(R300_ZB_ZCACHE_CTLSTAT, 0); + 
e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); } diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index a6d69ec5ff..e6a6df8c4c 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -74,6 +74,20 @@ static inline uint32_t cmdvpu(int addr, int count) return cmd.u; } +static inline uint32_t cmdr500fp(int addr, int count, int type, int clamp) +{ + drm_r300_cmd_header_t cmd; + + cmd.r500fp.cmd_type = R300_CMD_R500FP; + cmd.r500fp.count = count; + cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8; + cmd.r500fp.adrhi_flags |= type ? R500FP_CONSTANT_TYPE : 0; + cmd.r500fp.adrhi_flags |= clamp ? R500FP_CONSTANT_CLAMP : 0; + cmd.r500fp.adrlo = ((unsigned int)addr & 0x00FF); + + return cmd.u; +} + static inline uint32_t cmdpacket3(int packet) { drm_r300_cmd_header_t cmd; @@ -166,6 +180,19 @@ static inline uint32_t cmdpacify(void) cmd[0].i = cmdvpu((dest), _n/4); \ } while (0); +#define r500fp_start_fragment(dest, length) \ + do { \ + int _n; \ + _n = (length); \ + cmd = (drm_radeon_cmd_header_t*) \ + r300AllocCmdBuf(rmesa, \ + (_n+1), \ + __FUNCTION__); \ + cmd_reserved = _n+1; \ + cmd_written =1; \ + cmd[0].i = cmdr500fp((dest), _n/6, 0, 0); \ + } while (0); + #define start_packet3(packet, count) \ { \ int _n; \ @@ -230,6 +257,8 @@ extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); extern void r300EmitCacheFlush(r300ContextPtr rmesa); +extern GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, + int *inputs, GLint * tab, GLuint nr); extern GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr); extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead); extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index c664fb6562..54b80d20a1 100644 --- 
a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -172,19 +172,19 @@ static const struct { int s_op; } r300_fpop[] = { /* *INDENT-OFF* */ - {"MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD}, - {"DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4}, - {"DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4}, - {"MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN}, - {"MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX}, - {"CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP}, - {"FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC}, - {"EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2}, - {"LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2}, - {"RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP}, - {"RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ}, - {"REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL}, - {"CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL}, + {"MAD", 3, R300_ALU_OUTC_MAD, R300_ALU_OUTA_MAD}, + {"DP3", 2, R300_ALU_OUTC_DP3, R300_ALU_OUTA_DP4}, + {"DP4", 2, R300_ALU_OUTC_DP4, R300_ALU_OUTA_DP4}, + {"MIN", 2, R300_ALU_OUTC_MIN, R300_ALU_OUTA_MIN}, + {"MAX", 2, R300_ALU_OUTC_MAX, R300_ALU_OUTA_MAX}, + {"CMP", 3, R300_ALU_OUTC_CMP, R300_ALU_OUTA_CMP}, + {"FRC", 1, R300_ALU_OUTC_FRC, R300_ALU_OUTA_FRC}, + {"EX2", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_EX2}, + {"LG2", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_LG2}, + {"RCP", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_RCP}, + {"RSQ", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_RSQ}, + {"REPL_ALPHA", 1, R300_ALU_OUTC_REPL_ALPHA, PFS_INVAL}, + {"CMPH", 3, R300_ALU_OUTC_CMPH, PFS_INVAL}, /* *INDENT-ON* */ }; @@ -209,17 +209,17 @@ static const struct r300_pfs_swizzle { GLuint flags; } v_swiz[] = { /* *INDENT-OFF* */ - {MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Z, Z, Z), 
R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR}, - {MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH}, - {MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0}, - {MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0}, + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, SLOT_SRC_SCALAR}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}, {PFS_INVAL, 0, 0, 0}, /* *INDENT-ON* */ }; @@ -252,13 +252,13 @@ static const struct { GLuint flags; } s_swiz[] = { /* *INDENT-OFF* */ - {R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR}, - {R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR}, - {R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR}, - {R300_FPI2_ARGA_SRC0A, 1, SLOT_SRC_SCALAR}, - {R300_FPI2_ARGA_ZERO, 0, 0}, - {R300_FPI2_ARGA_ONE, 0, 0}, - {R300_FPI2_ARGA_HALF, 0, 0} + {R300_ALU_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR}, + {R300_ALU_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR}, + {R300_ALU_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR}, + {R300_ALU_ARGA_SRC0A, 1, SLOT_SRC_SCALAR}, + {R300_ALU_ARGA_ZERO, 0, 0}, + {R300_ALU_ARGA_ONE, 0, 0}, + {R300_ALU_ARGA_HALF, 0, 0} /* *INDENT-ON* */ }; @@ -859,11 +859,12 @@ static int t_hw_dst(struct r300_fragment_program 
*fp, switch (index) { case FRAG_RESULT_COLR: fp->node[fp->cur_node].flags |= - R300_PFS_NODE_OUTPUT_COLOR; + R300_RGBA_OUT; break; case FRAG_RESULT_DEPR: + fp->WritesDepth = GL_TRUE; fp->node[fp->cur_node].flags |= - R300_PFS_NODE_OUTPUT_DEPTH; + R300_W_OUT; break; } return index; @@ -903,49 +904,59 @@ static void emit_tex(struct r300_fragment_program *fp, int hwsrc, hwdest; GLuint tempreg = 0; + /** + * Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + * + * \todo Refactor this once we have proper rewriting/optimization + * support for programs. + */ + if (opcode != R300_TEX_OP_KIL && fpi->TexSrcTarget == TEXTURE_RECT_INDEX) { + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; + int factor_index; + GLuint factorreg; + + tokens[2] = unit; + factor_index = + _mesa_add_state_reference(fp->mesa_program.Base. + Parameters, tokens); + factorreg = + emit_const4fv(fp, + fp->mesa_program.Base.Parameters-> + ParameterValues[factor_index]); + tempreg = keep(get_temp_reg(fp)); + + emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, + coord, factorreg, pfs_zero, 0); + + coord = tempreg; + } + + /* Texture operations do not support swizzles etc. in hardware, + * so emit an additional arithmetic operation if necessary. 
+ */ + if (REG_GET_VSWZ(coord) != SWIZZLE_XYZ || + REG_GET_SSWZ(coord) != SWIZZLE_W || + coord & (REG_NEGV_MASK | REG_NEGS_MASK | REG_ABS_MASK)) { + assert(tempreg == 0); + tempreg = keep(get_temp_reg(fp)); + emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, + coord, pfs_one, pfs_zero, 0); + coord = tempreg; + } + + /* Ensure correct node indirection */ uin = cs->used_in_node; din = cs->dest_in_node; /* Resolve source/dest to hardware registers */ - if (opcode != R300_FPITX_OP_KIL) { - if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) { - /** - * Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. - * - * \todo Refactor this once we have proper rewriting/optimization - * support for programs. - */ - gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, - 0 - }; - int factor_index; - GLuint factorreg; - - tokens[2] = unit; - factor_index = - _mesa_add_state_reference(fp->mesa_program.Base. 
- Parameters, tokens); - factorreg = - emit_const4fv(fp, - fp->mesa_program.Base.Parameters-> - ParameterValues[factor_index]); - tempreg = keep(get_temp_reg(fp)); - - emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, - coord, factorreg, pfs_zero, 0); - - /* Ensure correct node indirection */ - uin = cs->used_in_node; - din = cs->dest_in_node; - - hwsrc = t_hw_src(fp, tempreg, GL_TRUE); - } else { - hwsrc = t_hw_src(fp, coord, GL_TRUE); - } + hwsrc = t_hw_src(fp, coord, GL_TRUE); + if (opcode != R300_TEX_OP_KIL) { dest = t_dst(fp, fpi->DstReg); /* r300 doesn't seem to be able to do TEX->output reg */ @@ -972,7 +983,6 @@ static void emit_tex(struct r300_fragment_program *fp, } else { hwdest = 0; unit = 0; - hwsrc = t_hw_src(fp, coord, GL_TRUE); } /* Indirection if source has been written in this node, or if the @@ -1007,11 +1017,10 @@ static void emit_tex(struct r300_fragment_program *fp, if (fp->cur_node == 0) fp->first_node_has_tex = 1; - fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_FPITX_SRC_SHIFT) - | (hwdest << R300_FPITX_DST_SHIFT) - | (unit << R300_FPITX_IMAGE_SHIFT) - /* not entirely sure about this */ - | (opcode << R300_FPITX_OPCODE_SHIFT); + fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_SRC_ADDR_SHIFT) + | (hwdest << R300_DST_ADDR_SHIFT) + | (unit << R300_TEX_ID_SHIFT) + | (opcode << R300_TEX_INST_SHIFT); cs->dest_in_node |= (1 << hwdest); if (REG_GET_TYPE(coord) != REG_TYPE_CONST) @@ -1228,17 +1237,17 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, } // Emit the source fetch code - fp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK; + fp->alu.inst[pos].inst1 &= ~R300_ALU_SRC_MASK; fp->alu.inst[pos].inst1 |= - ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | - (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | - (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); + ((cs->slot[pos].vsrc[0] << R300_ALU_SRC0C_SHIFT) | + (cs->slot[pos].vsrc[1] << R300_ALU_SRC1C_SHIFT) | + (cs->slot[pos].vsrc[2] << R300_ALU_SRC2C_SHIFT)); - 
fp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK; + fp->alu.inst[pos].inst3 &= ~R300_ALU_SRC_MASK; fp->alu.inst[pos].inst3 |= - ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | - (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | - (cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); + ((cs->slot[pos].ssrc[0] << R300_ALU_SRC0A_SHIFT) | + (cs->slot[pos].ssrc[1] << R300_ALU_SRC1A_SHIFT) | + (cs->slot[pos].ssrc[2] << R300_ALU_SRC2A_SHIFT)); // Emit the argument selection code if (emit_vop) { @@ -1257,17 +1266,17 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, ARG_ABS : 0); } else { - swz[i] = R300_FPI0_ARGC_ZERO; + swz[i] = R300_ALU_ARGC_ZERO; } } fp->alu.inst[pos].inst0 &= - ~(R300_FPI0_ARG0C_MASK | R300_FPI0_ARG1C_MASK | - R300_FPI0_ARG2C_MASK); + ~(R300_ALU_ARG0C_MASK | R300_ALU_ARG1C_MASK | + R300_ALU_ARG2C_MASK); fp->alu.inst[pos].inst0 |= - (swz[0] << R300_FPI0_ARG0C_SHIFT) | (swz[1] << - R300_FPI0_ARG1C_SHIFT) - | (swz[2] << R300_FPI0_ARG2C_SHIFT); + (swz[0] << R300_ALU_ARG0C_SHIFT) | (swz[1] << + R300_ALU_ARG1C_SHIFT) + | (swz[2] << R300_ALU_ARG2C_SHIFT); } if (emit_sop) { @@ -1286,17 +1295,17 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, ARG_ABS : 0); } else { - swz[i] = R300_FPI2_ARGA_ZERO; + swz[i] = R300_ALU_ARGA_ZERO; } } fp->alu.inst[pos].inst2 &= - ~(R300_FPI2_ARG0A_MASK | R300_FPI2_ARG1A_MASK | - R300_FPI2_ARG2A_MASK); + ~(R300_ALU_ARG0A_MASK | R300_ALU_ARG1A_MASK | + R300_ALU_ARG2A_MASK); fp->alu.inst[pos].inst2 |= - (swz[0] << R300_FPI2_ARG0A_SHIFT) | (swz[1] << - R300_FPI2_ARG1A_SHIFT) - | (swz[2] << R300_FPI2_ARG2A_SHIFT); + (swz[0] << R300_ALU_ARG0A_SHIFT) | (swz[1] << + R300_ALU_ARG1A_SHIFT) + | (swz[2] << R300_ALU_ARG2A_SHIFT); } return pos; @@ -1333,9 +1342,9 @@ static void emit_arith(struct r300_fragment_program *fp, emit_vop = GL_FALSE; emit_sop = GL_FALSE; - if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3) + if ((mask & WRITEMASK_XYZ) || vop == R300_ALU_OUTC_DP3) emit_vop = GL_TRUE; - if 
((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA) + if ((mask & WRITEMASK_W) || vop == R300_ALU_OUTC_REPL_ALPHA) emit_sop = GL_TRUE; pos = @@ -1347,33 +1356,33 @@ static void emit_arith(struct r300_fragment_program *fp, hwdest = t_hw_dst(fp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ if (flags & PFS_FLAG_SAT) { - vop |= R300_FPI0_OUTC_SAT; - sop |= R300_FPI2_OUTA_SAT; + vop |= R300_ALU_OUTC_CLAMP; + sop |= R300_ALU_OUTA_CLAMP; } - /* Throw the pieces together and get FPI0/1 */ + /* Throw the pieces together and get ALU/1 */ if (emit_vop) { fp->alu.inst[pos].inst0 |= vop; - fp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; + fp->alu.inst[pos].inst1 |= hwdest << R300_ALU_DSTC_SHIFT; if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { fp->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << - R300_FPI1_DSTC_OUTPUT_MASK_SHIFT; + R300_ALU_DSTC_OUTPUT_MASK_SHIFT; } else assert(0); } else { fp->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << - R300_FPI1_DSTC_REG_MASK_SHIFT; + R300_ALU_DSTC_REG_MASK_SHIFT; cs->hwtemps[hwdest].vector_valid = pos + 1; } } - /* And now FPI2/3 */ + /* And now ALU/3 */ if (emit_sop) { fp->alu.inst[pos].inst2 |= sop; @@ -1381,18 +1390,18 @@ static void emit_arith(struct r300_fragment_program *fp, if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { fp->alu.inst[pos].inst3 |= - (hwdest << R300_FPI3_DSTA_SHIFT) | - R300_FPI3_DSTA_OUTPUT; + (hwdest << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_OUTPUT; } else if (REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { fp->alu.inst[pos].inst3 |= - R300_FPI3_DSTA_DEPTH; + R300_ALU_DSTA_DEPTH; } else assert(0); } else { fp->alu.inst[pos].inst3 |= - (hwdest << R300_FPI3_DSTA_SHIFT) | - R300_FPI3_DSTA_REG; + (hwdest << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; cs->hwtemps[hwdest].scalar_valid = pos + 1; } @@ -1708,7 +1717,7 @@ static GLboolean parse_program(struct r300_fragment_program *fp) src[0], 
undef, undef, flags); break; case OPCODE_KIL: - emit_tex(fp, fpi, R300_FPITX_OP_KIL); + emit_tex(fp, fpi, R300_TEX_OP_KIL); break; case OPCODE_LG2: src[0] = t_scalar_src(fp, fpi->SrcReg[0]); @@ -1943,13 +1952,13 @@ static GLboolean parse_program(struct r300_fragment_program *fp) src[0], pfs_one, negate(src[1]), flags); break; case OPCODE_TEX: - emit_tex(fp, fpi, R300_FPITX_OP_TEX); + emit_tex(fp, fpi, R300_TEX_OP_LD); break; case OPCODE_TXB: - emit_tex(fp, fpi, R300_FPITX_OP_TXB); + emit_tex(fp, fpi, R300_TEX_OP_TXB); break; case OPCODE_TXP: - emit_tex(fp, fpi, R300_FPITX_OP_TXP); + emit_tex(fp, fpi, R300_TEX_OP_TXP); break; case OPCODE_XPD:{ src[0] = t_src(fp, fpi->SrcReg[0]); @@ -2097,6 +2106,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) fp->translated = GL_FALSE; fp->error = GL_FALSE; fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile); + fp->WritesDepth = GL_FALSE; fp->tex.length = 0; fp->cur_node = 0; fp->first_node_has_tex = 0; @@ -2217,6 +2227,7 @@ static void update_params(struct r300_fragment_program *fp) void r300TranslateFragmentShader(r300ContextPtr r300, struct r300_fragment_program *fp) { + struct r300_pfs_compile_state *cs = NULL; if (!fp->translated) { @@ -2281,18 +2292,18 @@ static void dump_program(struct r300_fragment_program *fp) const char *instr; switch ((fp->tex. - inst[i] >> R300_FPITX_OPCODE_SHIFT) & + inst[i] >> R300_TEX_INST_SHIFT) & 15) { - case R300_FPITX_OP_TEX: + case R300_TEX_OP_LD: instr = "TEX"; break; - case R300_FPITX_OP_KIL: + case R300_TEX_OP_KIL: instr = "KIL"; break; - case R300_FPITX_OP_TXP: + case R300_TEX_OP_TXP: instr = "TXP"; break; - case R300_FPITX_OP_TXB: + case R300_TEX_OP_TXB: instr = "TXB"; break; default: @@ -2303,15 +2314,13 @@ static void dump_program(struct r300_fragment_program *fp) " %s t%i, %c%i, texture[%i] (%08x)\n", instr, (fp->tex. - inst[i] >> R300_FPITX_DST_SHIFT) & 31, - (fp->tex. - inst[i] & R300_FPITX_SRC_CONST) ? 
'c' : + inst[i] >> R300_DST_ADDR_SHIFT) & 31, 't', (fp->tex. - inst[i] >> R300_FPITX_SRC_SHIFT) & 31, + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, (fp->tex. - inst[i] & R300_FPITX_IMAGE_MASK) >> - R300_FPITX_IMAGE_SHIFT, + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, fp->tex.inst[i]); } } @@ -2337,45 +2346,45 @@ static void dump_program(struct r300_fragment_program *fp) dstc[0] = 0; sprintf(flags, "%s%s%s", (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "", + inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "", + inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : ""); + inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); if (flags[0] != 0) { sprintf(dstc, "t%i.%s ", (fp->alu.inst[i]. - inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + inst1 >> R300_ALU_DSTC_SHIFT) & 31, flags); } sprintf(flags, "%s%s%s", (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "", + inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "", + inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : ""); + inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); if (flags[0] != 0) { sprintf(tmp, "o%i.%s", (fp->alu.inst[i]. - inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + inst1 >> R300_ALU_DSTC_SHIFT) & 31, flags); strcat(dstc, tmp); } dsta[0] = 0; - if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) { + if (fp->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { sprintf(dsta, "t%i.w ", (fp->alu.inst[i]. - inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + inst3 >> R300_ALU_DSTA_SHIFT) & 31); } - if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) { + if (fp->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { sprintf(tmp, "o%i.w ", (fp->alu.inst[i]. 
- inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + inst3 >> R300_ALU_DSTA_SHIFT) & 31); strcat(dsta, tmp); } - if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) { + if (fp->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { strcat(dsta, "Z"); } @@ -2395,19 +2404,19 @@ static void dump_program(struct r300_fragment_program *fp) d = regc & 31; if (d < 12) { switch (d % 4) { - case R300_FPI0_ARGC_SRC0C_XYZ: + case R300_ALU_ARGC_SRC0C_XYZ: sprintf(buf, "%s.xyz", srcc[d / 4]); break; - case R300_FPI0_ARGC_SRC0C_XXX: + case R300_ALU_ARGC_SRC0C_XXX: sprintf(buf, "%s.xxx", srcc[d / 4]); break; - case R300_FPI0_ARGC_SRC0C_YYY: + case R300_ALU_ARGC_SRC0C_YYY: sprintf(buf, "%s.yyy", srcc[d / 4]); break; - case R300_FPI0_ARGC_SRC0C_ZZZ: + case R300_ALU_ARGC_SRC0C_ZZZ: sprintf(buf, "%s.zzz", srcc[d / 4]); break; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 73efe49fc1..573aacf19a 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -75,23 +75,23 @@ typedef struct r300_fragment_program_swizzle { #define SRC_STRIDE 6 #define NOP_INST0 ( \ - (R300_FPI0_OUTC_MAD) | \ - (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \ - (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \ - (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT)) + (R300_ALU_OUTC_MAD) | \ + (R300_ALU_ARGC_ZERO << R300_ALU_ARG0C_SHIFT) | \ + (R300_ALU_ARGC_ZERO << R300_ALU_ARG1C_SHIFT) | \ + (R300_ALU_ARGC_ZERO << R300_ALU_ARG2C_SHIFT)) #define NOP_INST1 ( \ - ((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT)) + ((0 | SRC_CONST) << R300_ALU_SRC0C_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC1C_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC2C_SHIFT)) #define NOP_INST2 ( \ - (R300_FPI2_OUTA_MAD) | \ - (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \ - (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \ - (R300_FPI2_ARGA_ZERO << 
R300_FPI2_ARG2A_SHIFT)) + (R300_ALU_OUTA_MAD) | \ + (R300_ALU_ARGA_ZERO << R300_ALU_ARG0A_SHIFT) | \ + (R300_ALU_ARGA_ZERO << R300_ALU_ARG1A_SHIFT) | \ + (R300_ALU_ARGA_ZERO << R300_ALU_ARG2A_SHIFT)) #define NOP_INST3 ( \ - ((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT)) + ((0 | SRC_CONST) << R300_ALU_SRC0A_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC1A_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC2A_SHIFT)) #define DRI_CONF_FP_OPTIMIZATION_SPEED 0 #define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 @@ -117,42 +117,42 @@ typedef struct r300_fragment_program_swizzle { #define FP_SELC_MASK_XYZ 7 #define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_FPI1_DSTC_SHIFT) | \ + (((destidx) << R300_ALU_DSTC_SHIFT) | \ (FP_SELC_MASK_##regmask << 23) | \ (FP_SELC_MASK_##outmask << 26) | \ - ((src0) << R300_FPI1_SRC0C_SHIFT) | \ - ((src1) << R300_FPI1_SRC1C_SHIFT) | \ - ((src2) << R300_FPI1_SRC2C_SHIFT)) + ((src0) << R300_ALU_SRC0C_SHIFT) | \ + ((src1) << R300_ALU_SRC1C_SHIFT) | \ + ((src2) << R300_ALU_SRC2C_SHIFT)) #define FP_SELA_MASK_NO 0 #define FP_SELA_MASK_W 1 #define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_FPI3_DSTA_SHIFT) | \ + (((destidx) << R300_ALU_DSTA_SHIFT) | \ (FP_SELA_MASK_##regmask << 23) | \ (FP_SELA_MASK_##outmask << 24) | \ - ((src0) << R300_FPI3_SRC0A_SHIFT) | \ - ((src1) << R300_FPI3_SRC1A_SHIFT) | \ - ((src2) << R300_FPI3_SRC2A_SHIFT)) + ((src0) << R300_ALU_SRC0A_SHIFT) | \ + ((src1) << R300_ALU_SRC1A_SHIFT) | \ + ((src2) << R300_ALU_SRC2A_SHIFT)) /* Produce unshifted argument selectors */ -#define FP_ARGC(source) R300_FPI0_ARGC_##source -#define FP_ARGA(source) R300_FPI2_ARGA_##source +#define FP_ARGC(source) R300_ALU_ARGC_##source +#define FP_ARGA(source) R300_ALU_ARGA_##source #define FP_ABS(arg) ((arg) | (1 << 6)) #define FP_NEG(arg) ((arg) ^ (1 << 5)) /* Produce instruction dword */ #define 
FP_INSTRC(opcode,arg0,arg1,arg2) \ - (R300_FPI0_OUTC_##opcode | \ - ((arg0) << R300_FPI0_ARG0C_SHIFT) | \ - ((arg1) << R300_FPI0_ARG1C_SHIFT) | \ - ((arg2) << R300_FPI0_ARG2C_SHIFT)) + (R300_ALU_OUTC_##opcode | \ + ((arg0) << R300_ALU_ARG0C_SHIFT) | \ + ((arg1) << R300_ALU_ARG1C_SHIFT) | \ + ((arg2) << R300_ALU_ARG2C_SHIFT)) #define FP_INSTRA(opcode,arg0,arg1,arg2) \ - (R300_FPI2_OUTA_##opcode | \ - ((arg0) << R300_FPI2_ARG0A_SHIFT) | \ - ((arg1) << R300_FPI2_ARG1A_SHIFT) | \ - ((arg2) << R300_FPI2_ARG2A_SHIFT)) + (R300_ALU_OUTA_##opcode | \ + ((arg0) << R300_ALU_ARG0A_SHIFT) | \ + ((arg1) << R300_ALU_ARG1A_SHIFT) | \ + ((arg2) << R300_ALU_ARG2A_SHIFT)) #endif diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 1b405889c3..b0225453d3 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -118,7 +118,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) } R300_STATECHANGE(r300, zs); - reg_start(R300_RB3D_ZSTENCIL_CNTL_0, 2); + reg_start(R300_ZB_CNTL, 2); { uint32_t t1, t2; @@ -127,32 +127,32 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) t2 = 0x0; if (flags & CLEARBUFFER_DEPTH) { - t1 |= R300_RB3D_Z_WRITE_ONLY; + t1 |= R300_Z_ENABLE | R300_Z_WRITE_ENABLE; t2 |= - (R300_ZS_ALWAYS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); - } else { - t1 |= R300_RB3D_Z_DISABLED_1; // disable + (R300_ZS_ALWAYS << R300_Z_FUNC_SHIFT); + } else { //XXX + t1 |= R300_STENCIL_FRONT_BACK; // disable } if (flags & CLEARBUFFER_STENCIL) { - t1 |= R300_RB3D_STENCIL_ENABLE; + t1 |= R300_STENCIL_ENABLE; t2 |= (R300_ZS_ALWAYS << - R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | + R300_S_FRONT_FUNC_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) | + R300_S_FRONT_SFAIL_OP_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT) | + R300_S_FRONT_ZPASS_OP_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT) | + R300_S_FRONT_ZFAIL_OP_SHIFT) | 
(R300_ZS_ALWAYS << - R300_RB3D_ZS1_BACK_FUNC_SHIFT) | + R300_S_BACK_FUNC_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) | + R300_S_BACK_SFAIL_OP_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT) | + R300_S_BACK_ZPASS_OP_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT); + R300_S_BACK_ZFAIL_OP_SHIFT); } e32(t1); @@ -186,10 +186,16 @@ static void r300EmitClearState(GLcontext * ctx) int cmd_written = 0; drm_radeon_cmd_header_t *cmd = NULL; int has_tcl = 1; + int is_r500 = 0; + GLuint vap_cntl; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + + /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are * quite complex; see the functions in r300_emit.c. @@ -199,25 +205,38 @@ static void r300EmitClearState(GLcontext * ctx) * these registers, as well as the actual values used for rendering. 
*/ R300_STATECHANGE(r300, vir[0]); - reg_start(R300_VAP_INPUT_ROUTE_0_0, 0); + reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0); if (!has_tcl) - e32(0x22030003); + e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); else - e32(0x21030003); + e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); /* disable fog */ R300_STATECHANGE(r300, fogs); - reg_start(FG_FOG_BLEND, 0); + reg_start(R300_FG_FOG_BLEND, 0); e32(0x0); R300_STATECHANGE(r300, vir[1]); - reg_start(R300_VAP_INPUT_ROUTE_1_0, 0); - e32(0xF688F688); + reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0); + e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) + << R300_SWIZZLE0_SHIFT) | + (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) + << R300_SWIZZLE1_SHIFT))); /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ R300_STATECHANGE(r300, vic); - reg_start(R300_VAP_INPUT_CNTL_0, 1); - e32(R300_INPUT_CNTL_0_COLOR); + reg_start(R300_VAP_VTX_STATE_CNTL, 1); + e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); R300_STATECHANGE(r300, vte); @@ -229,7 +248,7 @@ static void 
r300EmitClearState(GLcontext * ctx) R300_VPORT_Z_OFFSET_ENA); e32(0x8); - reg_start(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO); + reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0); e32(0xaaaaaaaa); R300_STATECHANGE(r300, vof); @@ -252,7 +271,7 @@ static void r300EmitClearState(GLcontext * ctx) efloat(0.0); R300_STATECHANGE(r300, at); - reg_start(FG_ALPHA_FUNC, 0); + reg_start(R300_FG_ALPHA_FUNC, 0); e32(0x0); R300_STATECHANGE(r300, bld); @@ -263,7 +282,7 @@ static void r300EmitClearState(GLcontext * ctx) if (has_tcl) { R300_STATECHANGE(r300, vap_clip_cntl); reg_start(R300_VAP_CLIP_CNTL, 0); - e32(R300_221C_CLEAR); + e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); } R300_STATECHANGE(r300, ps); @@ -271,59 +290,169 @@ static void r300EmitClearState(GLcontext * ctx) e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); - R300_STATECHANGE(r300, ri); - reg_start(R300_RS_IP_0, 8); - for (i = 0; i < 8; ++i) { - e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); + if (!is_r500) { + R300_STATECHANGE(r300, ri); + reg_start(R300_RS_IP_0, 7); + for (i = 0; i < 8; ++i) { + e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); + } + + R300_STATECHANGE(r300, rc); + /* The second constant is needed to get glxgears display anything .. */ + reg_start(R300_RS_COUNT, 1); + e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + e32(0x0); + + R300_STATECHANGE(r300, rr); + reg_start(R300_RS_INST_0, 0); + e32(R300_RS_INST_COL_CN_WRITE); + } else { + + R300_STATECHANGE(r300, ri); + reg_start(R500_RS_IP_0, 7); + for (i = 0; i < 8; ++i) { + e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); + } + + R300_STATECHANGE(r300, rc); + /* The second constant is needed to get glxgears display anything .. 
*/ + reg_start(R300_RS_COUNT, 1); + e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + e32(0x0); + + R300_STATECHANGE(r300, rr); + reg_start(R500_RS_INST_0, 0); + e32(R500_RS_INST_COL_CN_WRITE); + } - R300_STATECHANGE(r300, rc); - /* The second constant is needed to get glxgears display anything .. */ - reg_start(R300_RS_COUNT, 1); - e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - e32(0x0); + if (!is_r500) { + R300_STATECHANGE(r300, fp); + reg_start(R300_US_CONFIG, 2); + e32(0x0); + e32(0x0); + e32(0x0); + reg_start(R300_US_CODE_ADDR_0, 3); + e32(0x0); + e32(0x0); + e32(0x0); + e32(R300_RGBA_OUT); - R300_STATECHANGE(r300, rr); - reg_start(R300_RS_INST_0, 0); - e32(R300_RS_INST_COL_CN_WRITE); + R300_STATECHANGE(r300, fpi[0]); + R300_STATECHANGE(r300, fpi[1]); + R300_STATECHANGE(r300, fpi[2]); + R300_STATECHANGE(r300, fpi[3]); - R300_STATECHANGE(r300, fp); - reg_start(R300_PFS_CNTL_0, 2); - e32(0x0); - e32(0x0); - e32(0x0); - reg_start(R300_PFS_NODE_0, 3); - e32(0x0); - e32(0x0); - e32(0x0); - e32(R300_PFS_NODE_OUTPUT_COLOR); + reg_start(R300_US_ALU_RGB_INST_0, 0); + e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); - R300_STATECHANGE(r300, fpi[0]); - R300_STATECHANGE(r300, fpi[1]); - R300_STATECHANGE(r300, fpi[2]); - R300_STATECHANGE(r300, fpi[3]); + reg_start(R300_US_ALU_RGB_ADDR_0, 0); + e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); - reg_start(R300_PFS_INSTR0_0, 0); - e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); + reg_start(R300_US_ALU_ALPHA_INST_0, 0); + e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); - reg_start(R300_PFS_INSTR1_0, 0); - e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); + reg_start(R300_US_ALU_ALPHA_ADDR_0, 0); + e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + } else { + R300_STATECHANGE(r300, fp); + reg_start(R500_US_CONFIG, 1); + e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); + e32(0x0); + reg_start(R500_US_CODE_ADDR, 2); + e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); + 
e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); + e32(R500_US_CODE_OFFSET_ADDR(0)); + + R300_STATECHANGE(r300, r500fp); + r500fp_start_fragment(0, 6); + + e32(R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP); + + e32(R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST); + + e32(R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST); + + e32(R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | + R500_ALU_RGB_G_SWIZ_B_B); + + e32(R500_ALPHA_OP_CMP | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_A); + + e32(R500_ALU_RGBA_OP_CMP | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0); + } - reg_start(R300_PFS_INSTR2_0, 0); - e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); + reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0); + e32(0x00000000); + if (has_tcl) { + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT)); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vap_cntl |= R500_TCL_STATE_OPTIMIZATION; + } else + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) + vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560)) + vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if (r300->radeon.radeonScreen->chip_family 
== CHIP_FAMILY_R420) + vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else + vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); - reg_start(R300_PFS_INSTR3_0, 0); - e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + R300_STATECHANGE(rmesa, vap_cntl); + reg_start(R300_VAP_CNTL, 0); + e32(vap_cntl); if (has_tcl) { R300_STATECHANGE(r300, pvs); - reg_start(R300_VAP_PVS_CNTL_1, 2); - - e32((0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | - (0 << R300_PVS_CNTL_1_POS_END_SHIFT) | - (1 << R300_PVS_CNTL_1_PROGRAM_END_SHIFT)); - e32(0x0); - e32(1 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT); + reg_start(R300_VAP_PVS_CODE_CNTL_0, 2); + + e32((0 << R300_PVS_FIRST_INST_SHIFT) | + (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | + (1 << R300_PVS_LAST_INST_SHIFT)); + e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); + e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); R300_STATECHANGE(r300, vpi); vsf_start_fragment(0x0, 8); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 2200cec6ab..21e1dc29de 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -67,9 +67,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* * Vertex Array Processing (VAP) Control - * Stolen from r200 code from Christoph Brill (It's a guess!) 
*/ #define R300_VAP_CNTL 0x2080 +# define R300_PVS_NUM_SLOTS_SHIFT 0 +# define R300_PVS_NUM_CNTLRS_SHIFT 4 +# define R300_PVS_NUM_FPUS_SHIFT 8 +# define R300_VF_MAX_VTX_NUM_SHIFT 18 +# define R300_GL_CLIP_SPACE_DEF (0 << 22) +# define R300_DX_CLIP_SPACE_DEF (1 << 22) +# define R500_TCL_STATE_OPTIMIZATION (1 << 23) /* This register is written directly and also starts data section * in many 3d CP_PACKET3's @@ -106,6 +112,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* number of vertices */ # define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16 +#define R500_VAP_INDEX_OFFSET 0x208c + #define R300_VAP_OUTPUT_VTX_FMT_0 0x2090 # define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0) # define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1) @@ -125,24 +133,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 -# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT (1<<0) -# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT (1<<1) -# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS (1<<2) -# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS (1<<3) -# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS (1<<4) +# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1 +# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2 +# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4 #define R300_SE_VTE_CNTL 0x20b0 -# define R300_VPORT_X_SCALE_ENA 0x00000001 -# define R300_VPORT_X_OFFSET_ENA 0x00000002 -# define R300_VPORT_Y_SCALE_ENA 0x00000004 -# define R300_VPORT_Y_OFFSET_ENA 0x00000008 -# define R300_VPORT_Z_SCALE_ENA 0x00000010 -# define R300_VPORT_Z_OFFSET_ENA 0x00000020 -# define R300_VTX_XY_FMT 0x00000100 -# define R300_VTX_Z_FMT 0x00000200 -# define R300_VTX_W0_FMT 0x00000400 -# define R300_VTX_W0_NORMALIZE 0x00000800 -# define 
R300_VTX_ST_DENORMALIZED 0x00001000 +# define R300_VPORT_X_SCALE_ENA (1 << 0) +# define R300_VPORT_X_OFFSET_ENA (1 << 1) +# define R300_VPORT_Y_SCALE_ENA (1 << 2) +# define R300_VPORT_Y_OFFSET_ENA (1 << 3) +# define R300_VPORT_Z_SCALE_ENA (1 << 4) +# define R300_VPORT_Z_OFFSET_ENA (1 << 5) +# define R300_VTX_XY_FMT (1 << 8) +# define R300_VTX_Z_FMT (1 << 9) +# define R300_VTX_W0_FMT (1 << 10) +# define R300_SERIAL_PROC_ENA (1 << 11) /* BEGIN: Vertex data assembly - lots of uncertainties */ @@ -211,27 +218,31 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Always set COMPONENTS_4 in immediate mode. */ -#define R300_VAP_INPUT_ROUTE_0_0 0x2150 -# define R300_INPUT_ROUTE_COMPONENTS_1 (0 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_2 (1 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_3 (2 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_4 (3 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_RGBA (4 << 0) /* GUESS */ -# define R300_VAP_INPUT_ROUTE_IDX_SHIFT 8 -# define R300_VAP_INPUT_ROUTE_IDX_MASK (31 << 8) /* GUESS */ -# define R300_VAP_INPUT_ROUTE_END (1 << 13) -# define R300_INPUT_ROUTE_IMMEDIATE_MODE (0 << 14) /* GUESS */ -# define R300_INPUT_ROUTE_FLOAT (1 << 14) /* GUESS */ -# define R300_INPUT_ROUTE_UNSIGNED_BYTE (2 << 14) /* GUESS */ -# define R300_INPUT_ROUTE_FLOAT_COLOR (3 << 14) /* GUESS */ -#define R300_VAP_INPUT_ROUTE_0_1 0x2154 -#define R300_VAP_INPUT_ROUTE_0_2 0x2158 -#define R300_VAP_INPUT_ROUTE_0_3 0x215C -#define R300_VAP_INPUT_ROUTE_0_4 0x2160 -#define R300_VAP_INPUT_ROUTE_0_5 0x2164 -#define R300_VAP_INPUT_ROUTE_0_6 0x2168 -#define R300_VAP_INPUT_ROUTE_0_7 0x216C - +#define R300_VAP_PROG_STREAM_CNTL_0 0x2150 +# define R300_DATA_TYPE_0_SHIFT 0 +# define R300_DATA_TYPE_FLOAT_1 0 +# define R300_DATA_TYPE_FLOAT_2 1 +# define R300_DATA_TYPE_FLOAT_3 2 +# define R300_DATA_TYPE_FLOAT_4 3 +# define R300_DATA_TYPE_BYTE 4 +# define R300_DATA_TYPE_D3DCOLOR 5 +# define R300_DATA_TYPE_SHORT_2 6 +# define R300_DATA_TYPE_SHORT_4 7 +# define R300_DATA_TYPE_VECTOR_3_TTT 8 +# define 
R300_DATA_TYPE_VECTOR_3_EET 9 +# define R300_SKIP_DWORDS_SHIFT 4 +# define R300_DST_VEC_LOC_SHIFT 8 +# define R300_LAST_VEC (1 << 13) +# define R300_SIGNED (1 << 14) +# define R300_NORMALIZE (1 << 15) +# define R300_DATA_TYPE_1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_1 0x2154 +#define R300_VAP_PROG_STREAM_CNTL_2 0x2158 +#define R300_VAP_PROG_STREAM_CNTL_3 0x215C +#define R300_VAP_PROG_STREAM_CNTL_4 0x2160 +#define R300_VAP_PROG_STREAM_CNTL_5 0x2164 +#define R300_VAP_PROG_STREAM_CNTL_6 0x2168 +#define R300_VAP_PROG_STREAM_CNTL_7 0x216C /* gap */ /* Notes: @@ -239,9 +250,26 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * if vertex program uses only position, fglrx will set normal, too * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal. */ -#define R300_VAP_INPUT_CNTL_0 0x2180 -# define R300_INPUT_CNTL_0_COLOR 0x00000001 -#define R300_VAP_INPUT_CNTL_1 0x2184 +#define R300_VAP_VTX_STATE_CNTL 0x2180 +# define R300_COLOR_0_ASSEMBLY_SHIFT 0 +# define R300_SEL_COLOR 0 +# define R300_SEL_USER_COLOR_0 1 +# define R300_SEL_USER_COLOR_1 2 +# define R300_COLOR_1_ASSEMBLY_SHIFT 2 +# define R300_COLOR_2_ASSEMBLY_SHIFT 4 +# define R300_COLOR_3_ASSEMBLY_SHIFT 6 +# define R300_COLOR_4_ASSEMBLY_SHIFT 8 +# define R300_COLOR_5_ASSEMBLY_SHIFT 10 +# define R300_COLOR_6_ASSEMBLY_SHIFT 12 +# define R300_COLOR_7_ASSEMBLY_SHIFT 14 +# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16) + +/* + * Each bit in this field applies to the corresponding vector in the VSM + * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit + * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream. + */ +#define R300_VAP_VSM_VTX_ASSM 0x2184 # define R300_INPUT_CNTL_POS 0x00000001 # define R300_INPUT_CNTL_NORMAL 0x00000002 # define R300_INPUT_CNTL_COLOR 0x00000004 @@ -269,26 +297,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * mode, the swizzling pattern is e.g. used to set zw components in texture * coordinates with only tweo components. 
*/ -#define R300_VAP_INPUT_ROUTE_1_0 0x21E0 +#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0 +# define R300_SWIZZLE0_SHIFT 0 +# define R300_SWIZZLE_SELECT_X_SHIFT 0 +# define R300_SWIZZLE_SELECT_Y_SHIFT 3 +# define R300_SWIZZLE_SELECT_Z_SHIFT 6 +# define R300_SWIZZLE_SELECT_W_SHIFT 9 + +# define R300_SWIZZLE_SELECT_X 0 +# define R300_SWIZZLE_SELECT_Y 1 +# define R300_SWIZZLE_SELECT_Z 2 +# define R300_SWIZZLE_SELECT_W 3 +# define R300_SWIZZLE_SELECT_FP_ZERO 4 +# define R300_SWIZZLE_SELECT_FP_ONE 5 +/* alternate forms for r300_emit.c */ # define R300_INPUT_ROUTE_SELECT_X 0 # define R300_INPUT_ROUTE_SELECT_Y 1 # define R300_INPUT_ROUTE_SELECT_Z 2 # define R300_INPUT_ROUTE_SELECT_W 3 # define R300_INPUT_ROUTE_SELECT_ZERO 4 # define R300_INPUT_ROUTE_SELECT_ONE 5 -# define R300_INPUT_ROUTE_SELECT_MASK 7 -# define R300_INPUT_ROUTE_X_SHIFT 0 -# define R300_INPUT_ROUTE_Y_SHIFT 3 -# define R300_INPUT_ROUTE_Z_SHIFT 6 -# define R300_INPUT_ROUTE_W_SHIFT 9 -# define R300_INPUT_ROUTE_ENABLE (15 << 12) -#define R300_VAP_INPUT_ROUTE_1_1 0x21E4 -#define R300_VAP_INPUT_ROUTE_1_2 0x21E8 -#define R300_VAP_INPUT_ROUTE_1_3 0x21EC -#define R300_VAP_INPUT_ROUTE_1_4 0x21F0 -#define R300_VAP_INPUT_ROUTE_1_5 0x21F4 -#define R300_VAP_INPUT_ROUTE_1_6 0x21F8 -#define R300_VAP_INPUT_ROUTE_1_7 0x21FC + +# define R300_WRITE_ENA_SHIFT 12 +# define R300_WRITE_ENA_X 1 +# define R300_WRITE_ENA_Y 2 +# define R300_WRITE_ENA_Z 4 +# define R300_WRITE_ENA_W 8 +# define R300_SWIZZLE1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec +#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0 +#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc /* END: Vertex data assembly */ @@ -320,25 +362,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
* Multiple vertex programs and parameter sets can be loaded at once, * which could explain the size discrepancy. */ -#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 -# define R300_PVS_UPLOAD_PROGRAM 0x00000000 -/* gap */ -# define R300_PVS_UPLOAD_PARAMETERS 0x00000200 -/* gap */ -# define R300_PVS_UPLOAD_CLIP_PLANE0 0x00000400 -# define R300_PVS_UPLOAD_CLIP_PLANE1 0x00000401 -# define R300_PVS_UPLOAD_CLIP_PLANE2 0x00000402 -# define R300_PVS_UPLOAD_CLIP_PLANE3 0x00000403 -# define R300_PVS_UPLOAD_CLIP_PLANE4 0x00000404 -# define R300_PVS_UPLOAD_CLIP_PLANE5 0x00000405 -# define R300_PVS_UPLOAD_POINTSIZE 0x00000406 - -# define R500_PVS_UPLOAD_CLIP_PLANE0 0x00000600 -# define R500_PVS_UPLOAD_CLIP_PLANE1 0x00000601 -# define R500_PVS_UPLOAD_CLIP_PLANE2 0x00000602 -# define R500_PVS_UPLOAD_CLIP_PLANE3 0x00000603 -# define R500_PVS_UPLOAD_CLIP_PLANE4 0x00000604 -# define R500_PVS_UPLOAD_CLIP_PLANE5 0x00000605 +#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200 +# define R300_PVS_CODE_START 0 +# define R300_MAX_PVS_CODE_LINES 256 +# define R500_MAX_PVS_CODE_LINES 1024 +# define R300_PVS_CONST_START 512 +# define R500_PVS_CONST_START 1024 +# define R300_MAX_PVS_CONST_VECS 256 +# define R500_MAX_PVS_CONST_VECS 1024 +# define R300_PVS_UCP_START 1024 +# define R500_PVS_UCP_START 1536 +# define R300_POINT_VPORT_SCALE_OFFSET 1030 +# define R500_POINT_VPORT_SCALE_OFFSET 1542 +# define R300_POINT_GEN_TEX_OFFSET 1031 +# define R500_POINT_GEN_TEX_OFFSET 1543 /* * These are obsolete defines form r300_context.h, but they might give some @@ -373,9 +410,21 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * See bug #9871. 
http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view */ #define R300_VAP_CLIP_CNTL 0x221C -# define R300_221C_NORMAL 0x00000000 -# define R300_221C_CLEAR 0x0001C000 -#define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_1 (1 << 1) +# define R300_VAP_UCP_ENABLE_2 (1 << 2) +# define R300_VAP_UCP_ENABLE_3 (1 << 3) +# define R300_VAP_UCP_ENABLE_4 (1 << 4) +# define R300_VAP_UCP_ENABLE_5 (1 << 5) +# define R300_PS_UCP_MODE_DIST_COP (0 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14) +# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14) +# define R300_CLIP_DISABLE (1 << 16) +# define R300_UCP_CULL_ONLY_ENABLE (1 << 17) +# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18) +# define R500_COLOR2_IS_TEXTURE (1 << 20) +# define R500_COLOR3_IS_TEXTURE (1 << 21) /* These seem to be per-pixel and per-vertex X and Y clipping planes. The first * plane is per-pixel and the second plane is per-vertex. @@ -384,10 +433,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest. */ -#define R300_VAP_CLIP_X_0 0x2220 -#define R300_VAP_CLIP_X_1 0x2224 -#define R300_VAP_CLIP_Y_0 0x2228 -#define R300_VAP_CLIP_Y_1 0x222c +#define R300_VAP_GB_VERT_CLIP_ADJ 0x2220 +#define R300_VAP_GB_VERT_DISC_ADJ 0x2224 +#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 +#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c /* gap */ @@ -396,7 +445,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and * avoids bugs caused by still running shaders reading bad data from memory. */ -#define R300_VAP_PVS_WAITIDLE 0x2284 /* GUESS */ +#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284 /* This register is used to define the number of core clocks to wait for a * vertex to be received by the VAP input controller (while the primitive @@ -426,17 +475,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
* is sometimes accepted other instruction that have no relationship with * position calculations. */ -#define R300_VAP_PVS_CNTL_1 0x22D0 -# define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0 -# define R300_PVS_CNTL_1_POS_END_SHIFT 10 -# define R300_PVS_CNTL_1_PROGRAM_END_SHIFT 20 +#define R300_VAP_PVS_CODE_CNTL_0 0x22D0 +# define R300_PVS_FIRST_INST_SHIFT 0 +# define R300_PVS_XYZW_VALID_INST_SHIFT 10 +# define R300_PVS_LAST_INST_SHIFT 20 /* Addresses are relative the the vertex program parameters area. */ -#define R300_VAP_PVS_CNTL_2 0x22D4 -# define R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0 -# define R300_PVS_CNTL_2_PARAM_COUNT_SHIFT 16 -#define R300_VAP_PVS_CNTL_3 0x22D8 -# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT 10 -# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT 0 +#define R300_VAP_PVS_CONST_CNTL 0x22D4 +# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 +# define R300_PVS_MAX_CONST_ADDR_SHIFT 16 +#define R300_VAP_PVS_CODE_CNTL_1 0x22D8 +# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 +#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC /* The entire range from 0x2300 to 0x2AC inclusive seems to be used for * immediate vertices @@ -657,7 +706,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* This table specifies the source location and format for up to 16 texture * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) */ -#define R500_RS_IP_0 0x4074
+#define R500_RS_IP_0 0x4074 #define R500_RS_IP_1 0x4078 #define R500_RS_IP_2 0x407C #define R500_RS_IP_3 0x4080 @@ -673,31 +722,24 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_IP_13 0x40A8 #define R500_RS_IP_14 0x40AC #define R500_RS_IP_15 0x40B0 -#define R500_RS_IP_TEX_PTR_S_SHIFT 0
-#define R500_RS_IP_TEX_PTR_T_SHIFT 6
-#define R500_RS_IP_TEX_PTR_R_SHIFT 12
-#define R500_RS_IP_TEX_PTR_Q_SHIFT 18
-#define R500_RS_IP_COL_PTR_SHIFT 24
-#define R500_RS_IP_COL_FMT_SHIFT 27
-#define R500_RS_IP_COL_FMT_RGBA (0 << 27) -#define R500_RS_IP_COL_FMT_RGB0 (1 << 27) -#define R500_RS_IP_COL_FMT_RGB1 (2 << 27) -/* gap */ -#define R500_RS_IP_COL_FMT_000A (4 << 27) -#define R500_RS_IP_COL_FMT_0000 (5 << 27) -#define R500_RS_IP_COL_FMT_0001 (6 << 27) -/* gap */ -#define R500_RS_IP_COL_FMT_111A (8 << 27) -#define R500_RS_IP_COL_FMT_1110 (9 << 27) -#define R500_RS_IP_COL_FMT_1111 (10 << 27) +#define R500_RS_IP_PTR_K0 62 +#define R500_RS_IP_PTR_K1 63 +#define R500_RS_IP_TEX_PTR_S_SHIFT 0 +#define R500_RS_IP_TEX_PTR_T_SHIFT 6 +#define R500_RS_IP_TEX_PTR_R_SHIFT 12 +#define R500_RS_IP_TEX_PTR_Q_SHIFT 18 +#define R500_RS_IP_COL_PTR_SHIFT 24 +#define R500_RS_IP_COL_FMT_SHIFT 27 +# define R500_RS_COL_PTR(x) (x << 24) +# define R500_RS_COL_FMT(x) (x << 27) /* gap */ #define R500_RS_IP_OFFSET_DIS (0 << 31) -#define R500_RS_IP_OFFSET_EN (1 << 31)
+#define R500_RS_IP_OFFSET_EN (1 << 31) /* gap */ /* Zero to flush caches. */ -#define R300_TX_CNTL 0x4100 +#define R300_TX_INVALTAGS 0x4100 #define R300_TX_FLUSH 0x0 /* The upper enable bits are guessed, based on fglrx reported limits. */ @@ -728,25 +770,25 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R500_TX_DIRECTION_VERITCAL (1<<27) /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ -#define GA_POINT_S0 0x4200 +#define R300_GA_POINT_S0 0x4200 /* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ -#define GA_POINT_T0 0x4204 +#define R300_GA_POINT_T0 0x4204 /* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ -#define GA_POINT_S1 0x4208 +#define R300_GA_POINT_S1 0x4208 /* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ -#define GA_POINT_T1 0x420c +#define R300_GA_POINT_T1 0x420c /* Specifies amount to shift integer position of vertex (screen space) before * converting to float for triangle stipple. */ -#define GA_TRIANGLE_STIPPLE 0x4214 -# define GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0 -# define GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f -# define GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16 -# define GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000 +#define R300_GA_TRIANGLE_STIPPLE 0x4214 +# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0 +# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f +# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16 +# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000 /* The pointsize is given in multiples of 6. The pointsize can be enormous: * Clear() renders a single point that fills the entire framebuffer. @@ -761,16 +803,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6) /* Blue fill color */ -#define GA_FILL_R 0x4220 +#define R500_GA_FILL_R 0x4220 /* Blue fill color */ -#define GA_FILL_G 0x4224 +#define R500_GA_FILL_G 0x4224 /* Blue fill color */ -#define GA_FILL_B 0x4228 +#define R500_GA_FILL_B 0x4228 /* Alpha fill color */ -#define GA_FILL_A 0x422c +#define R500_GA_FILL_A 0x422c /* Specifies maximum and minimum point & sprite sizes for per vertex size @@ -791,159 +833,159 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * VE: vertical or horizontal * HO & VE: no classification */ -#define GA_LINE_CNTL 0x4234 -# define GA_LINE_CNTL_WIDTH_SHIFT 0 -# define GA_LINE_CNTL_WIDTH_MASK 0x0000ffff -# define GA_LINE_CNTL_END_TYPE_HOR (0 << 16) -# define GA_LINE_CNTL_END_TYPE_VER (1 << 16) -# define GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */ -# define GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */ -# define GA_LINE_CNTL_SORT_NO (0 << 18) -# define GA_LINE_CNTL_SORT_MINX_MINY (1 << 18) +#define R300_GA_LINE_CNTL 0x4234 +# define R300_GA_LINE_CNTL_WIDTH_SHIFT 0 +# define R300_GA_LINE_CNTL_WIDTH_MASK 0x0000ffff +# define R300_GA_LINE_CNTL_END_TYPE_HOR (0 << 16) +# define R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16) +# define R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */ +# define R300_GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */ +# define R500_GA_LINE_CNTL_SORT_NO (0 << 18) +# define R500_GA_LINE_CNTL_SORT_MINX_MINY (1 << 18) /** TODO: looks wrong */ -# define R300_LINESIZE_MAX (GA_LINE_CNTL_WIDTH_MASK / 6) +# define R300_LINESIZE_MAX (R300_GA_LINE_CNTL_WIDTH_MASK / 6) /** TODO: looks wrong */ # define R300_LINE_CNT_HO (1 << 16) /** TODO: looks wrong */ # define R300_LINE_CNT_VE (1 << 17) /* Line Stipple configuration information. 
*/ -#define GA_LINE_STIPPLE_CONFIG 0x4238 -# define GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0) -# define GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0) -# define GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0) -# define GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2 -# define GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc +#define R300_GA_LINE_STIPPLE_CONFIG 0x4238 +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2 +# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc /* Used to load US instructions and constants */ #define R500_GA_US_VECTOR_INDEX 0x4250 -# define GA_US_VECTOR_INDEX_SHIFT 0 -# define GA_US_VECTOR_INDEX_MASK 0x000000ff -# define GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16) -# define GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16) -# define GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17) -# define GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17) +# define R500_GA_US_VECTOR_INDEX_SHIFT 0 +# define R500_GA_US_VECTOR_INDEX_MASK 0x000000ff +# define R500_GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16) +# define R500_GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16) +# define R500_GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17) +# define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17) /* Data register for loading US instructions and constants */ #define R500_GA_US_VECTOR_DATA 0x4254 /* Specifies color properties and mappings of textures. 
*/ -#define GA_COLOR_CONTROL_PS3 0x4258 -# define TEX0_SHADING_PS3_SOLID (0 << 0) -# define TEX0_SHADING_PS3_FLAT (1 << 0) -# define TEX0_SHADING_PS3_GOURAUD (2 << 0) -# define TEX1_SHADING_PS3_SOLID (0 << 2) -# define TEX1_SHADING_PS3_FLAT (1 << 2) -# define TEX1_SHADING_PS3_GOURAUD (2 << 2) -# define TEX2_SHADING_PS3_SOLID (0 << 4) -# define TEX2_SHADING_PS3_FLAT (1 << 4) -# define TEX2_SHADING_PS3_GOURAUD (2 << 4) -# define TEX3_SHADING_PS3_SOLID (0 << 6) -# define TEX3_SHADING_PS3_FLAT (1 << 6) -# define TEX3_SHADING_PS3_GOURAUD (2 << 6) -# define TEX4_SHADING_PS3_SOLID (0 << 8) -# define TEX4_SHADING_PS3_FLAT (1 << 8) -# define TEX4_SHADING_PS3_GOURAUD (2 << 8) -# define TEX5_SHADING_PS3_SOLID (0 << 10) -# define TEX5_SHADING_PS3_FLAT (1 << 10) -# define TEX5_SHADING_PS3_GOURAUD (2 << 10) -# define TEX6_SHADING_PS3_SOLID (0 << 12) -# define TEX6_SHADING_PS3_FLAT (1 << 12) -# define TEX6_SHADING_PS3_GOURAUD (2 << 12) -# define TEX7_SHADING_PS3_SOLID (0 << 14) -# define TEX7_SHADING_PS3_FLAT (1 << 14) -# define TEX7_SHADING_PS3_GOURAUD (2 << 14) -# define TEX8_SHADING_PS3_SOLID (0 << 16) -# define TEX8_SHADING_PS3_FLAT (1 << 16) -# define TEX8_SHADING_PS3_GOURAUD (2 << 16) -# define TEX9_SHADING_PS3_SOLID (0 << 18) -# define TEX9_SHADING_PS3_FLAT (1 << 18) -# define TEX9_SHADING_PS3_GOURAUD (2 << 18) -# define TEX10_SHADING_PS3_SOLID (0 << 20) -# define TEX10_SHADING_PS3_FLAT (1 << 20) -# define TEX10_SHADING_PS3_GOURAUD (2 << 20) -# define COLOR0_TEX_OVERRIDE_NO (0 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22) -# define 
COLOR1_TEX_OVERRIDE_NO (0 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26) +#define R500_GA_COLOR_CONTROL_PS3 0x4258 +# define R500_TEX0_SHADING_PS3_SOLID (0 << 0) +# define R500_TEX0_SHADING_PS3_FLAT (1 << 0) +# define R500_TEX0_SHADING_PS3_GOURAUD (2 << 0) +# define R500_TEX1_SHADING_PS3_SOLID (0 << 2) +# define R500_TEX1_SHADING_PS3_FLAT (1 << 2) +# define R500_TEX1_SHADING_PS3_GOURAUD (2 << 2) +# define R500_TEX2_SHADING_PS3_SOLID (0 << 4) +# define R500_TEX2_SHADING_PS3_FLAT (1 << 4) +# define R500_TEX2_SHADING_PS3_GOURAUD (2 << 4) +# define R500_TEX3_SHADING_PS3_SOLID (0 << 6) +# define R500_TEX3_SHADING_PS3_FLAT (1 << 6) +# define R500_TEX3_SHADING_PS3_GOURAUD (2 << 6) +# define R500_TEX4_SHADING_PS3_SOLID (0 << 8) +# define R500_TEX4_SHADING_PS3_FLAT (1 << 8) +# define R500_TEX4_SHADING_PS3_GOURAUD (2 << 8) +# define R500_TEX5_SHADING_PS3_SOLID (0 << 10) +# define R500_TEX5_SHADING_PS3_FLAT (1 << 10) +# define R500_TEX5_SHADING_PS3_GOURAUD (2 << 10) +# define R500_TEX6_SHADING_PS3_SOLID (0 << 12) +# define R500_TEX6_SHADING_PS3_FLAT (1 << 12) +# define R500_TEX6_SHADING_PS3_GOURAUD (2 << 12) +# define R500_TEX7_SHADING_PS3_SOLID (0 << 14) +# define R500_TEX7_SHADING_PS3_FLAT (1 << 14) +# define R500_TEX7_SHADING_PS3_GOURAUD (2 << 14) +# define R500_TEX8_SHADING_PS3_SOLID (0 << 16) +# define R500_TEX8_SHADING_PS3_FLAT (1 << 16) +# define R500_TEX8_SHADING_PS3_GOURAUD (2 << 16) +# define R500_TEX9_SHADING_PS3_SOLID (0 << 18) +# define R500_TEX9_SHADING_PS3_FLAT (1 << 18) +# define R500_TEX9_SHADING_PS3_GOURAUD (2 << 18) +# define 
R500_TEX10_SHADING_PS3_SOLID (0 << 20) +# define R500_TEX10_SHADING_PS3_FLAT (1 << 20) +# define R500_TEX10_SHADING_PS3_GOURAUD (2 << 20) +# define R500_COLOR0_TEX_OVERRIDE_NO (0 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22) +# define R500_COLOR1_TEX_OVERRIDE_NO (0 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26) /* Returns idle status of various G3D block, captured when GA_IDLE written or * when hard or soft reset asserted. 
*/ -#define GA_IDLE 0x425c -# define GA_IDLE_PIPE3_Z_IDLE (0 << 0) -# define GA_IDLE_PIPE2_Z_IDLE (0 << 1) -# define GA_IDLE_PIPE3_CD_IDLE (0 << 2) -# define GA_IDLE_PIPE2_CD_IDLE (0 << 3) -# define GA_IDLE_PIPE3_FG_IDLE (0 << 4) -# define GA_IDLE_PIPE2_FG_IDLE (0 << 5) -# define GA_IDLE_PIPE3_US_IDLE (0 << 6) -# define GA_IDLE_PIPE2_US_IDLE (0 << 7) -# define GA_IDLE_PIPE3_SC_IDLE (0 << 8) -# define GA_IDLE_PIPE2_SC_IDLE (0 << 9) -# define GA_IDLE_PIPE3_RS_IDLE (0 << 10) -# define GA_IDLE_PIPE2_RS_IDLE (0 << 11) -# define GA_IDLE_PIPE1_Z_IDLE (0 << 12) -# define GA_IDLE_PIPE0_Z_IDLE (0 << 13) -# define GA_IDLE_PIPE1_CD_IDLE (0 << 14) -# define GA_IDLE_PIPE0_CD_IDLE (0 << 15) -# define GA_IDLE_PIPE1_FG_IDLE (0 << 16) -# define GA_IDLE_PIPE0_FG_IDLE (0 << 17) -# define GA_IDLE_PIPE1_US_IDLE (0 << 18) -# define GA_IDLE_PIPE0_US_IDLE (0 << 19) -# define GA_IDLE_PIPE1_SC_IDLE (0 << 20) -# define GA_IDLE_PIPE0_SC_IDLE (0 << 21) -# define GA_IDLE_PIPE1_RS_IDLE (0 << 22) -# define GA_IDLE_PIPE0_RS_IDLE (0 << 23) -# define GA_IDLE_SU_IDLE (0 << 24) -# define GA_IDLE_GA_IDLE (0 << 25) -# define GA_IDLE_GA_UNIT2_IDLE (0 << 26) +#define R500_GA_IDLE 0x425c +# define R500_GA_IDLE_PIPE3_Z_IDLE (0 << 0) +# define R500_GA_IDLE_PIPE2_Z_IDLE (0 << 1) +# define R500_GA_IDLE_PIPE3_CD_IDLE (0 << 2) +# define R500_GA_IDLE_PIPE2_CD_IDLE (0 << 3) +# define R500_GA_IDLE_PIPE3_FG_IDLE (0 << 4) +# define R500_GA_IDLE_PIPE2_FG_IDLE (0 << 5) +# define R500_GA_IDLE_PIPE3_US_IDLE (0 << 6) +# define R500_GA_IDLE_PIPE2_US_IDLE (0 << 7) +# define R500_GA_IDLE_PIPE3_SC_IDLE (0 << 8) +# define R500_GA_IDLE_PIPE2_SC_IDLE (0 << 9) +# define R500_GA_IDLE_PIPE3_RS_IDLE (0 << 10) +# define R500_GA_IDLE_PIPE2_RS_IDLE (0 << 11) +# define R500_GA_IDLE_PIPE1_Z_IDLE (0 << 12) +# define R500_GA_IDLE_PIPE0_Z_IDLE (0 << 13) +# define R500_GA_IDLE_PIPE1_CD_IDLE (0 << 14) +# define R500_GA_IDLE_PIPE0_CD_IDLE (0 << 15) +# define R500_GA_IDLE_PIPE1_FG_IDLE (0 << 16) +# define R500_GA_IDLE_PIPE0_FG_IDLE (0 << 17) +# 
define R500_GA_IDLE_PIPE1_US_IDLE (0 << 18) +# define R500_GA_IDLE_PIPE0_US_IDLE (0 << 19) +# define R500_GA_IDLE_PIPE1_SC_IDLE (0 << 20) +# define R500_GA_IDLE_PIPE0_SC_IDLE (0 << 21) +# define R500_GA_IDLE_PIPE1_RS_IDLE (0 << 22) +# define R500_GA_IDLE_PIPE0_RS_IDLE (0 << 23) +# define R500_GA_IDLE_SU_IDLE (0 << 24) +# define R500_GA_IDLE_GA_IDLE (0 << 25) +# define R500_GA_IDLE_GA_UNIT2_IDLE (0 << 26) /* Current value of stipple accumulator. */ #define R300_GA_LINE_STIPPLE_VALUE 0x4260 /* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */ -#define GA_LINE_S0 0x4264 +#define R300_GA_LINE_S0 0x4264 /* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. AA) */ -#define GA_LINE_S1 0x4268 +#define R300_GA_LINE_S1 0x4268 /* GA Input fifo high water marks */ -#define GA_FIFO_CNTL 0x4270 -# define GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007 -# define GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0 -# define GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038 -# define GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3 -# define GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0 -# define GA_FIFO_CNTL_VERTEX_REG_SHIFT 6 - -/* Something shade related */ -#define GA_ENHANCE 0x4274 -# define GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0) -# define GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */ -# define GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1) -# define GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */ -# define GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */ -# define GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */ -# define GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3) -# define GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. 
*/ +#define R500_GA_FIFO_CNTL 0x4270 +# define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007 +# define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0 +# define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038 +# define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3 +# define R500_GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0 +# define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT 6 + +/* GA enhance/tweaks */ +#define R300_GA_ENHANCE 0x4274 +# define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0) +# define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */ +# define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1) +# define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */ +# define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */ +# define R500_GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */ +# define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3) +# define R500_GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. */ #define R300_GA_COLOR_CONTROL 0x4278 # define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID (0 << 0) @@ -1006,46 +1048,48 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
/* Polygon Mode * Dangerous */ -#define GA_POLY_MODE 0x4288 -# define GA_POLY_MODE_DISABLE (0 << 0) -# define GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */ +#define R300_GA_POLY_MODE 0x4288 +# define R300_GA_POLY_MODE_DISABLE (0 << 0) +# define R300_GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */ /* reserved */ -# define GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4) -# define GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4) -# define GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4) /* reserved */ -# define GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7) -# define GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7) -# define GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7) /* reserved */ /* Specifies the rouding mode for geometry & color SPFP to FP conversions. 
*/ -#define GA_ROUND_MODE 0x428c -# define GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0) -# define GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0) -# define GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2) -# define GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2) -# define GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4) -# define GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4) -# define GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5) -# define GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5) -# define GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6 -# define GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0 +#define R300_GA_ROUND_MODE 0x428c +# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0) +# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0) +# define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2) +# define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2) +# define R300_GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4) +# define R300_GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4) +# define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5) +# define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5) +# define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6 +# define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0 /* Specifies x & y offsets for vertex data after conversion to FP. * Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b * subprecision). */ -#define GA_OFFSET 0x4290 -# define GA_OFFSET_X_OFFSET_SHIFT 0 -# define GA_OFFSET_X_OFFSET_MASK 0x0000ffff -# define GA_OFFSET_Y_OFFSET_SHIFT 16 -# define GA_OFFSET_Y_OFFSET_MASK 0xffff0000 +#define R300_GA_OFFSET 0x4290 +# define R300_GA_OFFSET_X_OFFSET_SHIFT 0 +# define R300_GA_OFFSET_X_OFFSET_MASK 0x0000ffff +# define R300_GA_OFFSET_Y_OFFSET_SHIFT 16 +# define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000 /* Specifies the scale to apply to fog. */ -#define R300_RE_FOG_SCALE 0x4294 +#define R300_GA_FOG_SCALE 0x4294 /* Specifies the offset to apply to fog. 
*/ -#define R300_RE_FOG_START 0x4298 +#define R300_GA_FOG_OFFSET 0x4298 +/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */ +#define R300_GA_SOFT_RESET 0x429c /* Not sure why there are duplicate of factor and constant values. * My best guess so far is that there are seperate zbiases for test and write. @@ -1053,11 +1097,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Some of the tests indicate that fgl has a fallback implementation of zbias * via pixel shaders. */ -#define R300_RE_ZBIAS_CNTL 0x42A0 /* GUESS */ -#define R300_RE_ZBIAS_T_FACTOR 0x42A4 -#define R300_RE_ZBIAS_T_CONSTANT 0x42A8 -#define R300_RE_ZBIAS_W_FACTOR 0x42AC -#define R300_RE_ZBIAS_W_CONSTANT 0x42B0 +#define R300_SU_TEX_WRAP 0x42A0 +#define R300_SU_POLY_OFFSET_FRONT_SCALE 0x42A4 +#define R300_SU_POLY_OFFSET_FRONT_OFFSET 0x42A8 +#define R300_SU_POLY_OFFSET_BACK_SCALE 0x42AC +#define R300_SU_POLY_OFFSET_BACK_OFFSET 0x42B0 /* This register needs to be set to (1<<1) for RV350 to correctly * perform depth test (see --vb-triangles in r300_demo) @@ -1068,10 +1112,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * One to enable depth test and one for depth write. * Yet this doesnt explain why depth writes work ... */ -#define R300_RE_OCCLUSION_CNTL 0x42B4 -# define R300_OCCLUSION_ON (1<<1) +#define R300_SU_POLY_OFFSET_ENABLE 0x42B4 +# define R300_FRONT_ENABLE (1 << 0) +# define R300_BACK_ENABLE (1 << 1) +# define R300_PARA_ENABLE (1 << 2) -#define R300_RE_CULL_CNTL 0x42B8 +#define R300_SU_CULL_MODE 0x42B8 # define R300_CULL_FRONT (1 << 0) # define R300_CULL_BACK (1 << 1) # define R300_FRONT_FACE_CCW (0 << 2) @@ -1126,32 +1172,32 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_RS_IP_3 0x431C # define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */ # define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */ -# define R300_RS_TEX_PTR(x) (x << 0)
-# define R300_RS_COL_PTR(x) (x << 6)
-# define R300_RS_COL_FMT(x) (x << 9)
-# define R300_RS_COL_FMT_RGBA 0
-# define R300_RS_COL_FMT_RGB0 2
-# define R300_RS_COL_FMT_RGB1 3
-# define R300_RS_COL_FMT_000A 4
-# define R300_RS_COL_FMT_0000 5
-# define R300_RS_COL_FMT_0001 6
-# define R300_RS_COL_FMT_111A 8
-# define R300_RS_COL_FMT_1110 9
-# define R300_RS_COL_FMT_1111 10
-# define R300_RS_SEL_S(x) (x << 13)
-# define R300_RS_SEL_T(x) (x << 16)
-# define R300_RS_SEL_R(x) (x << 19)
-# define R300_RS_SEL_Q(x) (x << 22)
-# define R300_RS_SEL_C0 0
-# define R300_RS_SEL_C1 1
-# define R300_RS_SEL_C2 2
-# define R300_RS_SEL_C3 3
-# define R300_RS_SEL_K0 4
-# define R300_RS_SEL_K1 5
+# define R300_RS_TEX_PTR(x) (x << 0) +# define R300_RS_COL_PTR(x) (x << 6) +# define R300_RS_COL_FMT(x) (x << 9) +# define R300_RS_COL_FMT_RGBA 0 +# define R300_RS_COL_FMT_RGB0 1 +# define R300_RS_COL_FMT_RGB1 2 +# define R300_RS_COL_FMT_000A 4 +# define R300_RS_COL_FMT_0000 5 +# define R300_RS_COL_FMT_0001 6 +# define R300_RS_COL_FMT_111A 8 +# define R300_RS_COL_FMT_1110 9 +# define R300_RS_COL_FMT_1111 10 +# define R300_RS_SEL_S(x) (x << 13) +# define R300_RS_SEL_T(x) (x << 16) +# define R300_RS_SEL_R(x) (x << 19) +# define R300_RS_SEL_Q(x) (x << 22) +# define R300_RS_SEL_C0 0 +# define R300_RS_SEL_C1 1 +# define R300_RS_SEL_C2 2 +# define R300_RS_SEL_C3 3 +# define R300_RS_SEL_K0 4 +# define R300_RS_SEL_K1 5 /* */ -#define R500_RS_INST_0 0x4320
+#define R500_RS_INST_0 0x4320 #define R500_RS_INST_1 0x4324 #define R500_RS_INST_2 0x4328 #define R500_RS_INST_3 0x432c @@ -1167,17 +1213,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_INST_13 0x4354 #define R500_RS_INST_14 0x4358 #define R500_RS_INST_15 0x435c -#define R500_RS_INST_TEX_ID_SHIFT 0
-#define R500_RS_INST_TEX_CN_WRITE (1 << 4)
-#define R500_RS_INST_TEX_ADDR_SHIFT 5
-#define R500_RS_INST_COL_ID_SHIFT 12
-#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16)
-#define R500_RS_INST_COL_CN_WRITE (1 << 16)
-#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16)
-#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16)
-#define R500_RS_INST_COL_ADDR_SHIFT 18
-#define R500_RS_INST_TEX_ADJ (1 << 25)
-#define R500_RS_INST_W_CN (1 << 26)
+#define R500_RS_INST_TEX_ID_SHIFT 0 +#define R500_RS_INST_TEX_CN_WRITE (1 << 4) +#define R500_RS_INST_TEX_ADDR_SHIFT 5 +#define R500_RS_INST_COL_ID_SHIFT 12 +#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) +#define R500_RS_INST_COL_CN_WRITE (1 << 16) +#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16) +#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16) +#define R500_RS_INST_COL_ADDR_SHIFT 18 +#define R500_RS_INST_TEX_ADJ (1 << 25) +#define R500_RS_INST_W_CN (1 << 26) /* These DWORDs control how vertex data is routed into fragment program * registers, after interpolators. @@ -1220,6 +1266,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6) # define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6) +#define R300_SC_EDGERULE 0x43a8 /* BEGIN: Scissors and cliprects */ @@ -1237,21 +1284,21 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * For some reason, the top-left corner of the framebuffer is at (1440, 1440) * for the purpose of clipping and scissors. */ -#define R300_RE_CLIPRECT_TL_0 0x43B0 -#define R300_RE_CLIPRECT_BR_0 0x43B4 -#define R300_RE_CLIPRECT_TL_1 0x43B8 -#define R300_RE_CLIPRECT_BR_1 0x43BC -#define R300_RE_CLIPRECT_TL_2 0x43C0 -#define R300_RE_CLIPRECT_BR_2 0x43C4 -#define R300_RE_CLIPRECT_TL_3 0x43C8 -#define R300_RE_CLIPRECT_BR_3 0x43CC +#define R300_SC_CLIPRECT_TL_0 0x43B0 +#define R300_SC_CLIPRECT_BR_0 0x43B4 +#define R300_SC_CLIPRECT_TL_1 0x43B8 +#define R300_SC_CLIPRECT_BR_1 0x43BC +#define R300_SC_CLIPRECT_TL_2 0x43C0 +#define R300_SC_CLIPRECT_BR_2 0x43C4 +#define R300_SC_CLIPRECT_TL_3 0x43C8 +#define R300_SC_CLIPRECT_BR_3 0x43CC # define R300_CLIPRECT_OFFSET 1440 # define R300_CLIPRECT_MASK 0x1FFF # define R300_CLIPRECT_X_SHIFT 0 # define R300_CLIPRECT_X_MASK (0x1FFF << 0) # define R300_CLIPRECT_Y_SHIFT 13 # define R300_CLIPRECT_Y_MASK (0x1FFF << 13) -#define R300_RE_CLIPRECT_CNTL 0x43D0 +#define R300_SC_CLIP_RULE 0x43D0 # define R300_CLIP_OUT (1 << 0) # define R300_CLIP_0 (1 << 1) # define R300_CLIP_1 (1 << 2) @@ -1271,8 
+1318,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* gap */ -#define R300_RE_SCISSORS_TL 0x43E0 -#define R300_RE_SCISSORS_BR 0x43E4 +#define R300_SC_SCISSORS_TL 0x43E0 +#define R300_SC_SCISSORS_BR 0x43E4 # define R300_SCISSORS_OFFSET 1440 # define R300_SCISSORS_X_SHIFT 0 # define R300_SCISSORS_X_MASK (0x1FFF << 0) @@ -1556,23 +1603,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * offsets into the respective instruction streams, while *_END points to the * last instruction relative to this offset. */ -#define R300_PFS_CNTL_0 0x4600 +#define R300_US_CONFIG 0x4600 # define R300_PFS_CNTL_LAST_NODES_SHIFT 0 # define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0) # define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3) -#define R300_PFS_CNTL_1 0x4604 +#define R300_US_PIXSIZE 0x4604 /* There is an unshifted value here which has so far always been equal to the * index of the highest used temporary register. */ -#define R300_PFS_CNTL_2 0x4608 +#define R300_US_CODE_OFFSET 0x4608 # define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0 # define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0) # define R300_PFS_CNTL_ALU_END_SHIFT 6 # define R300_PFS_CNTL_ALU_END_MASK (63 << 6) -# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 12 -# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 12) /* GUESS */ +# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 13 +# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13) # define R300_PFS_CNTL_TEX_END_SHIFT 18 -# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) /* GUESS */ +# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) /* gap */ @@ -1583,70 +1630,65 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * * Offsets are relative to the master offset from PFS_CNTL_2. 
*/ -#define R300_PFS_NODE_0 0x4610 -#define R300_PFS_NODE_1 0x4614 -#define R300_PFS_NODE_2 0x4618 -#define R300_PFS_NODE_3 0x461C -# define R300_PFS_NODE_ALU_OFFSET_SHIFT 0 -# define R300_PFS_NODE_ALU_OFFSET_MASK (63 << 0) -# define R300_PFS_NODE_ALU_END_SHIFT 6 -# define R300_PFS_NODE_ALU_END_MASK (63 << 6) -# define R300_PFS_NODE_TEX_OFFSET_SHIFT 12 -# define R300_PFS_NODE_TEX_OFFSET_MASK (31 << 12) -# define R300_PFS_NODE_TEX_END_SHIFT 17 -# define R300_PFS_NODE_TEX_END_MASK (31 << 17) -# define R300_PFS_NODE_OUTPUT_COLOR (1 << 22) -# define R300_PFS_NODE_OUTPUT_DEPTH (1 << 23) +#define R300_US_CODE_ADDR_0 0x4610 +#define R300_US_CODE_ADDR_1 0x4614 +#define R300_US_CODE_ADDR_2 0x4618 +#define R300_US_CODE_ADDR_3 0x461C +# define R300_ALU_START_SHIFT 0 +# define R300_ALU_START_MASK (63 << 0) +# define R300_ALU_SIZE_SHIFT 6 +# define R300_ALU_SIZE_MASK (63 << 6) +# define R300_TEX_START_SHIFT 12 +# define R300_TEX_START_MASK (31 << 12) +# define R300_TEX_SIZE_SHIFT 17 +# define R300_TEX_SIZE_MASK (31 << 17) +# define R300_RGBA_OUT (1 << 22) +# define R300_W_OUT (1 << 23) /* TEX * As far as I can tell, texture instructions cannot write into output * registers directly. 
A subsequent ALU instruction is always necessary, * even if it's just MAD o0, r0, 1, 0 */ -#define R300_PFS_TEXI_0 0x4620 -# define R300_FPITX_SRC_SHIFT 0 -# define R300_FPITX_SRC_MASK (31 << 0) - /* GUESS */ -# define R300_FPITX_SRC_CONST (1 << 5) -# define R300_FPITX_DST_SHIFT 6 -# define R300_FPITX_DST_MASK (31 << 6) -# define R300_FPITX_IMAGE_SHIFT 11 - /* GUESS based on layout and native limits */ -# define R300_FPITX_IMAGE_MASK (15 << 11) -/* Unsure if these are opcodes, or some kind of bitfield, but this is how - * they were set when I checked - */ -# define R300_FPITX_OPCODE_SHIFT 15 -# define R300_FPITX_OP_TEX 1 -# define R300_FPITX_OP_KIL 2 -# define R300_FPITX_OP_TXP 3 -# define R300_FPITX_OP_TXB 4 -# define R300_FPITX_OPCODE_MASK (7 << 15) +#define R300_US_TEX_INST_0 0x4620 +# define R300_SRC_ADDR_SHIFT 0 +# define R300_SRC_ADDR_MASK (31 << 0) +# define R300_DST_ADDR_SHIFT 6 +# define R300_DST_ADDR_MASK (31 << 6) +# define R300_TEX_ID_SHIFT 11 +# define R300_TEX_ID_MASK (15 << 11) +# define R300_TEX_INST_SHIFT 15 +# define R300_TEX_OP_NOP 0 +# define R300_TEX_OP_LD 1 +# define R300_TEX_OP_KIL 2 +# define R300_TEX_OP_TXP 3 +# define R300_TEX_OP_TXB 4 +# define R300_TEX_INST_MASK (7 << 15) /* Output format from the unfied shader */ -#define R500_US_OUT_FMT 0x46A4 -# define R500_US_OUT_FMT_C4_8 (0 << 0) -# define R500_US_OUT_FMT_C4_10 (1 << 0) -# define R500_US_OUT_FMT_C4_10_GAMMA (2 << 0) -# define R500_US_OUT_FMT_C_16 (3 << 0) -# define R500_US_OUT_FMT_C2_16 (4 << 0) -# define R500_US_OUT_FMT_C4_16 (5 << 0) -# define R500_US_OUT_FMT_C_16_MPEG (6 << 0) -# define R500_US_OUT_FMT_C2_16_MPEG (7 << 0) -# define R500_US_OUT_FMT_C2_4 (8 << 0) -# define R500_US_OUT_FMT_C_3_3_2 (9 << 0) -# define R500_US_OUT_FMT_C_6_5_6 (10 << 0) -# define R500_US_OUT_FMT_C_11_11_10 (11 << 0) -# define R500_US_OUT_FMT_C_10_11_11 (12 << 0) -# define R500_US_OUT_FMT_C_2_10_10_10 (13 << 0) +#define R300_US_OUT_FMT 0x46A4 +# define R300_US_OUT_FMT_C4_8 (0 << 0) +# define 
R300_US_OUT_FMT_C4_10 (1 << 0) +# define R300_US_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R300_US_OUT_FMT_C_16 (3 << 0) +# define R300_US_OUT_FMT_C2_16 (4 << 0) +# define R300_US_OUT_FMT_C4_16 (5 << 0) +# define R300_US_OUT_FMT_C_16_MPEG (6 << 0) +# define R300_US_OUT_FMT_C2_16_MPEG (7 << 0) +# define R300_US_OUT_FMT_C2_4 (8 << 0) +# define R300_US_OUT_FMT_C_3_3_2 (9 << 0) +# define R300_US_OUT_FMT_C_6_5_6 (10 << 0) +# define R300_US_OUT_FMT_C_11_11_10 (11 << 0) +# define R300_US_OUT_FMT_C_10_11_11 (12 << 0) +# define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0) /* reserved */ -# define R500_US_OUT_FMT_UNUSED (15 << 0) -# define R500_US_OUT_FMT_C_16_FP (16 << 0) -# define R500_US_OUT_FMT_C2_16_FP (17 << 0) -# define R500_US_OUT_FMT_C4_16_FP (18 << 0) -# define R500_US_OUT_FMT_C_32_FP (19 << 0) -# define R500_US_OUT_FMT_C2_32_FP (20 << 0) -# define R500_US_OUT_FMT_C4_32_FP (20 << 0) +# define R300_US_OUT_FMT_UNUSED (15 << 0) +# define R300_US_OUT_FMT_C_16_FP (16 << 0) +# define R300_US_OUT_FMT_C2_16_FP (17 << 0) +# define R300_US_OUT_FMT_C4_16_FP (18 << 0) +# define R300_US_OUT_FMT_C_32_FP (19 << 0) +# define R300_US_OUT_FMT_C2_32_FP (20 << 0) +# define R300_US_OUT_FMT_C4_32_FP (20 << 0) /* ALU * The ALU instructions register blocks are enumerated according to the order @@ -1712,204 +1754,247 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
* - Set FPI0/FPI2_SPECIAL_LRP * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD */ -#define R300_PFS_INSTR1_0 0x46C0 -# define R300_FPI1_SRC0C_SHIFT 0 -# define R300_FPI1_SRC0C_MASK (31 << 0) -# define R300_FPI1_SRC0C_CONST (1 << 5) -# define R300_FPI1_SRC1C_SHIFT 6 -# define R300_FPI1_SRC1C_MASK (31 << 6) -# define R300_FPI1_SRC1C_CONST (1 << 11) -# define R300_FPI1_SRC2C_SHIFT 12 -# define R300_FPI1_SRC2C_MASK (31 << 12) -# define R300_FPI1_SRC2C_CONST (1 << 17) -# define R300_FPI1_SRC_MASK 0x0003ffff -# define R300_FPI1_DSTC_SHIFT 18 -# define R300_FPI1_DSTC_MASK (31 << 18) -# define R300_FPI1_DSTC_REG_MASK_SHIFT 23 -# define R300_FPI1_DSTC_REG_X (1 << 23) -# define R300_FPI1_DSTC_REG_Y (1 << 24) -# define R300_FPI1_DSTC_REG_Z (1 << 25) -# define R300_FPI1_DSTC_OUTPUT_MASK_SHIFT 26 -# define R300_FPI1_DSTC_OUTPUT_X (1 << 26) -# define R300_FPI1_DSTC_OUTPUT_Y (1 << 27) -# define R300_FPI1_DSTC_OUTPUT_Z (1 << 28) - -#define R300_PFS_INSTR3_0 0x47C0 -# define R300_FPI3_SRC0A_SHIFT 0 -# define R300_FPI3_SRC0A_MASK (31 << 0) -# define R300_FPI3_SRC0A_CONST (1 << 5) -# define R300_FPI3_SRC1A_SHIFT 6 -# define R300_FPI3_SRC1A_MASK (31 << 6) -# define R300_FPI3_SRC1A_CONST (1 << 11) -# define R300_FPI3_SRC2A_SHIFT 12 -# define R300_FPI3_SRC2A_MASK (31 << 12) -# define R300_FPI3_SRC2A_CONST (1 << 17) -# define R300_FPI3_SRC_MASK 0x0003ffff -# define R300_FPI3_DSTA_SHIFT 18 -# define R300_FPI3_DSTA_MASK (31 << 18) -# define R300_FPI3_DSTA_REG (1 << 23) -# define R300_FPI3_DSTA_OUTPUT (1 << 24) -# define R300_FPI3_DSTA_DEPTH (1 << 27) - -#define R300_PFS_INSTR0_0 0x48C0 -# define R300_FPI0_ARGC_SRC0C_XYZ 0 -# define R300_FPI0_ARGC_SRC0C_XXX 1 -# define R300_FPI0_ARGC_SRC0C_YYY 2 -# define R300_FPI0_ARGC_SRC0C_ZZZ 3 -# define R300_FPI0_ARGC_SRC1C_XYZ 4 -# define R300_FPI0_ARGC_SRC1C_XXX 5 -# define R300_FPI0_ARGC_SRC1C_YYY 6 -# define R300_FPI0_ARGC_SRC1C_ZZZ 7 -# define R300_FPI0_ARGC_SRC2C_XYZ 8 -# define R300_FPI0_ARGC_SRC2C_XXX 9 -# define 
R300_FPI0_ARGC_SRC2C_YYY 10 -# define R300_FPI0_ARGC_SRC2C_ZZZ 11 -# define R300_FPI0_ARGC_SRC0A 12 -# define R300_FPI0_ARGC_SRC1A 13 -# define R300_FPI0_ARGC_SRC2A 14 -# define R300_FPI0_ARGC_SRC1C_LRP 15 -# define R300_FPI0_ARGC_ZERO 20 -# define R300_FPI0_ARGC_ONE 21 - /* GUESS */ -# define R300_FPI0_ARGC_HALF 22 -# define R300_FPI0_ARGC_SRC0C_YZX 23 -# define R300_FPI0_ARGC_SRC1C_YZX 24 -# define R300_FPI0_ARGC_SRC2C_YZX 25 -# define R300_FPI0_ARGC_SRC0C_ZXY 26 -# define R300_FPI0_ARGC_SRC1C_ZXY 27 -# define R300_FPI0_ARGC_SRC2C_ZXY 28 -# define R300_FPI0_ARGC_SRC0CA_WZY 29 -# define R300_FPI0_ARGC_SRC1CA_WZY 30 -# define R300_FPI0_ARGC_SRC2CA_WZY 31 - -# define R300_FPI0_ARG0C_SHIFT 0 -# define R300_FPI0_ARG0C_MASK (31 << 0) -# define R300_FPI0_ARG0C_NEG (1 << 5) -# define R300_FPI0_ARG0C_ABS (1 << 6) -# define R300_FPI0_ARG1C_SHIFT 7 -# define R300_FPI0_ARG1C_MASK (31 << 7) -# define R300_FPI0_ARG1C_NEG (1 << 12) -# define R300_FPI0_ARG1C_ABS (1 << 13) -# define R300_FPI0_ARG2C_SHIFT 14 -# define R300_FPI0_ARG2C_MASK (31 << 14) -# define R300_FPI0_ARG2C_NEG (1 << 19) -# define R300_FPI0_ARG2C_ABS (1 << 20) -# define R300_FPI0_SPECIAL_LRP (1 << 21) -# define R300_FPI0_OUTC_MAD (0 << 23) -# define R300_FPI0_OUTC_DP3 (1 << 23) -# define R300_FPI0_OUTC_DP4 (2 << 23) -# define R300_FPI0_OUTC_MIN (4 << 23) -# define R300_FPI0_OUTC_MAX (5 << 23) -# define R300_FPI0_OUTC_CMPH (7 << 23) -# define R300_FPI0_OUTC_CMP (8 << 23) -# define R300_FPI0_OUTC_FRC (9 << 23) -# define R300_FPI0_OUTC_REPL_ALPHA (10 << 23) -# define R300_FPI0_OUTC_SAT (1 << 30) -# define R300_FPI0_INSERT_NOP (1 << 31) - -#define R300_PFS_INSTR2_0 0x49C0 -# define R300_FPI2_ARGA_SRC0C_X 0 -# define R300_FPI2_ARGA_SRC0C_Y 1 -# define R300_FPI2_ARGA_SRC0C_Z 2 -# define R300_FPI2_ARGA_SRC1C_X 3 -# define R300_FPI2_ARGA_SRC1C_Y 4 -# define R300_FPI2_ARGA_SRC1C_Z 5 -# define R300_FPI2_ARGA_SRC2C_X 6 -# define R300_FPI2_ARGA_SRC2C_Y 7 -# define R300_FPI2_ARGA_SRC2C_Z 8 -# define R300_FPI2_ARGA_SRC0A 9 -# 
define R300_FPI2_ARGA_SRC1A 10 -# define R300_FPI2_ARGA_SRC2A 11 -# define R300_FPI2_ARGA_SRC1A_LRP 15 -# define R300_FPI2_ARGA_ZERO 16 -# define R300_FPI2_ARGA_ONE 17 - /* GUESS */ -# define R300_FPI2_ARGA_HALF 18 -# define R300_FPI2_ARG0A_SHIFT 0 -# define R300_FPI2_ARG0A_MASK (31 << 0) -# define R300_FPI2_ARG0A_NEG (1 << 5) - /* GUESS */ -# define R300_FPI2_ARG0A_ABS (1 << 6) -# define R300_FPI2_ARG1A_SHIFT 7 -# define R300_FPI2_ARG1A_MASK (31 << 7) -# define R300_FPI2_ARG1A_NEG (1 << 12) - /* GUESS */ -# define R300_FPI2_ARG1A_ABS (1 << 13) -# define R300_FPI2_ARG2A_SHIFT 14 -# define R300_FPI2_ARG2A_MASK (31 << 14) -# define R300_FPI2_ARG2A_NEG (1 << 19) - /* GUESS */ -# define R300_FPI2_ARG2A_ABS (1 << 20) -# define R300_FPI2_SPECIAL_LRP (1 << 21) -# define R300_FPI2_OUTA_MAD (0 << 23) -# define R300_FPI2_OUTA_DP4 (1 << 23) -# define R300_FPI2_OUTA_MIN (2 << 23) -# define R300_FPI2_OUTA_MAX (3 << 23) -# define R300_FPI2_OUTA_CMP (6 << 23) -# define R300_FPI2_OUTA_FRC (7 << 23) -# define R300_FPI2_OUTA_EX2 (8 << 23) -# define R300_FPI2_OUTA_LG2 (9 << 23) -# define R300_FPI2_OUTA_RCP (10 << 23) -# define R300_FPI2_OUTA_RSQ (11 << 23) -# define R300_FPI2_OUTA_SAT (1 << 30) -# define R300_FPI2_UNKNOWN_31 (1 << 31) +#define R300_US_ALU_RGB_ADDR_0 0x46C0 +# define R300_ALU_SRC0C_SHIFT 0 +# define R300_ALU_SRC0C_MASK (31 << 0) +# define R300_ALU_SRC0C_CONST (1 << 5) +# define R300_ALU_SRC1C_SHIFT 6 +# define R300_ALU_SRC1C_MASK (31 << 6) +# define R300_ALU_SRC1C_CONST (1 << 11) +# define R300_ALU_SRC2C_SHIFT 12 +# define R300_ALU_SRC2C_MASK (31 << 12) +# define R300_ALU_SRC2C_CONST (1 << 17) +# define R300_ALU_SRC_MASK 0x0003ffff +# define R300_ALU_DSTC_SHIFT 18 +# define R300_ALU_DSTC_MASK (31 << 18) +# define R300_ALU_DSTC_REG_MASK_SHIFT 23 +# define R300_ALU_DSTC_REG_X (1 << 23) +# define R300_ALU_DSTC_REG_Y (1 << 24) +# define R300_ALU_DSTC_REG_Z (1 << 25) +# define R300_ALU_DSTC_OUTPUT_MASK_SHIFT 26 +# define R300_ALU_DSTC_OUTPUT_X (1 << 26) +# define 
R300_ALU_DSTC_OUTPUT_Y (1 << 27) +# define R300_ALU_DSTC_OUTPUT_Z (1 << 28) + +#define R300_US_ALU_ALPHA_ADDR_0 0x47C0 +# define R300_ALU_SRC0A_SHIFT 0 +# define R300_ALU_SRC0A_MASK (31 << 0) +# define R300_ALU_SRC0A_CONST (1 << 5) +# define R300_ALU_SRC1A_SHIFT 6 +# define R300_ALU_SRC1A_MASK (31 << 6) +# define R300_ALU_SRC1A_CONST (1 << 11) +# define R300_ALU_SRC2A_SHIFT 12 +# define R300_ALU_SRC2A_MASK (31 << 12) +# define R300_ALU_SRC2A_CONST (1 << 17) +# define R300_ALU_SRC_MASK 0x0003ffff +# define R300_ALU_DSTA_SHIFT 18 +# define R300_ALU_DSTA_MASK (31 << 18) +# define R300_ALU_DSTA_REG (1 << 23) +# define R300_ALU_DSTA_OUTPUT (1 << 24) +# define R300_ALU_DSTA_DEPTH (1 << 27) + +#define R300_US_ALU_RGB_INST_0 0x48C0 +# define R300_ALU_ARGC_SRC0C_XYZ 0 +# define R300_ALU_ARGC_SRC0C_XXX 1 +# define R300_ALU_ARGC_SRC0C_YYY 2 +# define R300_ALU_ARGC_SRC0C_ZZZ 3 +# define R300_ALU_ARGC_SRC1C_XYZ 4 +# define R300_ALU_ARGC_SRC1C_XXX 5 +# define R300_ALU_ARGC_SRC1C_YYY 6 +# define R300_ALU_ARGC_SRC1C_ZZZ 7 +# define R300_ALU_ARGC_SRC2C_XYZ 8 +# define R300_ALU_ARGC_SRC2C_XXX 9 +# define R300_ALU_ARGC_SRC2C_YYY 10 +# define R300_ALU_ARGC_SRC2C_ZZZ 11 +# define R300_ALU_ARGC_SRC0A 12 +# define R300_ALU_ARGC_SRC1A 13 +# define R300_ALU_ARGC_SRC2A 14 +# define R300_ALU_ARGC_SRCP_XYZ 15 +# define R300_ALU_ARGC_SRCP_XXX 16 +# define R300_ALU_ARGC_SRCP_YYY 17 +# define R300_ALU_ARGC_SRCP_ZZZ 18 +# define R300_ALU_ARGC_SRCP_WWW 19 +# define R300_ALU_ARGC_ZERO 20 +# define R300_ALU_ARGC_ONE 21 +# define R300_ALU_ARGC_HALF 22 +# define R300_ALU_ARGC_SRC0C_YZX 23 +# define R300_ALU_ARGC_SRC1C_YZX 24 +# define R300_ALU_ARGC_SRC2C_YZX 25 +# define R300_ALU_ARGC_SRC0C_ZXY 26 +# define R300_ALU_ARGC_SRC1C_ZXY 27 +# define R300_ALU_ARGC_SRC2C_ZXY 28 +# define R300_ALU_ARGC_SRC0CA_WZY 29 +# define R300_ALU_ARGC_SRC1CA_WZY 30 +# define R300_ALU_ARGC_SRC2CA_WZY 31 + +# define R300_ALU_ARG0C_SHIFT 0 +# define R300_ALU_ARG0C_MASK (31 << 0) +# define R300_ALU_ARG0C_NOP (0 << 5) +# 
define R300_ALU_ARG0C_NEG (1 << 5) +# define R300_ALU_ARG0C_ABS (2 << 5) +# define R300_ALU_ARG0C_NAB (3 << 5) +# define R300_ALU_ARG1C_SHIFT 7 +# define R300_ALU_ARG1C_MASK (31 << 7) +# define R300_ALU_ARG1C_NOP (0 << 12) +# define R300_ALU_ARG1C_NEG (1 << 12) +# define R300_ALU_ARG1C_ABS (2 << 12) +# define R300_ALU_ARG1C_NAB (3 << 12) +# define R300_ALU_ARG2C_SHIFT 14 +# define R300_ALU_ARG2C_MASK (31 << 14) +# define R300_ALU_ARG2C_NOP (0 << 19) +# define R300_ALU_ARG2C_NEG (1 << 19) +# define R300_ALU_ARG2C_ABS (2 << 19) +# define R300_ALU_ARG2C_NAB (3 << 19) +# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) +# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) +# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) +# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) + +# define R300_ALU_OUTC_MAD (0 << 23) +# define R300_ALU_OUTC_DP3 (1 << 23) +# define R300_ALU_OUTC_DP4 (2 << 23) +# define R300_ALU_OUTC_D2A (3 << 23) +# define R300_ALU_OUTC_MIN (4 << 23) +# define R300_ALU_OUTC_MAX (5 << 23) +# define R300_ALU_OUTC_CMPH (7 << 23) +# define R300_ALU_OUTC_CMP (8 << 23) +# define R300_ALU_OUTC_FRC (9 << 23) +# define R300_ALU_OUTC_REPL_ALPHA (10 << 23) + +# define R300_ALU_OUTC_MOD_NOP (0 << 27) +# define R300_ALU_OUTC_MOD_MUL2 (1 << 27) +# define R300_ALU_OUTC_MOD_MUL4 (2 << 27) +# define R300_ALU_OUTC_MOD_MUL8 (3 << 27) +# define R300_ALU_OUTC_MOD_DIV2 (4 << 27) +# define R300_ALU_OUTC_MOD_DIV4 (5 << 27) +# define R300_ALU_OUTC_MOD_DIV8 (6 << 27) + +# define R300_ALU_OUTC_CLAMP (1 << 30) +# define R300_ALU_INSERT_NOP (1 << 31) + +#define R300_US_ALU_ALPHA_INST_0 0x49C0 +# define R300_ALU_ARGA_SRC0C_X 0 +# define R300_ALU_ARGA_SRC0C_Y 1 +# define R300_ALU_ARGA_SRC0C_Z 2 +# define R300_ALU_ARGA_SRC1C_X 3 +# define R300_ALU_ARGA_SRC1C_Y 4 +# define R300_ALU_ARGA_SRC1C_Z 5 +# define R300_ALU_ARGA_SRC2C_X 6 +# define R300_ALU_ARGA_SRC2C_Y 7 +# define R300_ALU_ARGA_SRC2C_Z 8 +# define R300_ALU_ARGA_SRC0A 9 +# define R300_ALU_ARGA_SRC1A 10 +# define R300_ALU_ARGA_SRC2A 11 +# define 
R300_ALU_ARGA_SRCP_X 12 +# define R300_ALU_ARGA_SRCP_Y 13 +# define R300_ALU_ARGA_SRCP_Z 14 +# define R300_ALU_ARGA_SRCP_W 15 + +# define R300_ALU_ARGA_ZERO 16 +# define R300_ALU_ARGA_ONE 17 +# define R300_ALU_ARGA_HALF 18 +# define R300_ALU_ARG0A_SHIFT 0 +# define R300_ALU_ARG0A_MASK (31 << 0) +# define R300_ALU_ARG0A_NOP (0 << 5) +# define R300_ALU_ARG0A_NEG (1 << 5) +# define R300_ALU_ARG0A_ABS (2 << 5) +# define R300_ALU_ARG0A_NAB (3 << 5) +# define R300_ALU_ARG1A_SHIFT 7 +# define R300_ALU_ARG1A_MASK (31 << 7) +# define R300_ALU_ARG1A_NOP (0 << 12) +# define R300_ALU_ARG1A_NEG (1 << 12) +# define R300_ALU_ARG1A_ABS (2 << 12) +# define R300_ALU_ARG1A_NAB (3 << 12) +# define R300_ALU_ARG2A_SHIFT 14 +# define R300_ALU_ARG2A_MASK (31 << 14) +# define R300_ALU_ARG2A_NOP (0 << 19) +# define R300_ALU_ARG2A_NEG (1 << 19) +# define R300_ALU_ARG2A_ABS (2 << 19) +# define R300_ALU_ARG2A_NAB (3 << 19) +# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) +# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) +# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) +# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) + +# define R300_ALU_OUTA_MAD (0 << 23) +# define R300_ALU_OUTA_DP4 (1 << 23) +# define R300_ALU_OUTA_MIN (2 << 23) +# define R300_ALU_OUTA_MAX (3 << 23) +# define R300_ALU_OUTA_CND (5 << 23) +# define R300_ALU_OUTA_CMP (6 << 23) +# define R300_ALU_OUTA_FRC (7 << 23) +# define R300_ALU_OUTA_EX2 (8 << 23) +# define R300_ALU_OUTA_LG2 (9 << 23) +# define R300_ALU_OUTA_RCP (10 << 23) +# define R300_ALU_OUTA_RSQ (11 << 23) + +# define R300_ALU_OUTA_MOD_NOP (0 << 27) +# define R300_ALU_OUTA_MOD_MUL2 (1 << 27) +# define R300_ALU_OUTA_MOD_MUL4 (2 << 27) +# define R300_ALU_OUTA_MOD_MUL8 (3 << 27) +# define R300_ALU_OUTA_MOD_DIV2 (4 << 27) +# define R300_ALU_OUTA_MOD_DIV4 (5 << 27) +# define R300_ALU_OUTA_MOD_DIV8 (6 << 27) + +# define R300_ALU_OUTA_CLAMP (1 << 30) /* END: Fragment program instruction set */ /* Fog: Fog Blending Enable */ -#define FG_FOG_BLEND 0x4bc0 -# define 
FG_FOG_BLEND_DISABLE (0 << 0) -# define FG_FOG_BLEND_ENABLE (1 << 0) -# define FG_FOG_BLEND_FN_LINEAR (0 << 1) -# define FG_FOG_BLEND_FN_EXP (1 << 1) -# define FG_FOG_BLEND_FN_EXP2 (2 << 1) -# define FG_FOG_BLEND_FN_CONSTANT (3 << 1) -# define FG_FOG_BLEND_FN_MASK 0x00000006 +#define R300_FG_FOG_BLEND 0x4bc0 +# define R300_FG_FOG_BLEND_DISABLE (0 << 0) +# define R300_FG_FOG_BLEND_ENABLE (1 << 0) +# define R300_FG_FOG_BLEND_FN_LINEAR (0 << 1) +# define R300_FG_FOG_BLEND_FN_EXP (1 << 1) +# define R300_FG_FOG_BLEND_FN_EXP2 (2 << 1) +# define R300_FG_FOG_BLEND_FN_CONSTANT (3 << 1) +# define R300_FG_FOG_BLEND_FN_MASK (3 << 1) /* Fog: Red Component of Fog Color */ -#define FG_FOG_COLOR_R 0x4bc8 +#define R300_FG_FOG_COLOR_R 0x4bc8 /* Fog: Green Component of Fog Color */ -#define FG_FOG_COLOR_G 0x4bcc +#define R300_FG_FOG_COLOR_G 0x4bcc /* Fog: Blue Component of Fog Color */ -#define FG_FOG_COLOR_B 0x4bd0 -# define FG_FOG_COLOR_MASK 0x000001ff +#define R300_FG_FOG_COLOR_B 0x4bd0 +# define R300_FG_FOG_COLOR_MASK 0x000003ff /* Fog: Constant Factor for Fog Blending */ -#define FG_FOG_FACTOR 0x4bc4 -# define FG_FOG_FACTOR_MASK 0x000001ff +#define R300_FG_FOG_FACTOR 0x4bc4 +# define FG_FOG_FACTOR_MASK 0x000003ff /* Fog: Alpha function */ -#define FG_ALPHA_FUNC 0x4bd4 -# define R300_REF_ALPHA_MASK 0x000000ff -# define FG_ALPHA_FUNC_NEVER (0 << 8) -# define FG_ALPHA_FUNC_LESS (1 << 8) -# define FG_ALPHA_FUNC_EQUAL (2 << 8) -# define FG_ALPHA_FUNC_LE (3 << 8) -# define FG_ALPHA_FUNC_GREATER (4 << 8) -# define FG_ALPHA_FUNC_NOTEQUAL (5 << 8) -# define FG_ALPHA_FUNC_GE (6 << 8) -# define FG_ALPHA_FUNC_ALWAYS (7 << 8) -# define R300_ALPHA_TEST_OP_MASK (7 << 8) -# define FG_ALPHA_FUNC_DISABLE (0 << 11) -# define FG_ALPHA_FUNC_ENABLE (1 << 11) -# define FG_ALPHA_FUNC_10BIT (0 << 12) -# define FG_ALPHA_FUNC_8BIT (1 << 12) -/* gap in AMD spec */ -# define FG_ALPHA_FUNC_MASK_DISABLE (0 << 16) -# define FG_ALPHA_FUNC_MASK_ENABLE (1 << 16) -# define FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17) -# 
define FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17) -/* gap in AMD spec */ -# define FG_ALPHA_FUNC_DITH_DISABLE (0 << 20) -# define FG_ALPHA_FUNC_DITH_ENABLE (1 << 20) -/* gap in AMD spec */ -# define FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24) /* Not supported in R520. Default R300 and RV350 behaviour. */ -# define FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */ -# define FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25) -# define FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25) -/* gap in AMD spec */ -# define FG_ALPHA_FUNC_FP16_DISABLE (0 << 28) -# define FG_ALPHA_FUNC_FP16_ENABLE (1 << 28) -/* gap in AMD spec */ +#define R300_FG_ALPHA_FUNC 0x4bd4 +# define R300_FG_ALPHA_FUNC_VAL_MASK 0x000000ff +# define R300_FG_ALPHA_FUNC_NEVER (0 << 8) +# define R300_FG_ALPHA_FUNC_LESS (1 << 8) +# define R300_FG_ALPHA_FUNC_EQUAL (2 << 8) +# define R300_FG_ALPHA_FUNC_LE (3 << 8) +# define R300_FG_ALPHA_FUNC_GREATER (4 << 8) +# define R300_FG_ALPHA_FUNC_NOTEQUAL (5 << 8) +# define R300_FG_ALPHA_FUNC_GE (6 << 8) +# define R300_FG_ALPHA_FUNC_ALWAYS (7 << 8) +# define R300_ALPHA_TEST_OP_MASK (7 << 8) +# define R300_FG_ALPHA_FUNC_DISABLE (0 << 11) +# define R300_FG_ALPHA_FUNC_ENABLE (1 << 11) + +# define R500_FG_ALPHA_FUNC_10BIT (0 << 12) +# define R500_FG_ALPHA_FUNC_8BIT (1 << 12) + +# define R300_FG_ALPHA_FUNC_MASK_DISABLE (0 << 16) +# define R300_FG_ALPHA_FUNC_MASK_ENABLE (1 << 16) +# define R300_FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17) +# define R300_FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17) + +# define R300_FG_ALPHA_FUNC_DITH_DISABLE (0 << 20) +# define R300_FG_ALPHA_FUNC_DITH_ENABLE (1 << 20) + +# define R500_FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24) +# define R500_FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */ +# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25) +# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25) + +# define R500_FG_ALPHA_FUNC_FP16_DISABLE (0 << 28) +# define R500_FG_ALPHA_FUNC_FP16_ENABLE (1 << 28) + /* Fog: Where does the depth come from? 
*/ #define R300_FG_DEPTH_SRC 0x4bd8 @@ -1917,8 +2002,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_FG_DEPTH_SRC_SHADER (1 << 0) /* Fog: Alpha Compare Value */ -#define FG_ALPHA_VALUE 0x4be0 -# define FG_ALPHA_VALUE_MASK 0x0000ffff +#define R500_FG_ALPHA_VALUE 0x4be0 +# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff /* gap */ @@ -1927,7 +2012,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_PFS_PARAM_0_Y 0x4C04 #define R300_PFS_PARAM_0_Z 0x4C08 #define R300_PFS_PARAM_0_W 0x4C0C -/* GUESS: PARAM_31 is last, based on native limits reported by fglrx */ +/* last consts */ #define R300_PFS_PARAM_31_X 0x4DF0 #define R300_PFS_PARAM_31_Y 0x4DF4 #define R300_PFS_PARAM_31_Z 0x4DF8 @@ -1935,14 +2020,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Unpipelined. */ #define R300_RB3D_CCTL 0x4e00 -/* gap in AMD docs */ # define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5) # define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5) # define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5) # define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS (3 << 5) # define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE (0 << 7) # define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE (1 << 7) -/* gap in AMD docs */ # define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE (0 << 9) # define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE (1 << 9) # define R300_RB3D_CCTL_CMASK_DISABLE (0 << 10) @@ -1967,9 +2050,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R300_RB3D_CBLEND 0x4E04 #define R300_RB3D_ABLEND 0x4E08 /* the following only appear in CBLEND */ -# define R300_BLEND_ENABLE (1 << 0) -# define R300_BLEND_UNKNOWN (3 << 1) -# define R300_BLEND_NO_SEPARATE (1 << 3) +# define R300_ALPHA_BLEND_ENABLE (1 << 0) +# define R300_SEPARATE_ALPHA_ENABLE (1 << 1) +# define R300_READ_ENABLE (1 << 2) +# define R300_DISCARD_SRC_PIXELS_DIS (0 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0 (1 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0 (2 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0 (3 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3) + /* the following are shared between CBLEND and ABLEND */ # define R300_FCN_MASK (3 << 12) # define R300_COMB_FCN_ADD_CLAMP (0 << 12) @@ -2044,7 +2135,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Color Buffer Address Offset of multibuffer 0. Unpipelined. */ #define R300_RB3D_COLOROFFSET0 0x4E28 -# define R300_COLOROFFSET_MASK 0xFFFFFFF0 /* GUESS */ +# define R300_COLOROFFSET_MASK 0xFFFFFFE0 /* Color Buffer Address Offset of multibuffer 1. Unpipelined. */ #define R300_RB3D_COLOROFFSET1 0x4E2C /* Color Buffer Address Offset of multibuffer 2. Unpipelined. */ @@ -2061,7 +2152,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Bit 18: Extremely weird tile like, but some pixels duplicated? */ #define R300_RB3D_COLORPITCH0 0x4E38 -# define R300_COLORPITCH_MASK 0x00001FF8 /* GUESS, should be 13:1 */ +# define R300_COLORPITCH_MASK 0x00003FFE # define R300_COLOR_TILE_DISABLE (0 << 16) # define R300_COLOR_TILE_ENABLE (1 << 16) # define R300_COLOR_MICROTILE_DISABLE (0 << 17) @@ -2071,12 +2162,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) # define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) # define R300_COLOR_ENDIAN_HALF_DWORD_SWAP (3 << 19) -# define R300_COLOR_FORMAT_ARGB10101010 (0 << 21) -# define R300_COLOR_FORMAT_UV1010 (1 << 21) -# define R300_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */ +# define R500_COLOR_FORMAT_ARGB10101010 (0 << 21) +# define R500_COLOR_FORMAT_UV1010 (1 << 21) +# define R500_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */ # define R300_COLOR_FORMAT_ARGB1555 (3 << 21) # define R300_COLOR_FORMAT_RGB565 (4 << 21) -# define R300_COLOR_FORMAT_ARGB2101010 (5 << 21) +# define R500_COLOR_FORMAT_ARGB2101010 (5 << 21) # define R300_COLOR_FORMAT_ARGB8888 (6 << 21) # define R300_COLOR_FORMAT_ARGB32323232 (7 << 21) /* reserved */ @@ -2085,7 +2176,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_COLOR_FORMAT_VYUY (11 << 21) # define R300_COLOR_FORMAT_YVYU (12 << 21) # define R300_COLOR_FORMAT_UV88 (13 << 21) -# define R300_COLOR_FORMAT_I10 (14 << 21) +# define R500_COLOR_FORMAT_I10 (14 << 21) # define R300_COLOR_FORMAT_ARGB4444 (15 << 21) #define R300_RB3D_COLORPITCH1 0x4E3C #define R300_RB3D_COLORPITCH2 0x4E40 @@ -2104,16 +2195,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Set to 0A before 3D operations, set to 02 afterwards. 
*/ #define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c -# define RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0) -# define RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0) -# define RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0) -# define RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0) -# define RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2) -# define RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2) -# define RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2) -# define RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2) -# define RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4) -# define RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4) #define R300_RB3D_DITHER_CTL 0x4E50 # define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE (0 << 0) @@ -2128,87 +2219,81 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Resolve buffer destination address. The cache must be empty before changing * this register if the cb is in resolve mode. 
Unpipelined */ -#define RB3D_AARESOLVE_OFFSET 0x4e80 -# define RB3D_AARESOLVE_OFFSET_SHIFT 5 -# define RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */ +#define R300_RB3D_AARESOLVE_OFFSET 0x4e80 +# define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5 +# define R300_RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */ /* Resolve Buffer Pitch and Tiling Control. The cache must be empty before * changing this register if the cb is in resolve mode. Unpipelined */ -#define RB3D_AARESOLVE_PITCH 0x4e84 -# define RB3D_AARESOLVE_PITCH_SHIFT 1 -# define RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */ +#define R300_RB3D_AARESOLVE_PITCH 0x4e84 +# define R300_RB3D_AARESOLVE_PITCH_SHIFT 1 +# define R300_RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */ /* Resolve Buffer Control. Unpipelined */ -#define RB3D_AARESOLVE_CTL 0x4e88 -# define RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0) -# define RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0) -# define RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1) -# define RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1) -# define RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2) -# define RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2) +#define R300_RB3D_AARESOLVE_CTL 0x4e88 +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2) /* Discard src pixels less than or equal to threshold. 
*/ -#define RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0 +#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0 /* Discard src pixels greater than or equal to threshold. */ -#define RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000 +#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000 /* 3D ROP Control. Stalls the 2d/3d datapath until it is idle. */ -#define RB3D_ROPCNTL 0x4e18 +#define R300_RB3D_ROPCNTL 0x4e18 /* TODO: fill in content here */ /* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. 
*/ -#define RB3D_CLRCMP_FLIPE 0x4e1c +#define R300_RB3D_CLRCMP_FLIPE 0x4e1c /* Sets the fifo sizes */ -#define RB3D_FIFO_SIZE 0x4ef4 -# define RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0) -# define RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0) -# define RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0) -# define RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0) -/* gap in AMD spec */ +#define R500_RB3D_FIFO_SIZE 0x4ef4 +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0) /* Constant color used by the blender. Pipelined through the blender. */ -#define RB3D_CONSTANT_COLOR_AR 0x4ef8 -# define RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff -# define RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0 -# define RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000 -# define RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16 +#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8 +# define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff +# define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0 +# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000 +# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16 /* Constant color used by the blender. Pipelined through the blender. */ -#define RB3D_CONSTANT_COLOR_GB 0x4efc -# define RB3D_CONSTANT_COLOR_AR_BLUE_MASK 0x0000ffff -# define RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT 0 -# define RB3D_CONSTANT_COLOR_AR_GREEN_MASK 0xffff0000 -# define RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16 +#define R500_RB3D_CONSTANT_COLOR_GB 0x4efc +# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK 0x0000ffff +# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT 0 +# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK 0xffff0000 +# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16 /* gap */ /* There seems to be no "write only" setting, so use Z-test = ALWAYS * for this. * Bit (1<<8) is the "test" bit. 
so plain write is 6 - vd */ -#define R300_RB3D_ZSTENCIL_CNTL_0 0x4F00 -# define R300_RB3D_Z_DISABLED_1 0x00000010 -# define R300_RB3D_Z_DISABLED_2 0x00000014 -# define R300_RB3D_Z_TEST 0x00000012 -# define R300_RB3D_Z_TEST_AND_WRITE 0x00000016 -# define R300_RB3D_Z_WRITE_ONLY 0x00000006 - -# define R300_RB3D_Z_TEST 0x00000012 -# define R300_RB3D_Z_TEST_AND_WRITE 0x00000016 -# define R300_RB3D_Z_WRITE_ONLY 0x00000006 -# define R300_RB3D_STENCIL_ENABLE 0x00000001 - -#define R300_RB3D_ZSTENCIL_CNTL_1 0x4f04 +#define R300_ZB_CNTL 0x4F00 +# define R300_STENCIL_ENABLE (1 << 0) +# define R300_Z_ENABLE (1 << 1) +# define R300_Z_WRITE_ENABLE (1 << 2) +# define R300_Z_SIGNED_COMPARE (1 << 3) +# define R300_STENCIL_FRONT_BACK (1 << 4) + +#define R300_ZB_ZSTENCILCNTL 0x4f04 /* functions */ # define R300_ZS_NEVER 0 # define R300_ZS_LESS 1 @@ -2228,51 +2313,49 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ZS_INVERT 5 # define R300_ZS_INCR_WRAP 6 # define R300_ZS_DECR_WRAP 7 +# define R300_Z_FUNC_SHIFT 0 /* front and back refer to operations done for front and back faces, i.e. 
separate stencil function support */ -# define R300_RB3D_ZS1_DEPTH_FUNC_SHIFT 0 -# define R300_RB3D_ZS1_FRONT_FUNC_SHIFT 3 -# define R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT 6 -# define R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT 9 -# define R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT 12 -# define R300_RB3D_ZS1_BACK_FUNC_SHIFT 15 -# define R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT 18 -# define R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT 21 -# define R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT 24 - -#define ZB_STENCILREFMASK 0x4f08 -# define ZB_STENCILREFMASK_STENCILREF_SHIFT 0 -# define ZB_STENCILREFMASK_STENCIL_MASK 0xff -# define ZB_STENCILREFMASK_STENCILREF_MASK 0x000000ff -# define ZB_STENCILREFMASK_STENCILMASK_SHIFT 8 -# define ZB_STENCILREFMASK_STENCILMASK_MASK 0x0000ff00 -# define ZB_STENCILREFMASK_STENCILWRITEMASK_SHIFT 16 -# define ZB_STENCILREFMASK_STENCILWRITEMASK_MASK 0xffff0000 +# define R300_S_FRONT_FUNC_SHIFT 3 +# define R300_S_FRONT_SFAIL_OP_SHIFT 6 +# define R300_S_FRONT_ZPASS_OP_SHIFT 9 +# define R300_S_FRONT_ZFAIL_OP_SHIFT 12 +# define R300_S_BACK_FUNC_SHIFT 15 +# define R300_S_BACK_SFAIL_OP_SHIFT 18 +# define R300_S_BACK_ZPASS_OP_SHIFT 21 +# define R300_S_BACK_ZFAIL_OP_SHIFT 24 + +#define R300_ZB_STENCILREFMASK 0x4f08 +# define R300_STENCILREF_SHIFT 0 +# define R300_STENCILREF_MASK 0x000000ff +# define R300_STENCILMASK_SHIFT 8 +# define R300_STENCILMASK_MASK 0x0000ff00 +# define R300_STENCILWRITEMASK_SHIFT 16 +# define R300_STENCILWRITEMASK_MASK 0x00ff0000 /* gap */ -#define ZB_FORMAT 0x4f10 -# define ZB_FORMAR_DEPTHFORMAT_16BIT_INT_Z (0 << 0) -# define ZB_FORMAR_DEPTHFORMAT_16BIT_13E3 (1 << 0) -# define ZB_FORMAR_DEPTHFORMAT_24BIT_INT_Z (2 << 0) +#define R300_ZB_FORMAT 0x4f10 +# define R300_DEPTHFORMAT_16BIT_INT_Z (0 << 0) +# define R300_DEPTHFORMAT_16BIT_13E3 (1 << 0) +# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL (2 << 0) /* reserved up to (15 << 0) */ -# define ZB_FORMAR_INVERT_13E3_LEADING_ONES (0 << 4) -# define ZB_FORMAR_INVERT_13E3_LEADING_ZEROS (1 << 4) -# define ZB_FORMAR_PEQ8_UNUSED 
(1 << 5) +# define R300_INVERT_13E3_LEADING_ONES (0 << 4) +# define R300_INVERT_13E3_LEADING_ZEROS (1 << 4) -#define R300_RB3D_EARLY_Z 0x4F14 -# define R300_EARLY_Z_DISABLE (0 << 0) -# define R300_EARLY_Z_ENABLE (1 << 0) +#define R300_ZB_ZTOP 0x4F14 +# define R300_ZTOP_DISABLE (0 << 0) +# define R300_ZTOP_ENABLE (1 << 0) /* gap */ -#define ZB_ZCACHE_CTLSTAT 0x4f18 -# define ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0) -# define ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0) -# define ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1) -# define ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1) -# define ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31) -# define ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31) +#define R300_ZB_ZCACHE_CTLSTAT 0x4f18 +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31) #define R300_ZB_BW_CNTL 0x4f1c # define R300_HIZ_DISABLE (0 << 0) @@ -2289,31 +2372,32 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5) # define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6) # define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6) -# define R300_ZEQUAL_OPTIMIZE_ENABLE (0 << 7) -# define R300_ZEQUAL_OPTIMIZE_DISABLE (1 << 7) -# define R300_SEQUAL_OPTIMIZE_ENABLE (0 << 8) -# define R300_SEQUAL_OPTIMIZE_DISABLE (1 << 8) -/* gap in AMD docs */ -# define R300_BMASK_ENABLE (0 << 10) -# define R300_BMASK_DISABLE (1 << 10) -# define R300_HIZ_EQUAL_REJECT_DISABLE (0 << 11) -# define R300_HIZ_EQUAL_REJECT_ENABLE (1 << 11) -# define R300_HIZ_FP_EXP_BITS_DISABLE (0 << 12) -# define R300_HIZ_FP_EXP_BITS_1 (1 << 12) -# define R300_HIZ_FP_EXP_BITS_2 (2 << 12) -# define R300_HIZ_FP_EXP_BITS_3 (3 << 12) -# define R300_HIZ_FP_EXP_BITS_4 (4 << 12) -# define R300_HIZ_FP_EXP_BITS_5 (5 << 12) -# define R300_HIZ_FP_INVERT_LEADING_ONES (0 << 15) -# define R300_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15) -# define R300_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16) -# define R300_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16) -# define R300_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17) -# define R300_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17) -# define R300_PEQ_PACKING_DISABLE (0 << 18) -# define R300_PEQ_PACKING_ENABLE (1 << 18) -# define R300_COVERED_PTR_MASKING_DISABLE (0 << 18) -# define R300_COVERED_PTR_MASKING_ENABLE (1 << 18) + +# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7) +# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7) +# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8) +# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8) + +# define R500_BMASK_ENABLE (0 << 10) +# define R500_BMASK_DISABLE (1 << 10) +# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11) +# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11) +# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12) +# define R500_HIZ_FP_EXP_BITS_1 (1 << 12) +# define R500_HIZ_FP_EXP_BITS_2 (2 << 12) +# define R500_HIZ_FP_EXP_BITS_3 (3 << 12) +# define R500_HIZ_FP_EXP_BITS_4 (4 << 12) +# define R500_HIZ_FP_EXP_BITS_5 (5 << 12) +# 
define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15) +# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15) +# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16) +# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16) +# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17) +# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17) +# define R500_PEQ_PACKING_DISABLE (0 << 18) +# define R500_PEQ_PACKING_ENABLE (1 << 18) +# define R500_COVERED_PTR_MASKING_DISABLE (0 << 18) +# define R500_COVERED_PTR_MASKING_ENABLE (1 << 18) /* gap */ @@ -2321,67 +2405,68 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Z Buffer Address Offset. * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles. */ -#define ZB_DEPTHOFFSET 0x4f20 +#define R300_ZB_DEPTHOFFSET 0x4f20 /* Z Buffer Pitch and Endian Control */ -#define ZB_DEPTHPITCH 0x4f24 -# define R300_DEPTHPITCH_MASK 0x00001FF8 /* TODO: should be (13:2) */ -# define ZB_DEPTHPITCH_DEPTHMACROTILE_DISABLE (0 << 16) -# define ZB_DEPTHPITCH_DEPTHMACROTILE_ENABLE (1 << 16) -# define ZB_DEPTHPITCH_DEPTHMICROTILE_LINEAR (0 << 17) -# define ZB_DEPTHPITCH_DEPTHMICROTILE_TILED (1 << 17) -# define ZB_DEPTHPITCH_DEPTHMICROTILE_TILED_SQUARE (2 << 17) -# define ZB_DEPTHPITCH_DEPTHENDIAN_NO_SWAP (0 << 18) -# define ZB_DEPTHPITCH_DEPTHENDIAN_WORD_SWAP (1 << 18) -# define ZB_DEPTHPITCH_DEPTHENDIAN_DWORD_SWAP (2 << 18) -# define ZB_DEPTHPITCH_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18) +#define R300_ZB_DEPTHPITCH 0x4f24 +# define R300_DEPTHPITCH_MASK 0x00003FFC +# define R300_DEPTHMACROTILE_DISABLE (0 << 16) +# define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMICROTILE_LINEAR (0 << 17) +# define R300_DEPTHMICROTILE_TILED (1 << 17) +# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHENDIAN_NO_SWAP (0 << 18) +# define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) +# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) +# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18) /* Z Buffer Clear Value */ -#define 
ZB_DEPTHCLEARVALUE 0x4f28 +#define R300_ZB_DEPTHCLEARVALUE 0x4f28 /* Hierarchical Z Memory Offset */ -#define ZB_HIZ_OFFSET 0x4f44 +#define R300_ZB_HIZ_OFFSET 0x4f44 -/* Hierarchical Z Read Index */ -#define ZB_HIZ_RDINDEX 0x4f48 +/* Hierarchical Z Write Index */ +#define R300_ZB_HIZ_WRINDEX 0x4f48 /* Hierarchical Z Data */ -#define ZB_HIZ_DWORD 0x4f4c +#define R300_ZB_HIZ_DWORD 0x4f4c -/* Hierarchical Z Write Index */ -#define ZB_HIZ_WRINDEX 0x4f50 +/* Hierarchical Z Read Index */ +#define R300_ZB_HIZ_RDINDEX 0x4f50 /* Hierarchical Z Pitch */ -#define ZB_HIZ_PITCH 0x4f54 +#define R300_ZB_HIZ_PITCH 0x4f54 /* Z Buffer Z Pass Counter Data */ -#define ZB_ZPASS_DATA 0x4f58 +#define R300_ZB_ZPASS_DATA 0x4f58 /* Z Buffer Z Pass Counter Address */ -#define ZB_ZPASS_ADDR 0x4f5c +#define R300_ZB_ZPASS_ADDR 0x4f5c /* Depth buffer X and Y coordinate offset */ -#define ZB_DEPTHXY_OFFSET 0x4f60 -# define ZB_DEPTHX_OFFSET_SHIFT 1 -# define ZB_DEPTHX_OFFSET_MASK 0x000007FE -# define ZB_DEPTHY_OFFSET_SHIFT 17 -# define ZB_DEPTHY_OFFSET_MASK 0x07FE0000 +#define R300_ZB_DEPTHXY_OFFSET 0x4f60 +# define R300_DEPTHX_OFFSET_SHIFT 1 +# define R300_DEPTHX_OFFSET_MASK 0x000007FE +# define R300_DEPTHY_OFFSET_SHIFT 17 +# define R300_DEPTHY_OFFSET_MASK 0x07FE0000 /* Sets the fifo sizes */ -#define ZB_FIFO_SIZE 0x4fd0 -# define ZB_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0) -# define ZB_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0) -# define ZB_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0) -# define ZB_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (4 << 0) +#define R500_ZB_FIFO_SIZE 0x4fd0 +# define R500_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_OP_FIFO_SIZE_EIGTHS (4 << 0) /* Stencil Reference Value and Mask for backfacing quads */ -#define ZB_STENCILREFMASK_BF 0x4fd4 -# define ZB_STENCILREFMASK_BF_STENCILREF_SHIFT 0 -# define ZB_STENCILREFMASK_BF_STENCILREF_MASK 0x000000ff -# define ZB_STENCILREFMASK_BF_STENCILMASK_SHIFT 8 -# define 
ZB_STENCILREFMASK_BF_STENCILMASK_MASK 0x0000ff00 -# define ZB_STENCILREFMASK_BF_STENCILWRITEMASK_SHIFT 16 -# define ZB_STENCILREFMASK_BF_STENCILWRITEMASK_MASK 0xffff0000 +/* R300_ZB_STENCILREFMASK handles front face */ +#define R500_ZB_STENCILREFMASK_BF 0x4fd4 +# define R500_STENCILREF_SHIFT 0 +# define R500_STENCILREF_MASK 0x000000ff +# define R500_STENCILMASK_SHIFT 8 +# define R500_STENCILMASK_MASK 0x0000ff00 +# define R500_STENCILWRITEMASK_SHIFT 16 +# define R500_STENCILWRITEMASK_MASK 0x00ff0000 /** * \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION @@ -2598,6 +2683,479 @@ enum { #define R300_PRIM_NUM_VERTICES_SHIFT 16 #define R300_PRIM_NUM_VERTICES_MASK 0xffff + + +/* + * The R500 unified shader (US) registers come in banks of 512 each, one + * for each instruction slot in the shader. You can't touch them directly. + * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive + * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the + * instruction is fully specified. 
+ */ +#define R500_US_ALU_ALPHA_INST_0 0xa800 +# define R500_ALPHA_OP_MAD 0 +# define R500_ALPHA_OP_DP 1 +# define R500_ALPHA_OP_MIN 2 +# define R500_ALPHA_OP_MAX 3 +/* #define R500_ALPHA_OP_RESERVED 4 */ +# define R500_ALPHA_OP_CND 5 +# define R500_ALPHA_OP_CMP 6 +# define R500_ALPHA_OP_FRC 7 +# define R500_ALPHA_OP_EX2 8 +# define R500_ALPHA_OP_LN2 9 +# define R500_ALPHA_OP_RCP 10 +# define R500_ALPHA_OP_RSQ 11 +# define R500_ALPHA_OP_SIN 12 +# define R500_ALPHA_OP_COS 13 +# define R500_ALPHA_OP_MDH 14 +# define R500_ALPHA_OP_MDV 15 +# define R500_ALPHA_ADDRD(x) (x << 4) +# define R500_ALPHA_ADDRD_REL (1 << 11) +# define R500_ALPHA_SEL_A_SRC0 (0 << 12) +# define R500_ALPHA_SEL_A_SRC1 (1 << 12) +# define R500_ALPHA_SEL_A_SRC2 (2 << 12) +# define R500_ALPHA_SEL_A_SRCP (3 << 12) +# define R500_ALPHA_SWIZ_A_R (0 << 14) +# define R500_ALPHA_SWIZ_A_G (1 << 14) +# define R500_ALPHA_SWIZ_A_B (2 << 14) +# define R500_ALPHA_SWIZ_A_A (3 << 14) +# define R500_ALPHA_SWIZ_A_0 (4 << 14) +# define R500_ALPHA_SWIZ_A_HALF (5 << 14) +# define R500_ALPHA_SWIZ_A_1 (6 << 14) +/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */ +# define R500_ALPHA_MOD_A_NOP (0 << 17) +# define R500_ALPHA_MOD_A_NEG (1 << 17) +# define R500_ALPHA_MOD_A_ABS (2 << 17) +# define R500_ALPHA_MOD_A_NAB (3 << 17) +# define R500_ALPHA_SEL_B_SRC0 (0 << 19) +# define R500_ALPHA_SEL_B_SRC1 (1 << 19) +# define R500_ALPHA_SEL_B_SRC2 (2 << 19) +# define R500_ALPHA_SEL_B_SRCP (3 << 19) +# define R500_ALPHA_SWIZ_B_R (0 << 21) +# define R500_ALPHA_SWIZ_B_G (1 << 21) +# define R500_ALPHA_SWIZ_B_B (2 << 21) +# define R500_ALPHA_SWIZ_B_A (3 << 21) +# define R500_ALPHA_SWIZ_B_0 (4 << 21) +# define R500_ALPHA_SWIZ_B_HALF (5 << 21) +# define R500_ALPHA_SWIZ_B_1 (6 << 21) +/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALPHA_MOD_B_NOP (0 << 24) +# define R500_ALPHA_MOD_B_NEG (1 << 24) +# define R500_ALPHA_MOD_B_ABS (2 << 24) +# define R500_ALPHA_MOD_B_NAB (3 << 24) +# define R500_ALPHA_OMOD_IDENTITY (0 << 26) +# 
define R500_ALPHA_OMOD_MUL_2 (1 << 26) +# define R500_ALPHA_OMOD_MUL_4 (2 << 26) +# define R500_ALPHA_OMOD_MUL_8 (3 << 26) +# define R500_ALPHA_OMOD_DIV_2 (4 << 26) +# define R500_ALPHA_OMOD_DIV_4 (5 << 26) +# define R500_ALPHA_OMOD_DIV_8 (6 << 26) +# define R500_ALPHA_OMOD_DISABLE (7 << 26) +# define R500_ALPHA_TARGET(x) (x << 29) +# define R500_ALPHA_W_OMASK (1 << 31) +#define R500_US_ALU_ALPHA_ADDR_0 0x9800 +# define R500_ALPHA_ADDR0(x) (x << 0) +# define R500_ALPHA_ADDR0_CONST (1 << 8) +# define R500_ALPHA_ADDR0_REL (1 << 9) +# define R500_ALPHA_ADDR1(x) (x << 10) +# define R500_ALPHA_ADDR1_CONST (1 << 18) +# define R500_ALPHA_ADDR1_REL (1 << 19) +# define R500_ALPHA_ADDR2(x) (x << 20) +# define R500_ALPHA_ADDR2_CONST (1 << 28) +# define R500_ALPHA_ADDR2_REL (1 << 29) +# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30) +# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30) +# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30) +# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30) +#define R500_US_ALU_RGBA_INST_0 0xb000 +# define R500_ALU_RGBA_OP_MAD (0 << 0) +# define R500_ALU_RGBA_OP_DP3 (1 << 0) +# define R500_ALU_RGBA_OP_DP4 (2 << 0) +# define R500_ALU_RGBA_OP_D2A (3 << 0) +# define R500_ALU_RGBA_OP_MIN (4 << 0) +# define R500_ALU_RGBA_OP_MAX (5 << 0) +/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */ +# define R500_ALU_RGBA_OP_CND (7 << 0) +# define R500_ALU_RGBA_OP_CMP (8 << 0) +# define R500_ALU_RGBA_OP_FRC (9 << 0) +# define R500_ALU_RGBA_OP_SOP (10 << 0) +# define R500_ALU_RGBA_OP_MDH (11 << 0) +# define R500_ALU_RGBA_OP_MDV (12 << 0) +# define R500_ALU_RGBA_ADDRD(x) (x << 4) +# define R500_ALU_RGBA_ADDRD_REL (1 << 11) +# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) +# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12) +# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12) +# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12) +# define R500_ALU_RGBA_R_SWIZ_R (0 << 14) +# define R500_ALU_RGBA_R_SWIZ_G (1 << 14) +# define R500_ALU_RGBA_R_SWIZ_B (2 << 14) +# define R500_ALU_RGBA_R_SWIZ_A (3 << 14) +# 
define R500_ALU_RGBA_R_SWIZ_0 (4 << 14) +# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14) +# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14) +/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */ +# define R500_ALU_RGBA_G_SWIZ_R (0 << 17) +# define R500_ALU_RGBA_G_SWIZ_G (1 << 17) +# define R500_ALU_RGBA_G_SWIZ_B (2 << 17) +# define R500_ALU_RGBA_G_SWIZ_A (3 << 17) +# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17) +# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17) +# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17) +/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */ +# define R500_ALU_RGBA_B_SWIZ_R (0 << 20) +# define R500_ALU_RGBA_B_SWIZ_G (1 << 20) +# define R500_ALU_RGBA_B_SWIZ_B (2 << 20) +# define R500_ALU_RGBA_B_SWIZ_A (3 << 20) +# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20) +# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20) +# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20) +/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */ +# define R500_ALU_RGBA_MOD_C_NOP (0 << 23) +# define R500_ALU_RGBA_MOD_C_NEG (1 << 23) +# define R500_ALU_RGBA_MOD_C_ABS (2 << 23) +# define R500_ALU_RGBA_MOD_C_NAB (3 << 23) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25) +# define R500_ALU_RGBA_A_SWIZ_R (0 << 27) +# define R500_ALU_RGBA_A_SWIZ_G (1 << 27) +# define R500_ALU_RGBA_A_SWIZ_B (2 << 27) +# define R500_ALU_RGBA_A_SWIZ_A (3 << 27) +# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27) +# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27) +# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27) +/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */ +# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30) +#define R500_US_ALU_RGB_INST_0 0xa000 +# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0) +# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0) +# define R500_ALU_RGB_SEL_A_SRC2 (2 << 
0) +# define R500_ALU_RGB_SEL_A_SRCP (3 << 0) +# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2) +# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2) +# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2) +# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2) +# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2) +# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2) +# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2) +/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */ +# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5) +# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5) +# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5) +# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5) +# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5) +# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5) +# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5) +/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */ +# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8) +# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8) +# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8) +# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8) +# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8) +# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8) +# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8) +/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */ +# define R500_ALU_RGB_MOD_A_NOP (0 << 11) +# define R500_ALU_RGB_MOD_A_NEG (1 << 11) +# define R500_ALU_RGB_MOD_A_ABS (2 << 11) +# define R500_ALU_RGB_MOD_A_NAB (3 << 11) +# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13) +# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13) +# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13) +# define R500_ALU_RGB_SEL_B_SRCP (3 << 13) +# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15) +# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15) +# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15) +# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15) +# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15) +# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15) +# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15) +/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */ +# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18) +# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18) +# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18) +# define R500_ALU_RGB_G_SWIZ_B_A (3 
<< 18) +# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18) +# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18) +# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18) +/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */ +# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21) +# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21) +# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21) +# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21) +# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21) +# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21) +# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21) +/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALU_RGB_MOD_B_NOP (0 << 24) +# define R500_ALU_RGB_MOD_B_NEG (1 << 24) +# define R500_ALU_RGB_MOD_B_ABS (2 << 24) +# define R500_ALU_RGB_MOD_B_NAB (3 << 24) +# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26) +# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26) +# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26) +# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26) +# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26) +# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26) +# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26) +# define R500_ALU_RGB_OMOD_DISABLE (7 << 26) +# define R500_ALU_RGB_TARGET(x) (x << 29) +# define R500_ALU_RGB_WMASK (1 << 31) +#define R500_US_ALU_RGB_ADDR_0 0x9000 +# define R500_RGB_ADDR0(x) (x << 0) +# define R500_RGB_ADDR0_CONST (1 << 8) +# define R500_RGB_ADDR0_REL (1 << 9) +# define R500_RGB_ADDR1(x) (x << 10) +# define R500_RGB_ADDR1_CONST (1 << 18) +# define R500_RGB_ADDR1_REL (1 << 19) +# define R500_RGB_ADDR2(x) (x << 20) +# define R500_RGB_ADDR2_CONST (1 << 28) +# define R500_RGB_ADDR2_REL (1 << 29) +# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) +# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30) +# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30) +# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30) +#define R500_US_CMN_INST_0 0xb800 +# define R500_INST_TYPE_ALU (0 << 0) +# define R500_INST_TYPE_OUT (1 << 0) +# define R500_INST_TYPE_FC (2 << 0) +# define R500_INST_TYPE_TEX (3 << 0) +# define R500_INST_TEX_SEM_WAIT (1 << 2) +# 
define R500_INST_RGB_PRED_SEL_NONE (0 << 3) +# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3) +# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3) +# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3) +# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3) +# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3) +# define R500_INST_RGB_PRED_INV (1 << 6) +# define R500_INST_WRITE_INACTIVE (1 << 7) +# define R500_INST_LAST (1 << 8) +# define R500_INST_NOP (1 << 9) +# define R500_INST_ALU_WAIT (1 << 10) +# define R500_INST_RGB_WMASK_R (1 << 11) +# define R500_INST_RGB_WMASK_G (1 << 12) +# define R500_INST_RGB_WMASK_B (1 << 13) +# define R500_INST_ALPHA_WMASK (1 << 14) +# define R500_INST_RGB_OMASK_R (1 << 15) +# define R500_INST_RGB_OMASK_G (1 << 16) +# define R500_INST_RGB_OMASK_B (1 << 17) +# define R500_INST_ALPHA_OMASK (1 << 18) +# define R500_INST_RGB_CLAMP (1 << 19) +# define R500_INST_ALPHA_CLAMP (1 << 20) +# define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALPHA_PRED_INV (1 << 22) +# define R500_INST_ALU_RESULT_OP_EQ (0 << 23) +# define R500_INST_ALU_RESULT_OP_LT (1 << 23) +# define R500_INST_ALU_RESULT_OP_GE (2 << 23) +# define R500_INST_ALU_RESULT_OP_NE (3 << 23) +# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25) +# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25) +# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25) +# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25) +# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25) +# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25) +/* XXX next four are kind of guessed */ +# define R500_INST_STAT_WE_R (1 << 28) +# define R500_INST_STAT_WE_G (1 << 29) +# define R500_INST_STAT_WE_B (1 << 30) +# define R500_INST_STAT_WE_A (1 << 31) + +/* note that these are 8 bit lengths, despite the offsets, at least for R500 */ +#define R500_US_CODE_ADDR 0x4630 +# define R500_US_CODE_START_ADDR(x) (x << 0) +# define R500_US_CODE_END_ADDR(x) (x << 16) +#define R500_US_CODE_OFFSET 0x4638 +# define R500_US_CODE_OFFSET_ADDR(x) (x << 0) +#define R500_US_CODE_RANGE 0x4634 +# 
define R500_US_CODE_RANGE_ADDR(x) (x << 0) +# define R500_US_CODE_RANGE_SIZE(x) (x << 16) +#define R500_US_CONFIG 0x4600 +# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1) +#define R500_US_FC_ADDR_0 0xa000 +# define R500_FC_BOOL_ADDR(x) (x << 0) +# define R500_FC_INT_ADDR(x) (x << 8) +# define R500_FC_JUMP_ADDR(x) (x << 16) +# define R500_FC_JUMP_GLOBAL (1 << 31) +#define R500_US_FC_BOOL_CONST 0x4620 +# define R500_FC_KBOOL(x) (x) +#define R500_US_FC_CTRL 0x4624 +# define R500_FC_TEST_EN (1 << 30) +# define R500_FC_FULL_FC_EN (1 << 31) +#define R500_US_FC_INST_0 0x9800 +# define R500_FC_OP_JUMP (0 << 0) +# define R500_FC_OP_LOOP (1 << 0) +# define R500_FC_OP_ENDLOOP (2 << 0) +# define R500_FC_OP_REP (3 << 0) +# define R500_FC_OP_ENDREP (4 << 0) +# define R500_FC_OP_BREAKLOOP (5 << 0) +# define R500_FC_OP_BREAKREP (6 << 0) +# define R500_FC_OP_CONTINUE (7 << 0) +# define R500_FC_B_ELSE (1 << 4) +# define R500_FC_JUMP_ANY (1 << 5) +# define R500_FC_A_OP_NONE (0 << 6) +# define R500_FC_A_OP_POP (1 << 6) +# define R500_FC_A_OP_PUSH (2 << 6) +# define R500_FC_JUMP_FUNC(x) (x << 8) +# define R500_FC_B_POP_CNT(x) (x << 16) +# define R500_FC_B_OP0_NONE (0 << 24) +# define R500_FC_B_OP0_DECR (1 << 24) +# define R500_FC_B_OP0_INCR (2 << 24) +# define R500_FC_B_OP1_DECR (0 << 26) +# define R500_FC_B_OP1_NONE (1 << 26) +# define R500_FC_B_OP1_INCR (2 << 26) +# define R500_FC_IGNORE_UNCOVERED (1 << 28) +#define R500_US_FC_INT_CONST_0 0x4c00 +# define R500_FC_INT_CONST_KR(x) (x << 0) +# define R500_FC_INT_CONST_KG(x) (x << 8) +# define R500_FC_INT_CONST_KB(x) (x << 16) +/* _0 through _15 */ +#define R500_US_FORMAT0_0 0x4640 +# define R500_FORMAT_TXWIDTH(x) (x << 0) +# define R500_FORMAT_TXHEIGHT(x) (x << 11) +# define R500_FORMAT_TXDEPTH(x) (x << 22) +/* _0 through _3 */ +#define R500_US_OUT_FMT_0 0x46a4 +# define R500_OUT_FMT_C4_8 (0 << 0) +# define R500_OUT_FMT_C4_10 (1 << 0) +# define R500_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R500_OUT_FMT_C_16 (3 << 0) +# define 
R500_OUT_FMT_C2_16 (4 << 0) +# define R500_OUT_FMT_C4_16 (5 << 0) +# define R500_OUT_FMT_C_16_MPEG (6 << 0) +# define R500_OUT_FMT_C2_16_MPEG (7 << 0) +# define R500_OUT_FMT_C2_4 (8 << 0) +# define R500_OUT_FMT_C_3_3_2 (9 << 0) +# define R500_OUT_FMT_C_6_5_6 (10 << 0) +# define R500_OUT_FMT_C_11_11_10 (11 << 0) +# define R500_OUT_FMT_C_10_11_11 (12 << 0) +# define R500_OUT_FMT_C_2_10_10_10 (13 << 0) +/* #define R500_OUT_FMT_RESERVED (14 << 0) */ +# define R500_OUT_FMT_UNUSED (15 << 0) +# define R500_OUT_FMT_C_16_FP (16 << 0) +# define R500_OUT_FMT_C2_16_FP (17 << 0) +# define R500_OUT_FMT_C4_16_FP (18 << 0) +# define R500_OUT_FMT_C_32_FP (19 << 0) +# define R500_OUT_FMT_C2_32_FP (20 << 0) +# define R500_OUT_FMT_C4_32_FP (21 << 0) +# define R500_C0_SEL_A (0 << 8) +# define R500_C0_SEL_R (1 << 8) +# define R500_C0_SEL_G (2 << 8) +# define R500_C0_SEL_B (3 << 8) +# define R500_C1_SEL_A (0 << 10) +# define R500_C1_SEL_R (1 << 10) +# define R500_C1_SEL_G (2 << 10) +# define R500_C1_SEL_B (3 << 10) +# define R500_C2_SEL_A (0 << 12) +# define R500_C2_SEL_R (1 << 12) +# define R500_C2_SEL_G (2 << 12) +# define R500_C2_SEL_B (3 << 12) +# define R500_C3_SEL_A (0 << 14) +# define R500_C3_SEL_R (1 << 14) +# define R500_C3_SEL_G (2 << 14) +# define R500_C3_SEL_B (3 << 14) +# define R500_OUT_SIGN(x) (x << 16) +# define R500_ROUND_ADJ (1 << 20) +#define R500_US_PIXSIZE 0x4604 +# define R500_PIX_SIZE(x) (x) +#define R500_US_TEX_ADDR_0 0x9800 +# define R500_TEX_SRC_ADDR(x) (x << 0) +# define R500_TEX_SRC_ADDR_REL (1 << 7) +# define R500_TEX_SRC_S_SWIZ_R (0 << 8) +# define R500_TEX_SRC_S_SWIZ_G (1 << 8) +# define R500_TEX_SRC_S_SWIZ_B (2 << 8) +# define R500_TEX_SRC_S_SWIZ_A (3 << 8) +# define R500_TEX_SRC_T_SWIZ_R (0 << 10) +# define R500_TEX_SRC_T_SWIZ_G (1 << 10) +# define R500_TEX_SRC_T_SWIZ_B (2 << 10) +# define R500_TEX_SRC_T_SWIZ_A (3 << 10) +# define R500_TEX_SRC_R_SWIZ_R (0 << 12) +# define R500_TEX_SRC_R_SWIZ_G (1 << 12) +# define R500_TEX_SRC_R_SWIZ_B (2 << 12) +# define 
R500_TEX_SRC_R_SWIZ_A (3 << 12) +# define R500_TEX_SRC_Q_SWIZ_R (0 << 14) +# define R500_TEX_SRC_Q_SWIZ_G (1 << 14) +# define R500_TEX_SRC_Q_SWIZ_B (2 << 14) +# define R500_TEX_SRC_Q_SWIZ_A (3 << 14) +# define R500_TEX_DST_ADDR(x) (x << 16) +# define R500_TEX_DST_ADDR_REL (1 << 23) +# define R500_TEX_DST_R_SWIZ_R (0 << 24) +# define R500_TEX_DST_R_SWIZ_G (1 << 24) +# define R500_TEX_DST_R_SWIZ_B (2 << 24) +# define R500_TEX_DST_R_SWIZ_A (3 << 24) +# define R500_TEX_DST_G_SWIZ_R (0 << 26) +# define R500_TEX_DST_G_SWIZ_G (1 << 26) +# define R500_TEX_DST_G_SWIZ_B (2 << 26) +# define R500_TEX_DST_G_SWIZ_A (3 << 26) +# define R500_TEX_DST_B_SWIZ_R (0 << 28) +# define R500_TEX_DST_B_SWIZ_G (1 << 28) +# define R500_TEX_DST_B_SWIZ_B (2 << 28) +# define R500_TEX_DST_B_SWIZ_A (3 << 28) +# define R500_TEX_DST_A_SWIZ_R (0 << 30) +# define R500_TEX_DST_A_SWIZ_G (1 << 30) +# define R500_TEX_DST_A_SWIZ_B (2 << 30) +# define R500_TEX_DST_A_SWIZ_A (3 << 30) +#define R500_US_TEX_ADDR_DXDY_0 0xa000 +# define R500_DX_ADDR(x) (x << 0) +# define R500_DX_ADDR_REL (1 << 7) +# define R500_DX_S_SWIZ_R (0 << 8) +# define R500_DX_S_SWIZ_G (1 << 8) +# define R500_DX_S_SWIZ_B (2 << 8) +# define R500_DX_S_SWIZ_A (3 << 8) +# define R500_DX_T_SWIZ_R (0 << 10) +# define R500_DX_T_SWIZ_G (1 << 10) +# define R500_DX_T_SWIZ_B (2 << 10) +# define R500_DX_T_SWIZ_A (3 << 10) +# define R500_DX_R_SWIZ_R (0 << 12) +# define R500_DX_R_SWIZ_G (1 << 12) +# define R500_DX_R_SWIZ_B (2 << 12) +# define R500_DX_R_SWIZ_A (3 << 12) +# define R500_DX_Q_SWIZ_R (0 << 14) +# define R500_DX_Q_SWIZ_G (1 << 14) +# define R500_DX_Q_SWIZ_B (2 << 14) +# define R500_DX_Q_SWIZ_A (3 << 14) +# define R500_DY_ADDR(x) (x << 16) +# define R500_DY_ADDR_REL (1 << 17) +# define R500_DY_S_SWIZ_R (0 << 24) +# define R500_DY_S_SWIZ_G (1 << 24) +# define R500_DY_S_SWIZ_B (2 << 24) +# define R500_DY_S_SWIZ_A (3 << 24) +# define R500_DY_T_SWIZ_R (0 << 26) +# define R500_DY_T_SWIZ_G (1 << 26) +# define R500_DY_T_SWIZ_B (2 << 26) +# define 
R500_DY_T_SWIZ_A (3 << 26) +# define R500_DY_R_SWIZ_R (0 << 28) +# define R500_DY_R_SWIZ_G (1 << 28) +# define R500_DY_R_SWIZ_B (2 << 28) +# define R500_DY_R_SWIZ_A (3 << 28) +# define R500_DY_Q_SWIZ_R (0 << 30) +# define R500_DY_Q_SWIZ_G (1 << 30) +# define R500_DY_Q_SWIZ_B (2 << 30) +# define R500_DY_Q_SWIZ_A (3 << 30) +#define R500_US_TEX_INST_0 0x9000 +# define R500_TEX_ID(x) (x << 16) +# define R500_TEX_INST_NOP (0 << 22) +# define R500_TEX_INST_LD (1 << 22) +# define R500_TEX_INST_TEXKILL (2 << 22) +# define R500_TEX_INST_PROJ (3 << 22) +# define R500_TEX_INST_LODBIAS (4 << 22) +# define R500_TEX_INST_LOD (5 << 22) +# define R500_TEX_INST_DXDY (6 << 22) +# define R500_TEX_SEM_ACQUIRE (1 << 25) +# define R500_TEX_IGNORE_UNCOVERED (1 << 26) +# define R500_TEX_UNSCALED (1 << 27) +#define R300_US_W_FMT 0x46b4 +# define R300_W_FMT_W0 (0 << 0) +# define R300_W_FMT_W24 (1 << 0) +# define R300_W_FMT_W24FP (2 << 0) +# define R300_W_SRC_US (0 << 2) +# define R300_W_SRC_RAS (1 << 2) + + /* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. * Two parameter dwords: * 0. VAP_VTX_FMT: The first parameter is not written to hardware diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index eee1e803a0..fc07105c56 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -334,13 +334,26 @@ static GLboolean r300RunRender(GLcontext * ctx, static int r300Fallback(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_fragment_program *fp = (struct r300_fragment_program *) + /* Do we need to use new-style shaders? + * Also is there a better way to do this? 
*/ + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct r500_fragment_program *fp = (struct r500_fragment_program *) (char *)ctx->FragmentProgram._Current; - - if (fp) { - if (!fp->translated) - r300TranslateFragmentShader(r300, fp); - FALLBACK_IF(!fp->translated); + if (fp) { + if (!fp->translated) { + r500TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + } + } else { + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + if (fp) { + if (!fp->translated) { + r300TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + } } FALLBACK_IF(ctx->RenderMode != GL_RENDER); diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 77abf86a8e..5c8fd8a5e5 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -9,8 +9,10 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, GLuint id) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program_cont *vp; - struct r300_fragment_program *fp; + struct r300_fragment_program *r300_fp; + struct r500_fragment_program *r500_fp; switch (target) { case GL_VERTEX_STATE_PROGRAM_NV: @@ -19,14 +21,28 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); case GL_FRAGMENT_PROGRAM_ARB: - fp = CALLOC_STRUCT(r300_fragment_program); - fp->ctx = ctx; - return _mesa_init_fragment_program(ctx, &fp->mesa_program, - target, id); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r500_fp = CALLOC_STRUCT(r500_fragment_program); + r500_fp->ctx = ctx; + return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, + target, id); + } else { + r300_fp = CALLOC_STRUCT(r300_fragment_program); + r300_fp->ctx = ctx; + return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, + target, id); + } + case 
GL_FRAGMENT_PROGRAM_NV: - fp = CALLOC_STRUCT(r300_fragment_program); - return _mesa_init_fragment_program(ctx, &fp->mesa_program, - target, id); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r500_fp = CALLOC_STRUCT(r500_fragment_program); + return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, + target, id); + } else { + r300_fp = CALLOC_STRUCT(r300_fragment_program); + return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, + target, id); + } default: _mesa_problem(ctx, "Bad target in r300NewProgram"); } @@ -42,15 +58,20 @@ static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) static void r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program_cont *vp = (void *)prog; - struct r300_fragment_program *fp = (struct r300_fragment_program *)prog; + struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog; + struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog; switch (target) { case GL_VERTEX_PROGRAM_ARB: vp->progs = NULL; break; case GL_FRAGMENT_PROGRAM_ARB: - fp->translated = GL_FALSE; + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500_fp->translated = GL_FALSE; + else + r300_fp->translated = GL_FALSE; break; } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index e11b5afc30..550f710854 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -60,7 +60,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r300_state.h" #include "r300_reg.h" #include "r300_emit.h" -#include "r300_fragprog.h" #include "r300_tex.h" #include "drirenderbuffer.h" @@ -189,7 +188,7 @@ static void r300SetBlendCntl(r300ContextPtr r300, int func, int eqn, */ #if 0 if (new_ablend == new_cblend) { - new_cblend |= R300_BLEND_NO_SEPARATE; + new_cblend |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; } #endif new_cblend |= cbits; @@ -295,7 +294,9 @@ static void r300SetBlendState(GLcontext * ctx) r300SetBlendCntl(r300, func, eqn, - R300_BLEND_UNKNOWN | R300_BLEND_ENABLE, funcA, eqnA); + (R300_SEPARATE_ALPHA_ENABLE | + R300_READ_ENABLE | + R300_ALPHA_BLEND_ENABLE), funcA, eqnA); } static void r300BlendEquationSeparate(GLcontext * ctx, @@ -401,42 +402,40 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state) } } -static void r300SetEarlyZState(GLcontext * ctx) +static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) { - /* updates register R300_RB3D_EARLY_Z (0x4F14) - if depth test is not enabled it should be R300_EARLY_Z_DISABLE - if depth is enabled and alpha not it should be R300_EARLY_Z_ENABLE - if depth and alpha is enabled it should be R300_EARLY_Z_DISABLE - */ r300ContextPtr r300 = R300_CONTEXT(ctx); - R300_STATECHANGE(r300, zstencil_format); - switch (ctx->Visual.depthBits) { - case 16: - r300->hw.zstencil_format.cmd[1] = ZB_FORMAR_DEPTHFORMAT_16BIT_INT_Z; - break; - case 24: - r300->hw.zstencil_format.cmd[1] = ZB_FORMAR_DEPTHFORMAT_24BIT_INT_Z; - break; - default: - fprintf(stderr, "Error: Unsupported depth %d... 
exiting\n", ctx->Visual.depthBits); - _mesa_exit(-1); + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + return (fp && fp->WritesDepth); + } else { + struct r500_fragment_program* fp = + (struct r500_fragment_program*)(char*) + ctx->FragmentProgram._Current; + return (fp && fp->writes_depth); } +} + +static void r300SetEarlyZState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint topZ = R300_ZTOP_ENABLE; if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) - /* disable early Z */ - r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; - else { - if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) - /* enable early Z */ - r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_ENABLE; - else - /* disable early Z */ - r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; + topZ = R300_ZTOP_DISABLE; + if (current_fragment_program_writes_depth(ctx)) + topZ = R300_ZTOP_DISABLE; + + if (topZ != r300->hw.zstencil_format.cmd[2]) { + /* Note: This completely reemits the stencil format. + * I have not tested whether this is strictly necessary, + * or if emitting a write to ZB_ZTOP is enough. 
+ */ + R300_STATECHANGE(r300, zstencil_format); + r300->hw.zstencil_format.cmd[2] = topZ; } - - r300->hw.zstencil_format.cmd[3] = 0x00000003; - r300->hw.zstencil_format.cmd[4] = 0x00000000; } static void r300SetAlphaState(GLcontext * ctx) @@ -450,25 +449,25 @@ static void r300SetAlphaState(GLcontext * ctx) switch (ctx->Color.AlphaFunc) { case GL_NEVER: - pp_misc |= FG_ALPHA_FUNC_NEVER; + pp_misc |= R300_FG_ALPHA_FUNC_NEVER; break; case GL_LESS: - pp_misc |= FG_ALPHA_FUNC_LESS; + pp_misc |= R300_FG_ALPHA_FUNC_LESS; break; case GL_EQUAL: - pp_misc |= FG_ALPHA_FUNC_EQUAL; + pp_misc |= R300_FG_ALPHA_FUNC_EQUAL; break; case GL_LEQUAL: - pp_misc |= FG_ALPHA_FUNC_LE; + pp_misc |= R300_FG_ALPHA_FUNC_LE; break; case GL_GREATER: - pp_misc |= FG_ALPHA_FUNC_GREATER; + pp_misc |= R300_FG_ALPHA_FUNC_GREATER; break; case GL_NOTEQUAL: - pp_misc |= FG_ALPHA_FUNC_NOTEQUAL; + pp_misc |= R300_FG_ALPHA_FUNC_NOTEQUAL; break; case GL_GEQUAL: - pp_misc |= FG_ALPHA_FUNC_GE; + pp_misc |= R300_FG_ALPHA_FUNC_GE; break; case GL_ALWAYS: /*pp_misc |= FG_ALPHA_FUNC_ALWAYS; */ @@ -477,8 +476,9 @@ static void r300SetAlphaState(GLcontext * ctx) } if (really_enabled) { - pp_misc |= FG_ALPHA_FUNC_ENABLE; - pp_misc |= (refByte & R300_REF_ALPHA_MASK); + pp_misc |= R300_FG_ALPHA_FUNC_ENABLE; + pp_misc |= R500_FG_ALPHA_FUNC_8BIT; + pp_misc |= (refByte & R300_FG_ALPHA_FUNC_VAL_MASK); } else { pp_misc = 0x0; } @@ -525,24 +525,24 @@ static void r300SetDepthState(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, zs); - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE; + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE; // XXX r300->hw.zs.cmd[R300_ZS_CNTL_1] &= - ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); + ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) { if (ctx->Depth.Mask) r300->hw.zs.cmd[R300_ZS_CNTL_0] |= - R300_RB3D_Z_TEST_AND_WRITE; + R300_Z_ENABLE | R300_Z_WRITE_ENABLE | R300_STENCIL_FRONT_BACK; // XXX 
else - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_TEST; + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE | R300_STENCIL_FRONT_BACK; // XXX r300->hw.zs.cmd[R300_ZS_CNTL_1] |= translate_func(ctx->Depth. - Func) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; + Func) << R300_Z_FUNC_SHIFT; } else { - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_DISABLED_1; + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK; // XXX r300->hw.zs.cmd[R300_ZS_CNTL_1] |= - translate_func(GL_NEVER) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; + translate_func(GL_NEVER) << R300_Z_FUNC_SHIFT; } r300SetEarlyZState(ctx); @@ -556,10 +556,10 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state) R300_STATECHANGE(r300, zs); if (state) { r300->hw.zs.cmd[R300_ZS_CNTL_0] |= - R300_RB3D_STENCIL_ENABLE; + R300_STENCIL_ENABLE; } else { r300->hw.zs.cmd[R300_ZS_CNTL_0] &= - ~R300_RB3D_STENCIL_ENABLE; + ~R300_STENCIL_ENABLE; } } else { #if R200_MERGED @@ -571,7 +571,7 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state) static void r300UpdatePolygonMode(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - uint32_t hw_mode = GA_POLY_MODE_DISABLE; + uint32_t hw_mode = R300_GA_POLY_MODE_DISABLE; /* Only do something if a polygon mode is wanted, default is GL_FILL */ if (ctx->Polygon.FrontMode != GL_FILL || @@ -590,29 +590,29 @@ static void r300UpdatePolygonMode(GLcontext * ctx) } /* Enable polygon mode */ - hw_mode |= GA_POLY_MODE_DUAL; + hw_mode |= R300_GA_POLY_MODE_DUAL; switch (f) { case GL_LINE: - hw_mode |= GA_POLY_MODE_FRONT_PTYPE_LINE; + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_LINE; break; case GL_POINT: - hw_mode |= GA_POLY_MODE_FRONT_PTYPE_POINT; + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_POINT; break; case GL_FILL: - hw_mode |= GA_POLY_MODE_FRONT_PTYPE_TRI; + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_TRI; break; } switch (b) { case GL_LINE: - hw_mode |= GA_POLY_MODE_BACK_PTYPE_LINE; + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_LINE; break; case GL_POINT: - hw_mode |= 
GA_POLY_MODE_BACK_PTYPE_POINT; + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_POINT; break; case GL_FILL: - hw_mode |= GA_POLY_MODE_BACK_PTYPE_TRI; + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_TRI; break; } } @@ -716,8 +716,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) R300_STATECHANGE(r300, fogs); r300->hw.fogs.cmd[R300_FOGS_STATE] = (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~FG_FOG_BLEND_FN_MASK) | - FG_FOG_BLEND_FN_LINEAR; + cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | + R300_FG_FOG_BLEND_FN_LINEAR; if (ctx->Fog.Start == ctx->Fog.End) { fogScale.f = -1.0; @@ -734,8 +734,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) R300_STATECHANGE(r300, fogs); r300->hw.fogs.cmd[R300_FOGS_STATE] = (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~FG_FOG_BLEND_FN_MASK) | - FG_FOG_BLEND_FN_EXP; + cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | + R300_FG_FOG_BLEND_FN_EXP; fogScale.f = 0.0933 * ctx->Fog.Density; fogStart.f = 0.0; break; @@ -743,8 +743,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) R300_STATECHANGE(r300, fogs); r300->hw.fogs.cmd[R300_FOGS_STATE] = (r300->hw.fogs. 
- cmd[R300_FOGS_STATE] & ~FG_FOG_BLEND_FN_MASK) | - FG_FOG_BLEND_FN_EXP2; + cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | + R300_FG_FOG_BLEND_FN_EXP2; fogScale.f = 0.3 * ctx->Fog.Density; fogStart.f = 0.0; default: @@ -808,7 +808,7 @@ static void r300SetFogState(GLcontext * ctx, GLboolean state) R300_STATECHANGE(r300, fogs); if (state) { - r300->hw.fogs.cmd[R300_FOGS_STATE] |= FG_FOG_BLEND_ENABLE; + r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FG_FOG_BLEND_ENABLE; r300Fogfv(ctx, GL_FOG_MODE, NULL); r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); @@ -816,7 +816,7 @@ static void r300SetFogState(GLcontext * ctx, GLboolean state) r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); } else { - r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~FG_FOG_BLEND_ENABLE; + r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FG_FOG_BLEND_ENABLE; } } @@ -914,36 +914,36 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint refmask = (((ctx->Stencil. - Ref[0] & 0xff) << ZB_STENCILREFMASK_STENCILREF_SHIFT) | ((ctx-> - Stencil. - ValueMask - [0] & - 0xff) - << - ZB_STENCILREFMASK_STENCILMASK_SHIFT)); + Ref[0] & 0xff) << R300_STENCILREF_SHIFT) | ((ctx-> + Stencil. 
+ ValueMask + [0] & + 0xff) + << + R300_STENCILMASK_SHIFT)); GLuint flag; R300_STATECHANGE(rmesa, zs); rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK << - R300_RB3D_ZS1_FRONT_FUNC_SHIFT) + R300_S_FRONT_FUNC_SHIFT) | (R300_ZS_MASK << - R300_RB3D_ZS1_BACK_FUNC_SHIFT)); + R300_S_BACK_FUNC_SHIFT)); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= - ~((ZB_STENCILREFMASK_STENCIL_MASK << ZB_STENCILREFMASK_STENCILREF_SHIFT) | - (ZB_STENCILREFMASK_STENCIL_MASK << ZB_STENCILREFMASK_STENCILMASK_SHIFT)); + ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) | + (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT)); flag = translate_func(ctx->Stencil.Function[0]); rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= - (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT); + (flag << R300_S_FRONT_FUNC_SHIFT); if (ctx->Stencil._TestTwoSide) flag = translate_func(ctx->Stencil.Function[1]); rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= - (flag << R300_RB3D_ZS1_BACK_FUNC_SHIFT); + (flag << R300_S_BACK_FUNC_SHIFT); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask; } @@ -953,12 +953,12 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) R300_STATECHANGE(rmesa, zs); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= - ~(ZB_STENCILREFMASK_STENCIL_MASK << - ZB_STENCILREFMASK_STENCILWRITEMASK_SHIFT); + ~(R300_STENCILREF_MASK << + R300_STENCILWRITEMASK_SHIFT); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= (ctx->Stencil. - WriteMask[0] & ZB_STENCILREFMASK_STENCIL_MASK) << - ZB_STENCILREFMASK_STENCILWRITEMASK_SHIFT; + WriteMask[0] & R300_STENCILREF_MASK) << + R300_STENCILWRITEMASK_SHIFT; } static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, @@ -969,34 +969,34 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, R300_STATECHANGE(rmesa, zs); /* It is easier to mask what's left.. 
*/ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= - (R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT) | - (R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | - (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT); + (R300_ZS_MASK << R300_Z_FUNC_SHIFT) | + (R300_ZS_MASK << R300_S_FRONT_FUNC_SHIFT) | + (R300_ZS_MASK << R300_S_BACK_FUNC_SHIFT); rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (translate_stencil_op(ctx->Stencil.FailFunc[0]) << - R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) + R300_S_FRONT_SFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << - R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT) + R300_S_FRONT_ZFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << - R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT); + R300_S_FRONT_ZPASS_OP_SHIFT); if (ctx->Stencil._TestTwoSide) { rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (translate_stencil_op(ctx->Stencil.FailFunc[1]) << - R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) + R300_S_BACK_SFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZFailFunc[1]) << - R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT) + R300_S_BACK_ZFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZPassFunc[1]) << - R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT); + R300_S_BACK_ZPASS_OP_SHIFT); } else { rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (translate_stencil_op(ctx->Stencil.FailFunc[0]) << - R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) + R300_S_BACK_SFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << - R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT) + R300_S_BACK_ZFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << - R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT); + R300_S_BACK_ZPASS_OP_SHIFT); } } @@ -1005,10 +1005,10 @@ static void r300ClearStencil(GLcontext * ctx, GLint s) r300ContextPtr rmesa = R300_CONTEXT(ctx); rmesa->state.stencil.clear = - ((GLuint) (ctx->Stencil.Clear & ZB_STENCILREFMASK_STENCIL_MASK) | - (ZB_STENCILREFMASK_STENCIL_MASK << ZB_STENCILREFMASK_STENCILMASK_SHIFT) | - ((ctx->Stencil.WriteMask[0] & ZB_STENCILREFMASK_STENCIL_MASK) << - ZB_STENCILREFMASK_STENCILMASK_SHIFT)); + ((GLuint) 
(ctx->Stencil.Clear & R300_STENCILREF_MASK) | + (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT) | + ((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << + R300_STENCILMASK_SHIFT)); } /* ============================================================= @@ -1322,6 +1322,82 @@ static unsigned long gen_fixed_filter(unsigned long f) return f; } +static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int i; + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + + R300_STATECHANGE(r300, fpt); + + for (i = 0; i < fp->tex.length; i++) { + int unit; + int opcode; + unsigned long val; + + unit = fp->tex.inst[i] >> R300_TEX_ID_SHIFT; + unit &= 15; + + val = fp->tex.inst[i]; + val &= ~R300_TEX_ID_MASK; + + opcode = + (val & R300_TEX_INST_MASK) >> R300_TEX_INST_SHIFT; + if (opcode == R300_TEX_OP_KIL) { + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + if (tmu_mappings[unit] >= 0) { + val |= + tmu_mappings[unit] << + R300_TEX_ID_SHIFT; + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + // We get here when the corresponding texture image is incomplete + // (e.g. incomplete mipmaps etc.) 
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } + } + } + + r300->hw.fpt.cmd[R300_FPT_CMD_0] = + cmdpacket0(R300_US_TEX_INST_0, fp->tex.length); +} + +static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) +{ + int i; + struct r500_fragment_program *fp = (struct r500_fragment_program *) + (char *)ctx->FragmentProgram._Current; + + /* find all the texture instructions and relocate the texture units */ + for (i = 0; i < fp->inst_end + 1; i++) { + if ((fp->inst[i].inst0 & 0x3) == R500_INST_TYPE_TEX) { + uint32_t val; + int unit, opcode, new_unit; + + val = fp->inst[i].inst1; + + unit = (val >> 16) & 0xf; + + val &= ~(0xf << 16); + + opcode = val & (0x7 << 22); + if (opcode == R500_TEX_INST_TEXKILL) { + new_unit = 0; + } else { + if (tmu_mappings[unit] >= 0) { + new_unit = tmu_mappings[unit]; + } else { + new_unit = 0; + } + } + val |= R500_TEX_ID(new_unit); + fp->inst[i].inst1 = val; + } + } +} + static void r300SetupTextures(GLcontext * ctx) { int i, mtu; @@ -1436,39 +1512,18 @@ static void r300SetupTextures(GLcontext * ctx) if (!fp) /* should only happenen once, just after context is created */ return; - R300_STATECHANGE(r300, fpt); - - for (i = 0; i < fp->tex.length; i++) { - int unit; - int opcode; - unsigned long val; - - unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT; - unit &= 15; - - val = fp->tex.inst[i]; - val &= ~R300_FPITX_IMAGE_MASK; - - opcode = - (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT; - if (opcode == R300_FPITX_OP_KIL) { - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; - } else { - if (tmu_mappings[unit] >= 0) { - val |= - tmu_mappings[unit] << - R300_FPITX_IMAGE_SHIFT; - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; - } else { - // We get here when the corresponding texture image is incomplete - // (e.g. incomplete mipmaps etc.) 
- r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; - } + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + if (fp->mesa_program.UsesKill && last_hw_tmu < 0) { + // The KILL operation requires the first texture unit + // to be enabled. + r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_FILTER0_0, 1); } - } - - r300->hw.fpt.cmd[R300_FPT_CMD_0] = - cmdpacket0(R300_PFS_TEXI_0, fp->tex.length); + r300SetupFragmentShaderTextures(ctx, tmu_mappings); + } else + r500SetupFragmentShaderTextures(ctx, tmu_mappings); if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", @@ -1488,21 +1543,17 @@ static void r300SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); /* I'm still unsure if these are needed */ - GLuint interp_magic[8] = { - 0x00, - R300_RS_COL_PTR(1), - R300_RS_COL_PTR(2), - R300_RS_COL_PTR(3), - 0x00, - 0x00, - 0x00, - 0x00 - }; + GLuint interp_col[8]; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; union r300_outputs_written OutputsWritten; GLuint InputsRead; int fp_reg, high_rr; - int in_texcoords, col_interp_nr; - int i; + int col_interp_nr; + int rs_tex_count = 0, rs_col_count = 0; + int i, count; + + memset(interp_col, 0, sizeof(interp_col)); if (hw_tcl_on) OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; @@ -1520,7 +1571,7 @@ static void r300SetupRSUnit(GLcontext * ctx) R300_STATECHANGE(r300, rc); R300_STATECHANGE(r300, rr); - fp_reg = in_texcoords = col_interp_nr = high_rr = 0; + fp_reg = col_interp_nr = high_rr = 0; r300->hw.rr.cmd[R300_RR_INST_1] = 0; @@ -1538,12 +1589,50 @@ static void r300SetupRSUnit(GLcontext * ctx) InputsRead &= ~FRAG_BIT_WPOS; } + if (InputsRead & FRAG_BIT_COL0) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + interp_col[0] |= R300_RS_COL_PTR(rs_col_count); + if (count == 3) + interp_col[0] |= 
R300_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + rs_col_count += count; + } + else + interp_col[0] = R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + + if (InputsRead & FRAG_BIT_COL1) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + if (count == 3) + interp_col[1] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB0); + interp_col[1] |= R300_RS_COL_PTR(1); + rs_col_count += count; + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) - | interp_magic[i]; + int swiz; + + /* with TCL we always seem to route 4 components */ + if (hw_tcl_on) + count = 4; + else + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count; + switch(count) { + case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break; + case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; + default: + case 1: + case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; + }; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] |= swiz; r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0 << i)) { + + rs_tex_count += count; + //assert(r300->state.texture.tc_count != 0); r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R300_RS_INST_TEX_CN_WRITE | i /* source INTERP */ | (fp_reg << R300_RS_INST_TEX_ADDR_SHIFT); @@ -1557,10 +1646,6 @@ static void r300SetupRSUnit(GLcontext * ctx) WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); } } - /* Need to count all coords enabled at vof */ - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { - in_texcoords++; - } } if (InputsRead & FRAG_BIT_COL0) { @@ -1586,23 +1671,186 @@ static void r300SetupRSUnit(GLcontext * ctx) } /* Need at least one. 
This might still lock as the values are undefined... */ - if (in_texcoords == 0 && col_interp_nr == 0) { + if (rs_tex_count == 0 && col_interp_nr == 0) { r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT); col_interp_nr++; } - r300->hw.rc.cmd[1] = 0 | ((in_texcoords << 2) << R300_IT_COUNT_SHIFT) + r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_interp_nr << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; assert(high_rr >= 0); r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1); + r300->hw.rc.cmd[2] = high_rr; + + if (InputsRead) + WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); +} + +static void r500SetupRSUnit(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + /* I'm still unsure if these are needed */ + GLuint interp_col[8]; + union r300_outputs_written OutputsWritten; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint InputsRead; + int fp_reg, high_rr; + int rs_col_count = 0; + int in_texcoords, col_interp_nr; + int i, count; + + memset(interp_col, 0, sizeof(interp_col)); + if (hw_tcl_on) + OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + else + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); + + if (ctx->FragmentProgram._Current) + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + else { + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; /* This should only ever happen once.. 
*/ + } + + R300_STATECHANGE(r300, ri); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + fp_reg = col_interp_nr = high_rr = in_texcoords = 0; + + r300->hw.rr.cmd[R300_RR_INST_1] = 0; + + if (InputsRead & FRAG_BIT_WPOS) { + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (!(InputsRead & (FRAG_BIT_TEX0 << i))) + break; + + if (i == ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tno free texcoord found...\n"); + _mesa_exit(-1); + } + + InputsRead |= (FRAG_BIT_TEX0 << i); + InputsRead &= ~FRAG_BIT_WPOS; + } + + if (InputsRead & FRAG_BIT_COL0) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + interp_col[0] |= R500_RS_COL_PTR(rs_col_count); + if (count == 3) + interp_col[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + rs_col_count += count; + } + else + interp_col[0] = R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + + if (InputsRead & FRAG_BIT_COL1) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + interp_col[1] |= R500_RS_COL_PTR(1); + if (count == 3) + interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB0); + rs_col_count += count; + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + GLuint swiz = 0; + + /* with TCL we always seem to route 4 components */ + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + + if (hw_tcl_on) + count = 4; + else + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + + /* always have on texcoord */ + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT; + if (count >= 2) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT; + else + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; + + if (count >= 3) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT; + else + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + + if (count == 4) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT; + else + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + + } else + swiz = (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << 
R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); + + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | swiz; + + r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + //assert(r300->state.texture.tc_count != 0); + r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ + | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT); + high_rr = fp_reg; + + /* Passing invalid data here can lock the GPU. */ + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + InputsRead &= ~(FRAG_BIT_TEX0 << i); + fp_reg++; + } else { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); + } + } + } + + if (InputsRead & FRAG_BIT_COL0) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); + InputsRead &= ~FRAG_BIT_COL0; + col_interp_nr++; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + } + } + + if (InputsRead & FRAG_BIT_COL1) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); + InputsRead &= ~FRAG_BIT_COL1; + if (high_rr < 1) + high_rr = 1; + col_interp_nr++; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } + } + + /* Need at least one. This might still lock as the values are undefined... 
*/ + if (in_texcoords == 0 && col_interp_nr == 0) { + r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); + col_interp_nr++; + } + + r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_IT_COUNT_SHIFT) + | (col_interp_nr << R300_IC_COUNT_SHIFT) + | R300_HIRES_EN; + + assert(high_rr >= 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1); r300->hw.rc.cmd[2] = 0xC0 | high_rr; if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); } + + + #define bump_vpu_count(ptr, new_count) do{\ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ int _nc=(new_count)/4; \ @@ -1648,10 +1896,67 @@ static inline void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, } } +#define MIN3(a, b, c) ((a) < (b) ? MIN2(a, c) : MIN2(b, c)) + + +static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, + GLuint output_count, GLuint temp_count) +{ + int vtx_mem_size; + int pvs_num_slots; + int pvs_num_cntrls; + + /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS. 
+ * See r500 docs 6.5.2 - done in emit */ + + /* avoid division by zero */ + if (input_count == 0) input_count = 1; + if (output_count == 0) output_count = 1; + if (temp_count == 0) temp_count = 1; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vtx_mem_size = 128; + else + vtx_mem_size = 72; + + pvs_num_slots = MIN3(10, vtx_mem_size/input_count, vtx_mem_size/output_count); + pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count); + + R300_STATECHANGE(rmesa, vap_cntl); + if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = + (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) | + (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= R500_TCL_STATE_OPTIMIZATION; + } else + /* not sure about non-tcl */ + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560)) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (4 << R300_PVS_NUM_FPUS_SHIFT); + +} 
+ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) { struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader); GLuint o_reg = 0; + GLuint i_reg = 0; int i; int inst_count = 0; int param_count = 0; @@ -1664,26 +1969,37 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); program_end += 4; + i_reg++; } } prog->program.length = program_end; - r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, + r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program)); inst_count = (prog->program.length / 4) - 1; + r300VapCntl(rmesa, i_reg, o_reg, 0); + R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | - (inst_count << R300_PVS_CNTL_1_POS_END_SHIFT) | - (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); + (0 << R300_PVS_FIRST_INST_SHIFT) | + (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) | - (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); + (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) | - (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT); + (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); +} + +static int bit_count (int x) +{ + x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U); + x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U); + x = (x >> 16) + (x & 
0xffff); + x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f); + return (x >> 8) + (x & 0x00ff); } static void r300SetupRealVertexProgram(r300ContextPtr rmesa) @@ -1704,20 +2020,22 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa) bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, &(prog->program)); + r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program)); inst_count = (prog->program.length / 4) - 1; + r300VapCntl(rmesa, bit_count(prog->key.InputsRead), + bit_count(prog->key.OutputsWritten), prog->num_temporaries); + R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | - (inst_count << R300_PVS_CNTL_1_POS_END_SHIFT) | - (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); + (0 << R300_PVS_FIRST_INST_SHIFT) | + (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) | - (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); + (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) | - (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT); + (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); } static void r300SetupVertexProgram(r300ContextPtr rmesa) @@ -1740,13 +2058,6 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa) r300SetupDefaultVertexProgram(rmesa); } - - /* FIXME: This is done for vertex shader fragments, but also needs to be - * done for vap_pvs, so I leave it as a reminder. 
*/ -#if 0 - reg_start(R300_VAP_PVS_WAITIDLE, 0); - e32(0x00000000); -#endif } /** @@ -1848,11 +2159,6 @@ static void r300ResetHwState(r300ContextPtr r300) r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef); r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled); - if (!has_tcl) - r300->hw.vap_cntl.cmd[1] = 0x0014045a; - else - r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */ - r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA | R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA @@ -1878,7 +2184,7 @@ static void r300ResetHwState(r300ContextPtr r300) /* XXX: Other families? */ if (has_tcl) { - r300->hw.vap_clip_cntl.cmd[1] = R300_221C_NORMAL; + r300->hw.vap_clip_cntl.cmd[1] = R300_PS_UCP_MODE_DIST_COP; r300->hw.vap_clip.cmd[1] = r300PackFloat32(1.0); /* X */ r300->hw.vap_clip.cmd[2] = r300PackFloat32(1.0); /* X */ @@ -1902,23 +2208,25 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666; r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666; - /* XXX: Other families? */ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = - R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16; - switch (r300->radeon.radeonScreen->chip_family) { - case CHIP_FAMILY_R300: - case CHIP_FAMILY_R350: - case CHIP_FAMILY_RV410: + R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16 /*| R300_GB_SUBPIXEL_1_16*/; + switch (r300->radeon.radeonScreen->num_gb_pipes) { + case 1: + default: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_RV300; + break; + case 2: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= R300_GB_TILE_PIPE_COUNT_R300; break; - case CHIP_FAMILY_R420: + case 3: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= - R300_GB_TILE_PIPE_COUNT_R420; + R300_GB_TILE_PIPE_COUNT_R420_3P; break; - default: + case 4: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= - R300_GB_TILE_DISABLE; /* TODO: This disables tiling totally. I guess it happened accidentially. 
*/ + R300_GB_TILE_PIPE_COUNT_R420; break; } @@ -1967,11 +2275,15 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.sc_screendoor.cmd[1] = 0x00FFFFFF; - r300->hw.us_out_fmt.cmd[1] = 0x00001B01; - r300->hw.us_out_fmt.cmd[2] = 0x00001B0F; - r300->hw.us_out_fmt.cmd[3] = 0x00001B0F; - r300->hw.us_out_fmt.cmd[4] = 0x00001B0F; - r300->hw.us_out_fmt.cmd[5] = 0x00000001; + r300->hw.us_out_fmt.cmd[1] = R500_OUT_FMT_C4_8 | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[2] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[3] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[4] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W24; r300Enable(ctx, GL_FOG, ctx->Fog.Enabled); r300Fogfv(ctx, GL_FOG_MODE, NULL); @@ -2023,15 +2335,32 @@ static void r300ResetHwState(r300ContextPtr r300) if (r300->radeon.sarea->tiling_enabled) { /* XXX: Turn off when clearing buffers ? */ - r300->hw.zb.cmd[R300_ZB_PITCH] |= ZB_DEPTHPITCH_DEPTHMACROTILE_ENABLE; + r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE; if (ctx->Visual.depthBits == 24) r300->hw.zb.cmd[R300_ZB_PITCH] |= - ZB_DEPTHPITCH_DEPTHMICROTILE_TILED; + R300_DEPTHMICROTILE_TILED; } r300->hw.zb_depthclearvalue.cmd[1] = 0; + switch (ctx->Visual.depthBits) { + case 16: + r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_16BIT_INT_Z; + break; + case 24: + r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; + break; + default: + fprintf(stderr, "Error: Unsupported depth %d... 
exiting\n", ctx->Visual.depthBits); + _mesa_exit(-1); + } + + r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE; + r300->hw.zstencil_format.cmd[3] = 0x00000003; + r300->hw.zstencil_format.cmd[4] = 0x00000000; + r300SetEarlyZState(ctx); + r300->hw.unk4F30.cmd[1] = 0; r300->hw.unk4F30.cmd[2] = 0; @@ -2039,6 +2368,7 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.zb_hiz_pitch.cmd[1] = 0; + r300VapCntl(r300, 0, 0, 0); if (has_tcl) { r300->hw.vps.cmd[R300_VPS_ZERO_0] = 0; r300->hw.vps.cmd[R300_VPS_ZERO_1] = 0; @@ -2084,10 +2414,11 @@ void r300UpdateShaders(r300ContextPtr rmesa) hw_tcl_on = future_hw_tcl_on = 0; r300ResetHwState(rmesa); + r300UpdateStateParameters(ctx, _NEW_PROGRAM); return; } - r300UpdateStateParameters(ctx, _NEW_PROGRAM); } + r300UpdateStateParameters(ctx, _NEW_PROGRAM); } static void r300SetupPixelShader(r300ContextPtr rmesa) @@ -2107,26 +2438,28 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) return; } + r300SetupTextures(ctx); + R300_STATECHANGE(rmesa, fpi[0]); - rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, fp->alu_end + 1); + rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, fp->alu_end + 1); for (i = 0; i <= fp->alu_end; i++) { rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst0; } R300_STATECHANGE(rmesa, fpi[1]); - rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, fp->alu_end + 1); + rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, fp->alu_end + 1); for (i = 0; i <= fp->alu_end; i++) { rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst1; } R300_STATECHANGE(rmesa, fpi[2]); - rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, fp->alu_end + 1); + rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, fp->alu_end + 1); for (i = 0; i <= fp->alu_end; i++) { rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst2; } R300_STATECHANGE(rmesa, fpi[3]); - 
rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, fp->alu_end + 1); + rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, fp->alu_end + 1); for (i = 0; i <= fp->alu_end; i++) { rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst3; } @@ -2143,10 +2476,10 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) for (i = 0, k = (4 - (fp->cur_node + 1)); i < 4; i++, k++) { if (i < (fp->cur_node + 1)) { rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = - (fp->node[i].alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT) | - (fp->node[i].alu_end << R300_PFS_NODE_ALU_END_SHIFT) | - (fp->node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT) | - (fp->node[i].tex_end << R300_PFS_NODE_TEX_END_SHIFT) | + (fp->node[i].alu_offset << R300_ALU_START_SHIFT) | + (fp->node[i].alu_end << R300_ALU_SIZE_SHIFT) | + (fp->node[i].tex_offset << R300_TEX_START_SHIFT) | + (fp->node[i].tex_end << R300_TEX_SIZE_SHIFT) | fp->node[i].flags; } else { rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; @@ -2163,19 +2496,107 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) } } +#define bump_r500fp_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/6; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + +#define bump_r500fp_const_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/4; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + +static void r500SetupPixelShader(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r500_fragment_program *fp = (struct r500_fragment_program *) + (char *)ctx->FragmentProgram._Current; + int i; + + if (!fp) /* should only happenen once, just after context is created */ + return; + + ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; + ((drm_r300_cmd_header_t *) 
rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; + + r500TranslateFragmentShader(rmesa, fp); + if (!fp->translated) { + fprintf(stderr, "%s: No valid fragment shader, exiting\n", + __FUNCTION__); + return; + } + + r300SetupTextures(ctx); + + R300_STATECHANGE(rmesa, fp); + rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = fp->max_temp_idx; + + rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] = + R500_US_CODE_START_ADDR(fp->inst_offset) | + R500_US_CODE_END_ADDR(fp->inst_end); + rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] = + R500_US_CODE_RANGE_ADDR(fp->inst_offset) | + R500_US_CODE_RANGE_SIZE(fp->inst_end); + rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] = + R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */ + + R300_STATECHANGE(rmesa, r500fp); + /* Emit our shader... */ + for (i = 0; i < fp->inst_end+1; i++) { + rmesa->hw.r500fp.cmd[i*6+1] = fp->inst[i].inst0; + rmesa->hw.r500fp.cmd[i*6+2] = fp->inst[i].inst1; + rmesa->hw.r500fp.cmd[i*6+3] = fp->inst[i].inst2; + rmesa->hw.r500fp.cmd[i*6+4] = fp->inst[i].inst3; + rmesa->hw.r500fp.cmd[i*6+5] = fp->inst[i].inst4; + rmesa->hw.r500fp.cmd[i*6+6] = fp->inst[i].inst5; + } + + bump_r500fp_count(rmesa->hw.r500fp.cmd, (fp->inst_end + 1) * 6); + + R300_STATECHANGE(rmesa, r500fp_const); + for (i = 0; i < fp->const_nr; i++) { + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(fp->constant[i][0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(fp->constant[i][1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(fp->constant[i][2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(fp->constant[i][3]); + } + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->const_nr * 4); + +} + void r300UpdateShaderStates(r300ContextPtr rmesa) { GLcontext *ctx; ctx = rmesa->radeon.glCtx; r300UpdateTextureState(ctx); + r300SetEarlyZState(ctx); - r300SetupPixelShader(rmesa); - r300SetupTextures(ctx); + GLuint fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; + if 
(current_fragment_program_writes_depth(ctx)) + fgdepthsrc = R300_FG_DEPTH_SRC_SHADER; + if (fgdepthsrc != rmesa->hw.fg_depth_src.cmd[1]) { + R300_STATECHANGE(rmesa, fg_depth_src); + rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc; + } + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500SetupPixelShader(rmesa); + else + r300SetupPixelShader(rmesa); + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500SetupRSUnit(ctx); + else + r300SetupRSUnit(ctx); if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) r300SetupVertexProgram(rmesa); - r300SetupRSUnit(ctx); + } /** @@ -2215,12 +2636,12 @@ void r300InitState(r300ContextPtr r300) switch (ctx->Visual.depthBits) { case 16: r300->state.depth.scale = 1.0 / (GLfloat) 0xffff; - depth_fmt = ZB_FORMAR_DEPTHFORMAT_16BIT_INT_Z; + depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z; r300->state.stencil.clear = 0x00000000; break; case 24: r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff; - depth_fmt = ZB_FORMAR_DEPTHFORMAT_24BIT_INT_Z; + depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; r300->state.stencil.clear = 0x00ff0000; break; default: @@ -2249,11 +2670,11 @@ void r300UpdateClipPlanes( GLcontext *ctx ) { r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint p; - + for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; - + R300_STATECHANGE( rmesa, vpucp[p] ); rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index a732bdb559..8aebd9be3e 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -78,31 +78,6 @@ do { \ rmesa->swtcl.vertex_attr_count++; \ } while (0) -/* this differs from the VIR0 in emit.c - TODO merge them using another option */ -static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** 
attribptr, - int *inputs, GLint * tab, GLuint nr) -{ - GLuint i, dw; - - /* type, inputs, stop bit, size */ - for (i = 0; i + 1 < nr; i += 2) { - dw = (inputs[tab[i]] << 8) | 0x3; - dw |= ((inputs[tab[i + 1]] << 8) | 0x3) << 16; - if (i + 2 == nr) { - dw |= (R300_VAP_INPUT_ROUTE_END << 16); - } - dst[i >> 1] = dw; - } - - if (nr & 1) { - dw = (inputs[tab[nr - 1]] << 8) | 0x3; - dw |= R300_VAP_INPUT_ROUTE_END; - dst[nr >> 1] = dw; - } - - return (nr + 1) >> 1; -} - static void r300SetVertexFormat( GLcontext *ctx ) { r300ContextPtr rmesa = R300_CONTEXT( ctx ); @@ -118,19 +93,24 @@ static void r300SetVertexFormat( GLcontext *ctx ) GLint tab[VERT_ATTRIB_MAX]; int swizzle[VERT_ATTRIB_MAX][4]; GLuint i, nr; + GLuint sz, vap_fmt_1 = 0; DECLARE_RENDERINPUTS(render_inputs_bitset); RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); + vte = rmesa->hw.vte.cmd[1]; + vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT); /* Important: */ if ( VB->NdcPtr != NULL ) { VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; + vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT; } else { VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; + vte |= R300_VTX_W0_FMT; } assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); @@ -140,14 +120,15 @@ static void r300SetVertexFormat( GLcontext *ctx ) * build up a hardware vertex. 
*/ if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) { - vap_vte_cntl |= R300_VTX_W0_FMT; + sz = VB->AttribPtr[VERT_ATTRIB_POS]->size; InputsRead |= 1 << VERT_ATTRIB_POS; OutputsWritten |= 1 << VERT_RESULT_HPOS; - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); - } else + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 ); + offset = sz; + } else { + offset = 4; EMIT_PAD(4 * sizeof(float)); - - offset = 4; + } if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); @@ -156,18 +137,19 @@ static void r300SetVertexFormat( GLcontext *ctx ) } if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) { + sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size; rmesa->swtcl.coloroffset = offset; InputsRead |= 1 << VERT_ATTRIB_COLOR0; OutputsWritten |= 1 << VERT_RESULT_COL0; - EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4F ); + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 ); + offset += sz; } - offset += 4; - rmesa->swtcl.specoffset = 0; if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { + sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size; rmesa->swtcl.specoffset = offset; - EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4F ); + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 ); InputsRead |= 1 << VERT_ATTRIB_COLOR1; OutputsWritten |= 1 << VERT_RESULT_COL1; } @@ -177,9 +159,11 @@ static void r300SetVertexFormat( GLcontext *ctx ) for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { + sz = VB->TexCoordPtr[i]->size; InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); - EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_4F ); + EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 ); + vap_fmt_1 |= sz << (3 * i); } } } @@ -238,7 +222,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) R300_STATECHANGE(rmesa, vof); rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten); + 
rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1; rmesa->swtcl.vertex_size = _tnl_install_attrs( ctx, @@ -250,7 +234,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); - vte = rmesa->hw.vte.cmd[1]; + R300_STATECHANGE(rmesa, vte); rmesa->hw.vte.cmd[1] = vte; rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size; @@ -591,6 +575,7 @@ static void r300RenderStart(GLcontext *ctx) r300ChooseRenderState(ctx); r300SetVertexFormat(ctx); + r300UpdateShaders(rmesa); r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 43d1406da3..78fa75228e 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -398,16 +398,18 @@ static void r300SetTexImages(r300ContextPtr rmesa, R300_TX_HEIGHTMASK_SHIFT)) | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT); + t->pitch = 0; + /* Only need to round to nearest 32 for textures, but the blitter * requires 64-byte aligned pitches, and we may/may not need the * blitter. NPOT only! 
*/ if (baseImage->IsCompressed) { - t->pitch = + t->pitch |= (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { unsigned int align = blitWidth - 1; - t->pitch = ((tObj->Image[0][t->base.firstLevel]->Width * + t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); t->size |= R300_TX_SIZE_TXPITCH_EN; if (!t->image_override) @@ -415,11 +417,18 @@ static void r300SetTexImages(r300ContextPtr rmesa, (((tObj->Image[0][t->base.firstLevel]->Width) + align) & ~align) - 1; } else { - t->pitch = + t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); } + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + if (tObj->Image[0][t->base.firstLevel]->Width > 2048) + t->pitch_reg |= R500_TXWIDTH_BIT11; + if (tObj->Image[0][t->base.firstLevel]->Height > 2048) + t->pitch_reg |= R500_TXHEIGHT_BIT11; + } + t->dirty_state = TEX_ALL; /* FYI: r300UploadTexImages( rmesa, t ) used to be called here */ @@ -573,6 +582,7 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, struct gl_texture_object *tObj = _mesa_lookup_texture(rmesa->radeon.glCtx, texname); r300TexObjPtr t; + uint32_t pitch_val; if (!tObj) return; @@ -585,28 +595,30 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, return; t->offset = offset; - t->pitch_reg = pitch; + t->pitch_reg &= (1 << 13) -1; + pitch_val = pitch; switch (depth) { case 32: t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); t->filter |= tx_table[2].filter; - t->pitch_reg /= 4; + pitch_val /= 4; break; case 24: default: t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); t->filter |= tx_table[4].filter; - t->pitch_reg /= 4; + pitch_val /= 4; break; case 16: t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); t->filter |= tx_table[5].filter; - t->pitch_reg /= 2; + pitch_val /= 2; break; } + pitch_val--; - t->pitch_reg--; + t->pitch_reg |= pitch_val; } static GLboolean 
r300UpdateTextureUnit(GLcontext * ctx, int unit) diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index e91d96852d..861f0427cf 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -1426,6 +1426,8 @@ void r300SelectVertexShader(r300ContextPtr r300) GLint wpos_idx; vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; + wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten; InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; wpos_idx = -1; @@ -1439,11 +1441,9 @@ void r300SelectVertexShader(r300ContextPtr r300) _mesa_exit(-1); } - InputsRead |= (FRAG_BIT_TEX0 << i); + wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); wpos_idx = i; } - wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; - wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten; add_outputs(&wanted_key, VERT_RESULT_HPOS); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c new file mode 100644 index 0000000000..b967aa2d73 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -0,0 +1,1667 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs <darktama@iinet.net.au> + * + * \author Jerome Glisse <j.glisse@gmail.com> + * + * \author Corbin Simpson <MostAwesomeDude@gmail.com> + * + * \todo Depth write, WPOS/FOGC inputs + * + * \todo FogOption + * + * \todo Verify results of opcodes for accuracy, I've only checked them in + * specific cases. + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "r300_context.h" +#include "r500_fragprog.h" +#include "r300_reg.h" +#include "r300_state.h" + +/* + * Useful macros and values + */ +#define ERROR(fmt, args...) 
do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + fp->error = GL_TRUE; \ + } while(0) + +#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs + +#define R500_US_NUM_TEMP_REGS 128 +#define R500_US_NUM_CONST_REGS 256 + +/* "Register" flags */ +#define REG_CONSTANT (1 << 8) +#define REG_SRC_REL (1 << 9) +#define REG_DEST_REL (1 << 7) + +/* Swizzle tools */ +#define R500_SWIZZLE_ZERO 4 +#define R500_SWIZZLE_HALF 5 +#define R500_SWIZZLE_ONE 6 +#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) +#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) +#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) +#define R500_SWIZ_MOD_NEG 1 +#define R500_SWIZ_MOD_ABS 2 +#define R500_SWIZ_MOD_NEG_ABS 3 +/* Swizzles for inst2 */ +#define MAKE_SWIZ_TEX_STRQ(x) (x << 8) +#define MAKE_SWIZ_TEX_RGBA(x) (x << 24) +/* Swizzles for inst3 */ +#define MAKE_SWIZ_RGB_A(x) (x << 2) +#define MAKE_SWIZ_RGB_B(x) (x << 15) +/* Swizzles for inst4 */ +#define MAKE_SWIZ_ALPHA_A(x) (x << 14) +#define MAKE_SWIZ_ALPHA_B(x) (x << 21) +/* Swizzle for inst5 */ +#define MAKE_SWIZ_RGBA_C(x) (x << 14) +#define MAKE_SWIZ_ALPHA_C(x) (x << 27) + +/* Writemasks */ +#define R500_WRITEMASK_G 0x2 +#define R500_WRITEMASK_B 0x4 +#define R500_WRITEMASK_RGB 0x7 +#define R500_WRITEMASK_A 0x8 +#define R500_WRITEMASK_AR 0x9 +#define R500_WRITEMASK_AG 0xA +#define R500_WRITEMASK_ARG 0xB +#define R500_WRITEMASK_AB 0xC +#define R500_WRITEMASK_ARGB 0xF + +/* 1/(2pi), needed for quick modulus in trig insts + * Thanks to glisse for pointing out how to do it! 
*/
+static const GLfloat RCP_2PI[] = {0.15915494309189535,
+				  0.15915494309189535,
+				  0.15915494309189535,
+				  0.15915494309189535};
+
+/* Constant vector loaded through emit_const4fv() by the OPCODE_LIT
+ * expansion in parse_program().
+ * NOTE(review): presumably the clamp range for the LIT exponent -- confirm. */
+static const GLfloat LIT[] = {127.999999,
+			      127.999999,
+			      127.999999,
+			      -127.999999};
+
+static void dump_program(struct r500_fragment_program *fp);
+
+/**
+ * Build the R500 RGB swizzle for a Mesa source register.
+ *
+ * The X, Y and Z selectors of src.Swizzle are packed three bits each,
+ * X in the lowest bits, giving a 9-bit field.  When src.NegateBase is
+ * set, R500_SWIZ_MOD_NEG is OR'd in just above that field (bit 9).
+ *
+ * \param src  Mesa source register (Swizzle and NegateBase are read).
+ * \return packed swizzle, ready for MAKE_SWIZ_RGB_A/B or MAKE_SWIZ_RGBA_C.
+ */
+static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
+	GLuint swiz = 0x0;
+	GLuint temp;
+	/* This could be optimized, but it should be plenty fast already. */
+	int i;
+	for (i = 0; i < 3; i++) {
+		temp = GET_SWZ(src.Swizzle, i);
+		/* Fix SWIZZLE_ONE: selector 5 is R500_SWIZZLE_HALF on this
+		 * hardware, so bump it to 6 (R500_SWIZZLE_ONE). */
+		if (temp == 5) temp++;
+		swiz |= temp << i*3;
+	}
+	if (src.NegateBase)
+		swiz |= (R500_SWIZ_MOD_NEG << 9);
+	return swiz;
+}
+
+/**
+ * Convert a raw Mesa swizzle word into R500 selectors for all four
+ * components, three bits each, X in the lowest bits.  No negate/abs
+ * modifier is added.
+ *
+ * \param src  packed Mesa swizzle (as found in prog_src_register::Swizzle).
+ * \return the same swizzle with selector 5 remapped to 6 in every slot.
+ */
+static inline GLuint make_rgba_swizzle(GLuint src) {
+	GLuint swiz = 0x0;
+	GLuint temp;
+	int i;
+	for (i = 0; i < 4; i++) {
+		temp = GET_SWZ(src, i);
+		/* Fix SWIZZLE_ONE (5 -> R500_SWIZZLE_ONE) */
+		if (temp == 5) temp++;
+		swiz |= temp << i*3;
+	}
+	return swiz;
+}
+
+/**
+ * Build the R500 alpha swizzle from the W selector of a Mesa source
+ * register.  The 3-bit selector sits in the low bits; R500_SWIZ_MOD_NEG
+ * is OR'd in at bit 3 when src.NegateBase is set.
+ */
+static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
+	GLuint swiz = GET_SWZ(src.Swizzle, 3);
+
+	/* Fix SWIZZLE_ONE (5 -> R500_SWIZZLE_ONE) */
+	if (swiz == 5) swiz++;
+
+	if (src.NegateBase)
+		swiz |= (R500_SWIZ_MOD_NEG << 3);
+
+	return swiz;
+}
+
+/**
+ * Selector for scalar operations (see emit_sop()): only the X selector
+ * of the source swizzle is used, and no negate modifier is applied.
+ */
+static inline GLuint make_sop_swizzle(struct prog_src_register src) {
+	GLuint swiz = GET_SWZ(src.Swizzle, 0);
+
+	/* Fix SWIZZLE_ONE (5 -> R500_SWIZZLE_ONE) */
+	if (swiz == 5) swiz++;
+	return swiz;
+}
+
+/**
+ * Build the texture-coordinate swizzle consumed by MAKE_SWIZ_TEX_STRQ().
+ *
+ * Each of the four selectors is masked to two bits and packed two bits
+ * per component, so only plain X/Y/Z/W selections survive; the low two
+ * bits are all that is kept of a ZERO or ONE selector.
+ */
+static inline GLuint make_strq_swizzle(struct prog_src_register src) {
+	GLuint swiz = 0x0, temp = 0x0;
+	int i;
+	for (i = 0; i < 4; i++) {
+		temp = GET_SWZ(src.Swizzle, i) & 0x3;
+		swiz |= temp << i*2;
+	}
+	return swiz;
+}
+
+/**
+ * Return the hardware index of a scratch temporary register.
+ *
+ * The index is fp->temp_reg_offset + cs->temp_in_use + slot.
+ *
+ * \param fp    fragment program being compiled; fp->error is set through
+ *              ERROR() when the index is out of range.
+ * \param slot  which scratch register is wanted (0, 1, ...).
+ * \return the computed index.  It is returned even when the error is
+ *         flagged, so callers rely on fp->error to reject the program.
+ */
+static int get_temp(struct r500_fragment_program *fp, int slot) {
+
+	COMPILE_STATE;
+
+	int r = fp->temp_reg_offset + cs->temp_in_use + slot;
+
+	/* Valid indices are 0 .. R500_US_NUM_TEMP_REGS - 1, so the limit
+	 * itself is already out of range (same test as the constant-register
+	 * check in emit_const4fv()). */
+	if (r >= R500_US_NUM_TEMP_REGS) {
+		ERROR("Too many temporary registers requested, can't compile!\n");
+	}
+
+	return r;
+}
+
+/* Borrowed verbatim from r300_fragprog since it hasn't changed.
*/ +static GLuint emit_const4fv(struct r500_fragment_program *fp, + const GLfloat * cp) +{ + GLuint reg = 0x0; + int index; + + for (index = 0; index < fp->const_nr; ++index) { + if (fp->constant[index] == cp) + break; + } + + if (index >= fp->const_nr) { + if (index >= R500_US_NUM_CONST_REGS) { + ERROR("Out of hw constants!\n"); + return reg; + } + + fp->const_nr++; + fp->constant[index] = cp; + } + + reg = index | REG_CONSTANT; + return reg; +} + +static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) { + COMPILE_STATE; + GLuint reg; + switch (src.File) { + case PROGRAM_TEMPORARY: + reg = src.Index + fp->temp_reg_offset; + break; + case PROGRAM_INPUT: + reg = cs->inputs[src.Index].reg; + break; + case PROGRAM_LOCAL_PARAM: + reg = emit_const4fv(fp, + fp->mesa_program.Base.LocalParams[src. + Index]); + break; + case PROGRAM_ENV_PARAM: + reg = emit_const4fv(fp, + fp->ctx->FragmentProgram.Parameters[src. + Index]); + break; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters-> + ParameterValues[src.Index]); + break; + default: + ERROR("Can't handle src.File %x\n", src.File); + reg = 0x0; + break; + } + return reg; +} + +static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) { + GLuint reg; + switch (dest.File) { + case PROGRAM_TEMPORARY: + reg = dest.Index + fp->temp_reg_offset; + break; + case PROGRAM_OUTPUT: + /* Eventually we may need to handle multiple + * rendering targets... 
*/ + reg = dest.Index; + break; + default: + ERROR("Can't handle dest.File %x\n", dest.File); + reg = 0x0; + break; + } + return reg; +} + +static void emit_tex(struct r500_fragment_program *fp, + struct prog_instruction *fpi, int dest, int counter) +{ + int hwsrc, hwdest; + GLuint mask; + + mask = fpi->DstReg.WriteMask << 11; + hwsrc = make_src(fp, fpi->SrcReg[0]); + + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + hwdest = get_temp(fp, 0); + } else { + hwdest = dest; + } + + fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask + | R500_INST_TEX_SEM_WAIT; + + fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit) + | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + + if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) + fp->inst[counter].inst1 |= R500_TEX_UNSCALED; + + switch (fpi->Opcode) { + case OPCODE_KIL: + fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL; + break; + case OPCODE_TEX: + fp->inst[counter].inst1 |= R500_TEX_INST_LD; + break; + case OPCODE_TXB: + fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS; + break; + case OPCODE_TXP: + fp->inst[counter].inst1 |= R500_TEX_INST_PROJ; + break; + default: + ERROR("emit_tex can't handle opcode %x\n", fpi->Opcode); + } + + fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc) + | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) + /* | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G + | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A */ + | R500_TEX_DST_ADDR(hwdest) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + + fp->inst[counter].inst3 = 0x0; + fp->inst[counter].inst4 = 0x0; + fp->inst[counter].inst5 = 0x0; + + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_OUT + | R500_INST_TEX_SEM_WAIT | (mask << 4); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | 
R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_OMOD_DISABLE; + fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A) + | R500_ALPHA_OMOD_DISABLE; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + } +} + +static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) { + /* Ideally, we shouldn't have to explicitly clear memory here! */ + fp->inst[counter].inst0 = 0x0; + fp->inst[counter].inst1 = 0x0; + fp->inst[counter].inst2 = 0x0; + fp->inst[counter].inst3 = 0x0; + fp->inst[counter].inst4 = 0x0; + fp->inst[counter].inst5 = 0x0; + + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + fp->inst[counter].inst0 = R500_INST_TYPE_OUT; + + if (fpi->DstReg.Index == FRAG_RESULT_COLR) + fp->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15); + + if (fpi->DstReg.Index == FRAG_RESULT_DEPR) { + fp->inst[counter].inst4 |= R500_ALPHA_W_OMASK; + /* Notify the state emission! */ + fp->writes_depth = GL_TRUE; + } + } else { + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + /* pixel_mask */ + | (fpi->DstReg.WriteMask << 11); + } + + fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT; +} + +static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, GLuint src_reg, GLuint swizzle, GLuint dest) { + /* The r3xx shader uses MAD to implement MOV. We are using CMP, since + * it is technically more accurate and recommended by ATI/AMD. */ + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); + /* (De)mangle the swizzle from Mesa to R500. */ + swizzle = make_rgba_swizzle(swizzle); + /* 0x1FF is 9 bits, size of an RGB swizzle. 
*/ + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A((swizzle & 0x1ff)) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B((swizzle & 0x1ff)) + | R500_ALU_RGB_OMOD_DISABLE; + fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(GET_SWZ(swizzle, 3)) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(GET_SWZ(swizzle, 3)) + | R500_ALPHA_OMOD_DISABLE; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); +} + +static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int one, int two, int three) { + /* Note: This code was all Corbin's. Corbin is a rather hackish coder. + * If you can make it pretty or fast, please do so! */ + emit_alu(fp, counter, fpi); + /* Common MAD stuff */ + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(make_dest(fp, fpi->DstReg)); + fp->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(make_dest(fp, fpi->DstReg)); + switch (one) { + case 0: + case 1: + case 2: + fp->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(fp, fpi->SrcReg[one])); + fp->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(fp, fpi->SrcReg[one])); + fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one])); + fp->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0 + | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one])); + break; + case R500_SWIZZLE_ZERO: + fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO); + fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO); + break; + case R500_SWIZZLE_ONE: + fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE); + fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE); + break; + default: + ERROR("Bad src index in emit_mad: %d\n", one); + break; + } + switch (two) { + case 0: + case 1: + case 2: + 
fp->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(fp, fpi->SrcReg[two])); + fp->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(fp, fpi->SrcReg[two])); + fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two])); + fp->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1 + | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two])); + break; + case R500_SWIZZLE_ZERO: + fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); + fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); + break; + case R500_SWIZZLE_ONE: + fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); + break; + default: + ERROR("Bad src index in emit_mad: %d\n", two); + break; + } + switch (three) { + case 0: + case 1: + case 2: + fp->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(fp, fpi->SrcReg[three])); + fp->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(fp, fpi->SrcReg[three])); + fp->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three])); + break; + case R500_SWIZZLE_ZERO: + fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + break; + case R500_SWIZZLE_ONE: + fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE); + break; + default: + ERROR("Bad src index in emit_mad: %d\n", three); + break; + } +} + +static void emit_sop(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int opcode, GLuint src, GLuint swiz, GLuint dest) { + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src); + fp->inst[counter].inst4 |= R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(swiz); + fp->inst[counter].inst5 = 
R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(dest); + switch (opcode) { + case OPCODE_COS: + fp->inst[counter].inst4 |= R500_ALPHA_OP_COS; + break; + case OPCODE_EX2: + fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2; + break; + case OPCODE_LG2: + fp->inst[counter].inst4 |= R500_ALPHA_OP_LN2; + break; + case OPCODE_RCP: + fp->inst[counter].inst4 |= R500_ALPHA_OP_RCP; + break; + case OPCODE_RSQ: + fp->inst[counter].inst4 |= R500_ALPHA_OP_RSQ; + break; + case OPCODE_SIN: + fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN; + break; + default: + ERROR("Bad opcode in emit_sop: %d\n", opcode); + break; + } +} + +static GLboolean parse_program(struct r500_fragment_program *fp) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + const struct prog_instruction *inst = mp->Base.Instructions; + struct prog_instruction *fpi; + GLuint src[3], dest = 0; + int temp_swiz, counter = 0; + + if (!inst || inst[0].Opcode == OPCODE_END) { + ERROR("The program is empty!\n"); + return GL_FALSE; + } + + for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { + + if (fpi->Opcode != OPCODE_KIL) { + dest = make_dest(fp, fpi->DstReg); + } + + switch (fpi->Opcode) { + case OPCODE_ABS: + emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); + fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS + | R500_ALU_RGB_MOD_B_ABS; + fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS + | R500_ALPHA_MOD_B_ABS; + break; + case OPCODE_ADD: + /* Variation on MAD: 1*src0+src1 */ + emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1); + break; + case OPCODE_CMP: + /* This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + src[2] = make_src(fp, fpi->SrcReg[2]); + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2]) + | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2]) + | 
R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); + break; + case OPCODE_COS: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = emit_const4fv(fp, RCP_2PI); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_FRC + | 
R500_ALPHA_ADDRD(get_temp(fp, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); + counter++; + emit_sop(fp, counter, fpi, OPCODE_COS, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_DP3: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 |= R500_ALPHA_OP_DP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3 + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_DP4: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + /* Based on DP3 */ + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 |= R500_ALPHA_OP_DP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_DPH: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] 
= make_src(fp, fpi->SrcReg[1]); + /* Based on DP3 */ + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 |= R500_ALPHA_OP_DP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_DST: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + /* [1, src0.y*src1.y, src0.z, src1.w] + * So basically MUL with lotsa swizzling. */ + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | R500_ALU_RGB_SEL_B_SRC1; + /* Select [1, y, z, 1] */ + temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE; + fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz); + /* Select [1, y, 1, w] */ + temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6); + fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz); + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + break; + case OPCODE_EX2: + src[0] = make_src(fp, 
fpi->SrcReg[0]); + emit_sop(fp, counter, fpi, OPCODE_EX2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_FLR: + src[0] = make_src(fp, fpi->SrcReg[0]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 |= R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); + counter++; + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(get_temp(fp, 0)); + fp->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC1 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC1 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGBA_MOD_C_NEG; + break; + case OPCODE_FRC: + src[0] = make_src(fp, fpi->SrcReg[0]); + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 |= R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | 
MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_LG2: + src[0] = make_src(fp, fpi->SrcReg[0]); + emit_sop(fp, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_LIT: + /* To be honest, I have no idea how I came up with the following. + * All I know is that it's based on the r3xx stuff, and was + * concieved with the help of NyQuil. Mmm, MyQuil. */ + + /* First instruction */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = emit_const4fv(fp, LIT); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARG << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAX + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); + counter++; + /* Second instruction */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)) | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + /* Select [w, w, w, y] */ + temp_swiz = 3 | (3 << 3) | (3 << 6); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(temp_swiz) + | R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_LN2 + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_G; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN + | 
R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); + counter++; + /* Third instruction */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AG << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + /* Select [x, x, x, z] */ + temp_swiz = 0; + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(temp_swiz) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_B; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | R500_ALU_RGBA_A_SWIZ_0; + counter++; + /* Fourth instruction */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AR << 11); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + fp->inst[counter].inst4 = R500_ALPHA_OP_EX2 + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + /* Fifth instruction */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + /* Select [w, w, w] */ + temp_swiz = 3 | (3 << 3) | (3 << 6); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(temp_swiz); + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SWIZ_A_1 + | R500_ALPHA_SWIZ_B_1; + /* Select [-y, -y, -y] */ + temp_swiz = 1 | (1 << 3) | (1 << 6); + 
fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(temp_swiz) + | R500_ALU_RGBA_MOD_C_NEG + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + /* Final instruction */ + emit_mov(fp, counter, fpi, get_temp(fp, 0), SWIZZLE_NOOP, dest); + break; + case OPCODE_LRP: + /* src0 * src1 + INV(src0) * src2 + * 1) MUL src0, src1, temp + * 2) PRE 1-src0; MAD srcp, src2, temp */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + src[2] = make_src(fp, fpi->SrcReg[2]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[2]) + | R500_RGB_ADDR2(get_temp(fp, 0)) + | R500_RGB_SRCP_OP_1_MINUS_RGB0; + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[2]) + | R500_ALPHA_ADDR2(get_temp(fp, 0)) + | R500_ALPHA_SRCP_OP_1_MINUS_A0; + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 |= 
R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + break; + case OPCODE_MAD: + emit_mad(fp, counter, fpi, 0, 1, 2); + break; + case OPCODE_MAX: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAX + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_MIN: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 |= R500_ALPHA_OP_MIN + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | 
MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_MOV: + emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); + break; + case OPCODE_MUL: + /* Variation on MAD: src0*src1+0 */ + emit_mad(fp, counter, fpi, 0, 1, R500_SWIZZLE_ZERO); + break; + case OPCODE_POW: + /* POW(a,b) = EX2(LN2(a)*b) */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + emit_sop(fp, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), get_temp(fp, 0)); + fp->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 1)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + emit_sop(fp, counter, fpi, OPCODE_EX2, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_RCP: + src[0] = make_src(fp, fpi->SrcReg[0]); + emit_sop(fp, counter, fpi, OPCODE_RCP, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_RSQ: + src[0] = make_src(fp, fpi->SrcReg[0]); + emit_sop(fp, counter, fpi, OPCODE_RSQ, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + 
case OPCODE_SCS: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = emit_const4fv(fp, RCP_2PI); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(get_temp(fp, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); + counter++; + /* Do a cosine, then a sine, masking out the channels we want to protect. */ + /* Cosine only goes in R (x) channel. */ + fpi->DstReg.WriteMask = 0x1; + emit_sop(fp, counter, fpi, OPCODE_COS, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); + counter++; + /* Sine only goes in G (y) channel. 
*/ + fpi->DstReg.WriteMask = 0x2; + emit_sop(fp, counter, fpi, OPCODE_SIN, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_SGE: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) + | R500_RGB_ADDR2(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) + | R500_ALPHA_ADDR2(src[1]); + fp->inst[counter].inst3 = /* 1 */ + MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_MOD_C_NEG + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_ALPHA_MOD_C_NEG; + counter++; + /* This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); + fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC0 + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) + | 
R500_ALU_RGBA_ALPHA_SEL_C_SRC0 + | R500_ALU_RGBA_A_SWIZ_A; + break; + case OPCODE_SIN: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = emit_const4fv(fp, RCP_2PI); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(get_temp(fp, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); + counter++; + emit_sop(fp, counter, fpi, OPCODE_SIN, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_SLT: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) + | R500_RGB_ADDR2(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) + | R500_ALPHA_ADDR2(src[1]); + fp->inst[counter].inst3 = /* 1 
*/ + MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_MOD_C_NEG + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_ALPHA_MOD_C_NEG; + counter++; + /* This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC0 + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 + | R500_ALU_RGBA_A_SWIZ_A; + break; + case OPCODE_SUB: + /* Variation on MAD: 1*src0-src1 */ + fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */ + emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1); + break; + case OPCODE_SWZ: + /* TODO: The rarer negation masks! 
*/ + emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); + break; + case OPCODE_XPD: + /* src0 * src1 - src1 * src0 + * 1) MUL temp.xyz, src0.yzx, src1.zxy + * 2) MAD src0.zxy, src1.yzx, -temp.xyz */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_RGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + /* Select [y, z, x] */ + temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); + temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(temp_swiz); + /* Select [z, x, y] */ + temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); + temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); + fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(temp_swiz); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]) + | R500_RGB_ADDR2(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]) + | R500_ALPHA_ADDR2(get_temp(fp, 0)); + /* Select [z, x, y] */ + temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); + temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); + 
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(temp_swiz); + /* Select [y, z, x] */ + temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); + temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); + fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(temp_swiz); + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SWIZ_A_1 + | R500_ALPHA_SWIZ_B_1; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) + | R500_ALU_RGBA_MOD_C_NEG + | R500_ALU_RGBA_A_SWIZ_0; + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + emit_tex(fp, fpi, dest, counter); + if (fpi->DstReg.File == PROGRAM_OUTPUT) + counter++; + break; + default: + ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode)); + break; + } + + /* Finishing touches */ + if (fpi->SaturateMode == SATURATE_ZERO_ONE) { + fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; + } + + counter++; + + if (fp->error) + return GL_FALSE; + + } + + /* Finish him! (If it's an ALU/OUT instruction...) */ + if ((fp->inst[counter-1].inst0 & 0x3) == 1) { + fp->inst[counter-1].inst0 |= R500_INST_LAST; + } else { + /* We still need to put an output inst, right? 
*/
+		WARN_ONCE("Final FP instruction is not an OUT.\n");
+	}
+
+	fp->cs->nrslots = counter;
+
+	fp->max_temp_idx++;
+
+	return GL_TRUE;
+}
+
+/**
+ * Reset the per-compile state of \p fp and lay out its input registers.
+ *
+ * Clears the shared compile state, marks every ALU slot source as
+ * SRC_CONST, then hands out hardware temp registers to the fragment
+ * inputs the program reads, in this fixed order: texcoords, WPOS, COL0,
+ * COL1.  Finally it scans the Mesa instructions for the highest
+ * PROGRAM_TEMPORARY source index and derives cs->temp_in_use and
+ * fp->max_temp_idx from it.
+ *
+ * \param r300 context whose option cache supplies "fp_optimization"
+ * \param fp   fragment program being (re)compiled
+ */
+static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
+{
+	struct r300_pfs_compile_state *cs = NULL;
+	struct gl_fragment_program *mp = &fp->mesa_program;
+	struct prog_instruction *fpi;
+	GLuint InputsRead = mp->Base.InputsRead;
+	GLuint temps_used = 0;
+	int i, j;
+
+	/* New compile, reset tracking data */
+	fp->optimization =
+		driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
+	fp->translated = GL_FALSE;
+	fp->error = GL_FALSE;
+	fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
+	fp->const_nr = 0;
+	/* Size of pixel stack, plus 1. */
+	fp->max_temp_idx = 1;
+	/* Temp register offset. */
+	fp->temp_reg_offset = 0;
+	/* Whether or not we perform any depth writing. */
+	fp->writes_depth = GL_FALSE;
+
+	_mesa_memset(cs, 0, sizeof(*fp->cs));
+	for (i = 0; i < PFS_MAX_ALU_INST; i++) {
+		for (j = 0; j < 3; j++) {
+			cs->slot[i].vsrc[j] = SRC_CONST;
+			cs->slot[i].ssrc[j] = SRC_CONST;
+		}
+	}
+
+	/* Work out what temps the Mesa inputs correspond to, this must match
+	 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
+	 * configures itself based on the fragprog's InputsRead
+	 *
+	 * NOTE: this depends on get_hw_temp() allocating registers in order,
+	 * starting from register 0, so we're just going to do that instead.
+	 */
+
+	/* Texcoords come first */
+	for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
+		if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+			cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
+			cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
+				fp->temp_reg_offset;
+			fp->temp_reg_offset++;
+		}
+	}
+	InputsRead &= ~FRAG_BITS_TEX_ANY;
+
+	/* fragment position treated as a texcoord */
+	if (InputsRead & FRAG_BIT_WPOS) {
+		cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
+		cs->inputs[FRAG_ATTRIB_WPOS].reg =
+			fp->temp_reg_offset;
+		fp->temp_reg_offset++;
+	}
+	InputsRead &= ~FRAG_BIT_WPOS;
+
+	/* Then primary colour */
+	if (InputsRead & FRAG_BIT_COL0) {
+		cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
+		cs->inputs[FRAG_ATTRIB_COL0].reg =
+			fp->temp_reg_offset;
+		fp->temp_reg_offset++;
+	}
+	InputsRead &= ~FRAG_BIT_COL0;
+
+	/* Secondary color */
+	if (InputsRead & FRAG_BIT_COL1) {
+		cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
+		cs->inputs[FRAG_ATTRIB_COL1].reg =
+			fp->temp_reg_offset;
+		fp->temp_reg_offset++;
+	}
+	InputsRead &= ~FRAG_BIT_COL1;
+
+	/* Anything else */
+	if (InputsRead) {
+		WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
+		/* force read from hwreg 0 for now */
+		/* Unsigned one: "1 << 31" on a signed int is undefined.
+		 * NOTE(review): confirm cs->inputs[] really has 32 entries. */
+		for (i = 0; i < 32; i++)
+			if (InputsRead & (1u << i))
+				cs->inputs[i].reg = 0;
+	}
+
+	if (!mp->Base.Instructions) {
+		ERROR("No instructions found in program, going to go die now.\n");
+		return;
+	}
+
+	/* Only source operands are scanned for temporaries here. */
+	for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
+		for (i = 0; i < 3; i++) {
+			if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
+				if (fpi->SrcReg[i].Index >= temps_used)
+					temps_used = fpi->SrcReg[i].Index + 1;
+			}
+		}
+	}
+
+	cs->temp_in_use = temps_used + 1;
+
+	fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use;
+
+	if (RADEON_DEBUG & DEBUG_PIXEL)
+		fprintf(stderr, "FP temp indices: fp->max_temp_idx: %d cs->temp_in_use: %d\n", fp->max_temp_idx, cs->temp_in_use);
+}
+
+/**
+ * Refresh the program's state-dependent parameter values, if it has any
+ * parameter list at all.
+ */
+static void update_params(struct r500_fragment_program *fp)
+{
+	struct gl_fragment_program *mp = &fp->mesa_program;
+
+	/* Ask Mesa nicely to fill in ParameterValues for us */
+	if (mp->Base.Parameters)
+		_mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
+}
+
+/**
+ * Fill \p fp with a fixed two-instruction fallback program: a TEX
+ * instruction (texture id 0, LD) in slot 0 followed by an OUT
+ * instruction flagged LAST in slot 1.  Used when parsing the real
+ * program fails.
+ */
+static void dumb_shader(struct r500_fragment_program *fp)
+{
+	fp->inst[0].inst0 = R500_INST_TYPE_TEX
+		| R500_INST_TEX_SEM_WAIT
+		| R500_INST_RGB_WMASK_R
+		| R500_INST_RGB_WMASK_G
+		| R500_INST_RGB_WMASK_B
+		| R500_INST_ALPHA_WMASK
+		| R500_INST_RGB_CLAMP
+		| R500_INST_ALPHA_CLAMP;
+	fp->inst[0].inst1 = R500_TEX_ID(0)
+		| R500_TEX_INST_LD
+		| R500_TEX_SEM_ACQUIRE
+		| R500_TEX_IGNORE_UNCOVERED;
+	fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
+		| R500_TEX_SRC_S_SWIZ_R
+		| R500_TEX_SRC_T_SWIZ_G
+		| R500_TEX_DST_ADDR(0)
+		| R500_TEX_DST_R_SWIZ_R
+		| R500_TEX_DST_G_SWIZ_G
+		| R500_TEX_DST_B_SWIZ_B
+		| R500_TEX_DST_A_SWIZ_A;
+	fp->inst[0].inst3 = R500_DX_ADDR(0)
+		| R500_DX_S_SWIZ_R
+		| R500_DX_T_SWIZ_R
+		| R500_DX_R_SWIZ_R
+		| R500_DX_Q_SWIZ_R
+		| R500_DY_ADDR(0)
+		| R500_DY_S_SWIZ_R
+		| R500_DY_T_SWIZ_R
+		| R500_DY_R_SWIZ_R
+		| R500_DY_Q_SWIZ_R;
+	fp->inst[0].inst4 = 0x0;
+	fp->inst[0].inst5 = 0x0;
+
+	fp->inst[1].inst0 = R500_INST_TYPE_OUT |
+		R500_INST_TEX_SEM_WAIT |
+		R500_INST_LAST |
+		R500_INST_RGB_OMASK_R |
+		R500_INST_RGB_OMASK_G |
+		R500_INST_RGB_OMASK_B |
+		R500_INST_ALPHA_OMASK;
+	fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
+		R500_RGB_ADDR1(0) |
+		R500_RGB_ADDR1_CONST |
+		R500_RGB_ADDR2(0) |
+		R500_RGB_ADDR2_CONST |
+		R500_RGB_SRCP_OP_1_MINUS_2RGB0;
+	fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
+		R500_ALPHA_ADDR1(0) |
+		R500_ALPHA_ADDR1_CONST |
+		R500_ALPHA_ADDR2(0) |
+		R500_ALPHA_ADDR2_CONST |
+		R500_ALPHA_SRCP_OP_1_MINUS_2A0;
+	fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
+		R500_ALU_RGB_R_SWIZ_A_R |
+		R500_ALU_RGB_G_SWIZ_A_G |
+		R500_ALU_RGB_B_SWIZ_A_B |
+		R500_ALU_RGB_SEL_B_SRC0 |
+		R500_ALU_RGB_R_SWIZ_B_1 |
+		R500_ALU_RGB_B_SWIZ_B_1 |
+		R500_ALU_RGB_G_SWIZ_B_1;
+	fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
+		R500_ALPHA_SWIZ_A_A |
+		R500_ALPHA_SWIZ_B_1;
+	fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
+
R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0; + + fp->cs->nrslots = 2; + fp->translated = GL_TRUE; +} + +void r500TranslateFragmentShader(r300ContextPtr r300, + struct r500_fragment_program *fp) +{ + + struct r300_pfs_compile_state *cs = NULL; + + if (!fp->translated) { + + init_program(r300, fp); + cs = fp->cs; + + if (parse_program(fp) == GL_FALSE) { + ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n"); + dumb_shader(fp); + fp->inst_offset = 0; + fp->inst_end = cs->nrslots - 1; + return; + } + fp->inst_offset = 0; + fp->inst_end = cs->nrslots - 1; + + fp->translated = GL_TRUE; + if (RADEON_DEBUG & DEBUG_PIXEL) { + fprintf(stderr, "Mesa program:\n"); + fprintf(stderr, "-------------\n"); + _mesa_print_program(&fp->mesa_program.Base); + fflush(stdout); + dump_program(fp); + } + + + r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); + } + + update_params(fp); + +} + +static char *toswiz(int swiz_val) { + switch(swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "1/2"; + case 6: return "1"; + case 7: return "U"; + } + return NULL; +} + +static char *toop(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; +} + +static char *to_alpha_op(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: 
str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; +} + +static char *to_mask(int val) +{ + char *str = NULL; + switch(val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; break; + } + return str; +} + +static char *to_texop(int val) +{ + switch(val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; +} + +static void dump_program(struct r500_fragment_program *fp) +{ + + fprintf(stderr, "R500 Fragment Program:\n--------\n"); + + int n; + uint32_t inst; + uint32_t inst0; + char *str = NULL; + + if (fp->const_nr) { + fprintf(stderr, "--------\nConstants:\n"); + for (n = 0; n < fp->const_nr; n++) { + fprintf(stderr, "Constant %d: %f %f\n\t %f %f\n", n, + fp->constant[n][0], fp->constant[n][1], fp->constant[n][2], + fp->constant[n][3]); + } + fprintf(stderr, "--------\n"); + } + + for (n = 0; n < fp->inst_end+1; n++) { + inst0 = inst = fp->inst[n].inst0; + fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); + switch(inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; + }; + 
fprintf(stderr,"%s %s %s %s %s ", str, + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); + + switch(inst0 & 0x3) { + case 0: + case 1: + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1); + inst = fp->inst[n].inst1; + + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2); + inst = fp->inst[n].inst2; + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3); + inst = fp->inst[n].inst3; + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst >> 11) & 0x3, + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3); + + + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4); + inst = fp->inst[n].inst4; + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? 
"(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 31) & 0x1); + + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5); + inst = fp->inst[n].inst5; + fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 23) & 0x3, + (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case 2: + break; + case 3: + inst = fp->inst[n].inst1; + fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, + to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", + (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); + inst = fp->inst[n].inst2; + fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, + inst & 127, inst & (1<<7) ? "(rel)" : "", + toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), + toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", + toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), + toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3); + break; + } + fprintf(stderr,"\n"); + } + +} diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h new file mode 100644 index 0000000000..5dd2def1c4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +/* + * Authors: + * Ben Skeggs <darktama@iinet.net.au> + * Jerome Glisse <j.glisse@gmail.com> + */ +#ifndef __R500_FRAGPROG_H_ +#define __R500_FRAGPROG_H_ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "r300_context.h" + +/* supported hw opcodes */ +#define PFS_OP_MAD 0 +#define PFS_OP_DP3 1 +#define PFS_OP_DP4 2 +#define PFS_OP_MIN 3 +#define PFS_OP_MAX 4 +#define PFS_OP_CMP 5 +#define PFS_OP_FRC 6 +#define PFS_OP_EX2 7 +#define PFS_OP_LG2 8 +#define PFS_OP_RCP 9 +#define PFS_OP_RSQ 10 +#define PFS_OP_REPL_ALPHA 11 +#define PFS_OP_CMPH 12 +#define MAX_PFS_OP 12 + +#define PFS_FLAG_SAT (1 << 0) +#define PFS_FLAG_ABS (1 << 1) + +#define ARG_NEG (1 << 5) +#define ARG_ABS (1 << 6) +#define ARG_MASK (127 << 0) +#define ARG_STRIDE 7 +#define SRC_CONST (1 << 5) +#define SRC_MASK (63 << 0) +#define SRC_STRIDE 6 + +#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 +#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 + +struct r500_fragment_program; + +extern void r500TranslateFragmentShader(r300ContextPtr r300, + struct r500_fragment_program *fp); + +#endif diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c index 9c0a5868b5..3fc724a553 100644 --- a/src/mesa/drivers/dri/r300/radeon_context.c +++ b/src/mesa/drivers/dri/r300/radeon_context.c @@ -135,6 +135,7 @@ GLboolean radeonInitContext(radeonContextPtr radeon, /* Fill in additional standard functions. 
*/ radeonInitDriverFuncs(functions); + radeon->radeonScreen = screen; /* Allocate and initialize the Mesa context */ if (sharedContextPrivate) shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx; @@ -158,7 +159,6 @@ GLboolean radeonInitContext(radeonContextPtr radeon, radeon->dri.fd = sPriv->fd; radeon->dri.drmMinor = sPriv->drm_version.minor; - radeon->radeonScreen = screen; radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA + screen->sarea_priv_offset); diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c index 31a000d5e6..0c1a19507b 100644 --- a/src/mesa/drivers/dri/r300/radeon_ioctl.c +++ b/src/mesa/drivers/dri/r300/radeon_ioctl.c @@ -215,16 +215,18 @@ void radeonCopyBuffer(__DRIdrawablePrivate * dPriv, if (rect->y2 < b->y2) b->y2 = rect->y2; - if (b->x1 < b->x2 && b->y1 < b->y2) - b++; + if (b->x1 >= b->x2 || b->y1 >= b->y2) + continue; } - else - b++; + b++; n++; } radeon->sarea->nbox = n; + if (!n) + continue; + ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_SWAP); if (ret) { diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 6ad441bdd0..2821ecc0c0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -146,10 +146,112 @@ #define PCI_CHIP_RV410_5E4C 0x5E4C #define PCI_CHIP_RV410_5E4D 0x5E4D #define PCI_CHIP_RV410_5E4F 0x5E4F + +#define PCI_CHIP_R520_7100 0x7100 +#define PCI_CHIP_R520_7101 0x7101 +#define PCI_CHIP_R520_7102 0x7102 +#define PCI_CHIP_R520_7103 0x7103 +#define PCI_CHIP_R520_7104 0x7104 +#define PCI_CHIP_R520_7105 0x7105 +#define PCI_CHIP_R520_7106 0x7106 +#define PCI_CHIP_R520_7108 0x7108 +#define PCI_CHIP_R520_7109 0x7109 +#define PCI_CHIP_R520_710A 0x710A +#define PCI_CHIP_R520_710B 0x710B +#define PCI_CHIP_R520_710C 0x710C +#define PCI_CHIP_R520_710E 0x710E +#define PCI_CHIP_R520_710F 0x710F +#define PCI_CHIP_RV515_7140 0x7140 +#define PCI_CHIP_RV515_7141 0x7141 
+#define PCI_CHIP_RV515_7142 0x7142 +#define PCI_CHIP_RV515_7143 0x7143 +#define PCI_CHIP_RV515_7144 0x7144 +#define PCI_CHIP_RV515_7145 0x7145 +#define PCI_CHIP_RV515_7146 0x7146 +#define PCI_CHIP_RV515_7147 0x7147 +#define PCI_CHIP_RV515_7149 0x7149 +#define PCI_CHIP_RV515_714A 0x714A +#define PCI_CHIP_RV515_714B 0x714B +#define PCI_CHIP_RV515_714C 0x714C +#define PCI_CHIP_RV515_714D 0x714D +#define PCI_CHIP_RV515_714E 0x714E +#define PCI_CHIP_RV515_714F 0x714F +#define PCI_CHIP_RV515_7151 0x7151 +#define PCI_CHIP_RV515_7152 0x7152 +#define PCI_CHIP_RV515_7153 0x7153 +#define PCI_CHIP_RV515_715E 0x715E +#define PCI_CHIP_RV515_715F 0x715F +#define PCI_CHIP_RV515_7180 0x7180 +#define PCI_CHIP_RV515_7181 0x7181 +#define PCI_CHIP_RV515_7183 0x7183 +#define PCI_CHIP_RV515_7186 0x7186 +#define PCI_CHIP_RV515_7187 0x7187 +#define PCI_CHIP_RV515_7188 0x7188 +#define PCI_CHIP_RV515_718A 0x718A +#define PCI_CHIP_RV515_718B 0x718B +#define PCI_CHIP_RV515_718C 0x718C +#define PCI_CHIP_RV515_718D 0x718D +#define PCI_CHIP_RV515_718F 0x718F +#define PCI_CHIP_RV515_7193 0x7193 +#define PCI_CHIP_RV515_7196 0x7196 +#define PCI_CHIP_RV515_719B 0x719B +#define PCI_CHIP_RV515_719F 0x719F +#define PCI_CHIP_RV530_71C0 0x71C0 +#define PCI_CHIP_RV530_71C1 0x71C1 +#define PCI_CHIP_RV530_71C2 0x71C2 +#define PCI_CHIP_RV530_71C3 0x71C3 +#define PCI_CHIP_RV530_71C4 0x71C4 +#define PCI_CHIP_RV530_71C5 0x71C5 +#define PCI_CHIP_RV530_71C6 0x71C6 +#define PCI_CHIP_RV530_71C7 0x71C7 +#define PCI_CHIP_RV530_71CD 0x71CD +#define PCI_CHIP_RV530_71CE 0x71CE +#define PCI_CHIP_RV530_71D2 0x71D2 +#define PCI_CHIP_RV530_71D4 0x71D4 +#define PCI_CHIP_RV530_71D5 0x71D5 +#define PCI_CHIP_RV530_71D6 0x71D6 +#define PCI_CHIP_RV530_71DA 0x71DA +#define PCI_CHIP_RV530_71DE 0x71DE +#define PCI_CHIP_RV515_7200 0x7200 +#define PCI_CHIP_RV515_7210 0x7210 +#define PCI_CHIP_RV515_7211 0x7211 +#define PCI_CHIP_R580_7240 0x7240 +#define PCI_CHIP_R580_7243 0x7243 +#define PCI_CHIP_R580_7244 0x7244 +#define 
PCI_CHIP_R580_7245 0x7245 +#define PCI_CHIP_R580_7246 0x7246 +#define PCI_CHIP_R580_7247 0x7247 +#define PCI_CHIP_R580_7248 0x7248 +#define PCI_CHIP_R580_7249 0x7249 +#define PCI_CHIP_R580_724A 0x724A +#define PCI_CHIP_R580_724B 0x724B +#define PCI_CHIP_R580_724C 0x724C +#define PCI_CHIP_R580_724D 0x724D +#define PCI_CHIP_R580_724E 0x724E +#define PCI_CHIP_R580_724F 0x724F +#define PCI_CHIP_RV570_7280 0x7280 +#define PCI_CHIP_RV560_7281 0x7281 +#define PCI_CHIP_RV560_7283 0x7283 +#define PCI_CHIP_R580_7284 0x7284 +#define PCI_CHIP_RV560_7287 0x7287 +#define PCI_CHIP_RV570_7288 0x7288 +#define PCI_CHIP_RV570_7289 0x7289 +#define PCI_CHIP_RV570_728B 0x728B +#define PCI_CHIP_RV570_728C 0x728C +#define PCI_CHIP_RV560_7290 0x7290 +#define PCI_CHIP_RV560_7291 0x7291 +#define PCI_CHIP_RV560_7293 0x7293 +#define PCI_CHIP_RV560_7297 0x7297 + #define PCI_CHIP_RS350_7834 0x7834 #define PCI_CHIP_RS350_7835 0x7835 #define PCI_CHIP_RS690_791E 0x791E #define PCI_CHIP_RS690_791F 0x791F +#define PCI_CHIP_RS740_796C 0x796C +#define PCI_CHIP_RS740_796D 0x796D +#define PCI_CHIP_RS740_796E 0x796E +#define PCI_CHIP_RS740_796F 0x796F + enum { CHIP_FAMILY_R100, @@ -169,6 +271,13 @@ enum { CHIP_FAMILY_RV410, CHIP_FAMILY_RS400, CHIP_FAMILY_RS690, + CHIP_FAMILY_RS740, + CHIP_FAMILY_RV515, + CHIP_FAMILY_R520, + CHIP_FAMILY_RV530, + CHIP_FAMILY_R580, + CHIP_FAMILY_RV560, + CHIP_FAMILY_RV570, CHIP_FAMILY_LAST }; diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index 078ac9a29a..446025b631 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -919,16 +919,18 @@ void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, if (rect->y2 < b->y2) b->y2 = rect->y2; - if (b->x1 < b->x2 && b->y1 < b->y2) - b++; + if (b->x1 >= b->x2 || b->y1 >= b->y2) + continue; } - else - b++; + b++; n++; } rmesa->sarea->nbox = n; + if (!n) + continue; + ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); if ( ret ) { 
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 6107577e40..08568a407e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -188,7 +188,7 @@ DRI_CONF_BEGIN DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(8, 2, 8) DRI_CONF_MAX_TEXTURE_COORD_UNITS(8, 2, 8) DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32) - DRI_CONF_DISABLE_FALLBACK(false) + DRI_CONF_DISABLE_FALLBACK(true) DRI_CONF_DISABLE_DOUBLE_SIDE_STENCIL(false) DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY @@ -649,13 +649,8 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->chip_flags = RADEON_CHIPSET_TCL; break; - /* RV410 SE chips have half the pipes of regular RV410 */ case PCI_CHIP_RV410_5E4C: case PCI_CHIP_RV410_5E4F: - screen->chip_family = CHIP_FAMILY_RV380; - screen->chip_flags = RADEON_CHIPSET_TCL; - break; - case PCI_CHIP_RV410_564A: case PCI_CHIP_RV410_564B: case PCI_CHIP_RV410_564F: @@ -678,12 +673,132 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RC410_5A61: case PCI_CHIP_RC410_5A62: screen->chip_family = CHIP_FAMILY_RS400; - fprintf(stderr, "Warning, xpress200 detected.\n"); break; case PCI_CHIP_RS690_791E: + case PCI_CHIP_RS690_791F: screen->chip_family = CHIP_FAMILY_RS690; - fprintf(stderr, "Warning, RS690 detected, 3D support is incomplete.\n"); + break; + case PCI_CHIP_RS740_796C: + case PCI_CHIP_RS740_796D: + case PCI_CHIP_RS740_796E: + case PCI_CHIP_RS740_796F: + screen->chip_family = CHIP_FAMILY_RS740; + break; + + case PCI_CHIP_R520_7100: + case PCI_CHIP_R520_7101: + case PCI_CHIP_R520_7102: + case PCI_CHIP_R520_7103: + case PCI_CHIP_R520_7104: + case PCI_CHIP_R520_7105: + case PCI_CHIP_R520_7106: + case PCI_CHIP_R520_7108: + case PCI_CHIP_R520_7109: + case PCI_CHIP_R520_710A: + case PCI_CHIP_R520_710B: + case PCI_CHIP_R520_710C: + case PCI_CHIP_R520_710E: + case PCI_CHIP_R520_710F: + screen->chip_family = CHIP_FAMILY_R520; + screen->chip_flags = RADEON_CHIPSET_TCL; + 
break; + + case PCI_CHIP_RV515_7140: + case PCI_CHIP_RV515_7141: + case PCI_CHIP_RV515_7142: + case PCI_CHIP_RV515_7143: + case PCI_CHIP_RV515_7144: + case PCI_CHIP_RV515_7145: + case PCI_CHIP_RV515_7146: + case PCI_CHIP_RV515_7147: + case PCI_CHIP_RV515_7149: + case PCI_CHIP_RV515_714A: + case PCI_CHIP_RV515_714B: + case PCI_CHIP_RV515_714C: + case PCI_CHIP_RV515_714D: + case PCI_CHIP_RV515_714E: + case PCI_CHIP_RV515_714F: + case PCI_CHIP_RV515_7151: + case PCI_CHIP_RV515_7152: + case PCI_CHIP_RV515_7153: + case PCI_CHIP_RV515_715E: + case PCI_CHIP_RV515_715F: + case PCI_CHIP_RV515_7180: + case PCI_CHIP_RV515_7181: + case PCI_CHIP_RV515_7183: + case PCI_CHIP_RV515_7186: + case PCI_CHIP_RV515_7187: + case PCI_CHIP_RV515_7188: + case PCI_CHIP_RV515_718A: + case PCI_CHIP_RV515_718B: + case PCI_CHIP_RV515_718C: + case PCI_CHIP_RV515_718D: + case PCI_CHIP_RV515_718F: + case PCI_CHIP_RV515_7193: + case PCI_CHIP_RV515_7196: + case PCI_CHIP_RV515_719B: + case PCI_CHIP_RV515_719F: + case PCI_CHIP_RV515_7200: + case PCI_CHIP_RV515_7210: + case PCI_CHIP_RV515_7211: + screen->chip_family = CHIP_FAMILY_RV515; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV530_71C0: + case PCI_CHIP_RV530_71C1: + case PCI_CHIP_RV530_71C2: + case PCI_CHIP_RV530_71C3: + case PCI_CHIP_RV530_71C4: + case PCI_CHIP_RV530_71C5: + case PCI_CHIP_RV530_71C6: + case PCI_CHIP_RV530_71C7: + case PCI_CHIP_RV530_71CD: + case PCI_CHIP_RV530_71CE: + case PCI_CHIP_RV530_71D2: + case PCI_CHIP_RV530_71D4: + case PCI_CHIP_RV530_71D5: + case PCI_CHIP_RV530_71D6: + case PCI_CHIP_RV530_71DA: + case PCI_CHIP_RV530_71DE: + screen->chip_family = CHIP_FAMILY_RV530; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_R580_7240: + case PCI_CHIP_R580_7243: + case PCI_CHIP_R580_7244: + case PCI_CHIP_R580_7245: + case PCI_CHIP_R580_7246: + case PCI_CHIP_R580_7247: + case PCI_CHIP_R580_7248: + case PCI_CHIP_R580_7249: + case PCI_CHIP_R580_724A: + case PCI_CHIP_R580_724B: + case 
PCI_CHIP_R580_724C: + case PCI_CHIP_R580_724D: + case PCI_CHIP_R580_724E: + case PCI_CHIP_R580_724F: + case PCI_CHIP_R580_7284: + screen->chip_family = CHIP_FAMILY_R580; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV570_7280: + case PCI_CHIP_RV560_7281: + case PCI_CHIP_RV560_7283: + case PCI_CHIP_RV560_7287: + case PCI_CHIP_RV570_7288: + case PCI_CHIP_RV570_7289: + case PCI_CHIP_RV570_728B: + case PCI_CHIP_RV570_728C: + case PCI_CHIP_RV560_7290: + case PCI_CHIP_RV560_7291: + case PCI_CHIP_RV560_7293: + case PCI_CHIP_RV560_7297: + screen->chip_family = CHIP_FAMILY_RV560; + screen->chip_flags = RADEON_CHIPSET_TCL; break; default: @@ -697,6 +812,14 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) return NULL; } + if ((sPriv->drm_version.minor < 29) && (screen->chip_family >= CHIP_FAMILY_RV515)) { + fprintf(stderr, "R500 support requires a newer drm.\n"); + return NULL; + } + + if (getenv("R300_NO_TCL")) + screen->chip_flags &= ~RADEON_CHIPSET_TCL; + if (screen->chip_family <= CHIP_FAMILY_RS200) screen->chip_flags |= RADEON_CLASS_R100; else if (screen->chip_family <= CHIP_FAMILY_RV280) @@ -721,6 +844,36 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->fbLocation = (temp & 0xffff) << 16; } + if (screen->chip_family >= CHIP_FAMILY_RV515) { + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_NUM_GB_PIPES, + &temp); + if (ret) { + fprintf(stderr, "Unable to get num_pipes, need newer drm\n"); + switch (screen->chip_family) { + case CHIP_FAMILY_R300: + case CHIP_FAMILY_R350: + screen->num_gb_pipes = 2; + break; + case CHIP_FAMILY_R420: + case CHIP_FAMILY_R520: + case CHIP_FAMILY_R580: + case CHIP_FAMILY_RV560: + case CHIP_FAMILY_RV570: + screen->num_gb_pipes = 4; + break; + case CHIP_FAMILY_RV350: + case CHIP_FAMILY_RV515: + case CHIP_FAMILY_RV530: + case CHIP_FAMILY_RV410: + default: + screen->num_gb_pipes = 1; + break; + } + } else { + screen->num_gb_pipes = temp; + } + } + if ( sPriv->drm_version.minor >= 10 ) { drm_radeon_setparam_t sp; 
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h index 184b0d225e..ab859d55bd 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.h +++ b/src/mesa/drivers/dri/radeon/radeon_screen.h @@ -105,6 +105,8 @@ typedef struct { driOptionCache optionCache; const __DRIextension *extensions[8]; + + int num_gb_pipes; } radeonScreenRec, *radeonScreenPtr; #define IS_R100_CLASS(screen) \ diff --git a/src/mesa/drivers/dri/swrast/Makefile b/src/mesa/drivers/dri/swrast/Makefile new file mode 100644 index 0000000000..5f3a4f2191 --- /dev/null +++ b/src/mesa/drivers/dri/swrast/Makefile @@ -0,0 +1,24 @@ +# src/mesa/drivers/dri/swrast/Makefile + +TOP = ../../../../.. +include $(TOP)/configs/current + +LIBNAME = swrast_dri.so + +DRIVER_SOURCES = \ + swrast.c \ + swrast_span.c + +C_SOURCES = \ + $(SWRAST_COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +SWRAST_COMMON_SOURCES = \ + ../../common/driverfuncs.c \ + ../common/utils.c + +include ../Makefile.template + +symlinks: diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c new file mode 100644 index 0000000000..c4dba59198 --- /dev/null +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -0,0 +1,727 @@ +/* + * Copyright (C) 2008 George Sapountzis <gsap7@yahoo.gr> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * DRI software rasterizer + * + * This is the mesa swrast module packaged into a DRI driver structure. + * + * The front-buffer is allocated by the loader. The loader provides read/write + * callbacks for access to the front-buffer. The driver uses a scratch row for + * front-buffer rendering to avoid repeated calls to the loader. + * + * The back-buffer is allocated by the driver and is private. + */ + +#include "context.h" +#include "extensions.h" +#include "framebuffer.h" +#include "imports.h" +#include "renderbuffer.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "tnl/tnl.h" +#include "tnl/t_context.h" +#include "tnl/t_pipeline.h" +#include "vbo/vbo.h" +#include "drivers/common/driverfuncs.h" + +#include "swrast_priv.h" + + +#define need_GL_VERSION_1_3 +#define need_GL_VERSION_1_4 +#define need_GL_VERSION_1_5 +#define need_GL_VERSION_2_0 + +/* sw extensions for imaging */ +#define need_GL_EXT_blend_color +#define need_GL_EXT_blend_minmax +#define need_GL_EXT_convolution +#define need_GL_EXT_histogram +#define need_GL_SGI_color_table + +/* sw extensions not associated with some GL version */ +#define need_GL_ARB_shader_objects +#define need_GL_ARB_vertex_program +#define need_GL_APPLE_vertex_array_object +#define need_GL_ATI_fragment_shader +#define need_GL_EXT_depth_bounds_test +#define need_GL_EXT_framebuffer_object +#define need_GL_EXT_framebuffer_blit +#define need_GL_EXT_gpu_program_parameters +#define need_GL_EXT_paletted_texture +#define 
need_GL_IBM_multimode_draw_arrays +#define need_GL_MESA_resize_buffers +#define need_GL_NV_vertex_program +#define need_GL_NV_fragment_program + +#include "extension_helper.h" +#include "utils.h" + +const struct dri_extension card_extensions[] = +{ + { "GL_VERSION_1_3", GL_VERSION_1_3_functions }, + { "GL_VERSION_1_4", GL_VERSION_1_4_functions }, + { "GL_VERSION_1_5", GL_VERSION_1_5_functions }, + { "GL_VERSION_2_0", GL_VERSION_2_0_functions }, + + { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, + { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, + { "GL_EXT_convolution", GL_EXT_convolution_functions }, + { "GL_EXT_histogram", GL_EXT_histogram_functions }, + { "GL_SGI_color_table", GL_SGI_color_table_functions }, + + { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, + { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, + { "GL_APPLE_vertex_array_object", GL_APPLE_vertex_array_object_functions }, + { "GL_ATI_fragment_shader", GL_ATI_fragment_shader_functions }, + { "GL_EXT_depth_bounds_test", GL_EXT_depth_bounds_test_functions }, + { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, + { "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions }, + { "GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions }, + { "GL_EXT_paletted_texture", GL_EXT_paletted_texture_functions }, + { "GL_IBM_multimode_draw_arrays", GL_IBM_multimode_draw_arrays_functions }, + { "GL_MESA_resize_buffers", GL_MESA_resize_buffers_functions }, + { "GL_NV_vertex_program", GL_NV_vertex_program_functions }, + { "GL_NV_fragment_program", GL_NV_fragment_program_functions }, + { NULL, NULL } +}; + + +/** + * Screen and config-related functions + */ + +static void +setupLoaderExtensions(__DRIscreen *psp, + const __DRIextension **extensions) +{ + int i; + + for (i = 0; extensions[i]; i++) { + if (strcmp(extensions[i]->name, __DRI_SWRAST_LOADER) == 0) + psp->swrast_loader = (__DRIswrastLoaderExtension *) extensions[i]; + } +} + 
+/* Build the __DRIconfig list for one pixel depth by passing four depth/stencil combinations and two swap methods to driCreateConfigs(). psp and have_back_buffer are not referenced in the body. Returns NULL (after logging to stderr) when driCreateConfigs() fails. */ +static __DRIconfig ** +swrastFillInModes(__DRIscreen *psp, + unsigned pixel_bits, unsigned depth_bits, + unsigned stencil_bits, GLboolean have_back_buffer) +{ + __DRIconfig **configs; + unsigned depth_buffer_factor; + unsigned back_buffer_factor; + GLenum fb_format; + GLenum fb_type; + + /* Swap methods handed to driCreateConfigs(): GLX_NONE and + * GLX_SWAP_UNDEFINED_OML. GLX_SWAP_COPY_OML is not advertised here. + */ + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML + }; + + u_int8_t depth_bits_array[4]; + u_int8_t stencil_bits_array[4]; + + depth_bits_array[0] = 0; + depth_bits_array[1] = 0; + depth_bits_array[2] = depth_bits; + depth_bits_array[3] = depth_bits; + + /* Just like with the accumulation buffer, always provide some modes + * with a stencil buffer. + */ + stencil_bits_array[0] = 0; + stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits; + stencil_bits_array[2] = 0; + stencil_bits_array[3] = (stencil_bits == 0) ? 8 : stencil_bits; + + depth_buffer_factor = 4; + back_buffer_factor = 2; + + /* 8 bpp -> 3-3-2 RGB, 16 bpp -> 5-6-5 RGB, anything else -> 32-bit BGRA */ + if (pixel_bits == 8) { + fb_format = GL_RGB; + fb_type = GL_UNSIGNED_BYTE_2_3_3_REV; + } + else if (pixel_bits == 16) { + fb_format = GL_RGB; + fb_type = GL_UNSIGNED_SHORT_5_6_5; + } + else { + fb_format = GL_BGRA; + fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; + } + + configs = driCreateConfigs(fb_format, fb_type, + depth_bits_array, stencil_bits_array, + depth_buffer_factor, back_buffer_modes, + back_buffer_factor); + if (configs == NULL) { + fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, + __LINE__); + return NULL; + } + + return configs; +} + +static __DRIscreen * +driCreateNewScreen(int scrn, const __DRIextension **extensions, + const __DRIconfig ***driver_configs, void *data) +{ + static const __DRIextension *emptyExtensionList[] = { NULL }; + __DRIscreen *psp; + __DRIconfig **configs8, **configs16, **configs32; + + (void) data; + + TRACE; + + psp = _mesa_calloc(sizeof(*psp)); + if (!psp) + return NULL; + 
setupLoaderExtensions(psp, extensions); + + psp->num = scrn; + psp->extensions = emptyExtensionList; + + configs8 = swrastFillInModes(psp, 8, 8, 0, 1); + configs16 = swrastFillInModes(psp, 16, 16, 0, 1); + configs32 = swrastFillInModes(psp, 32, 24, 8, 1); + + configs16 = (__DRIconfig **)driConcatConfigs(configs8, configs16); + + *driver_configs = driConcatConfigs(configs16, configs32); + + driInitExtensions( NULL, card_extensions, GL_FALSE ); + + return psp; +} + +static void driDestroyScreen(__DRIscreen *psp) +{ + TRACE; + + if (psp) { + _mesa_free(psp); + } +} + +static const __DRIextension **driGetExtensions(__DRIscreen *psp) +{ + TRACE; + + return psp->extensions; +} + + +/** + * Framebuffer and renderbuffer-related functions. + */ + +static GLuint +choose_pixel_format(const GLvisual *v) +{ + if (v->rgbMode) { + int bpp = v->rgbBits; + + if (bpp == 32 + && v->redMask == 0xff0000 + && v->greenMask == 0x00ff00 + && v->blueMask == 0x0000ff) + return PF_A8R8G8B8; + else if (bpp == 16 + && v->redMask == 0xf800 + && v->greenMask == 0x07e0 + && v->blueMask == 0x001f) + return PF_R5G6B5; + else if (bpp == 8 + && v->redMask == 0x07 + && v->greenMask == 0x38 + && v->blueMask == 0xc0) + return PF_R3G3B2; + } + else { + if (v->indexBits == 8) + return PF_CI8; + } + + _mesa_problem( NULL, "unexpected format in %s", __FUNCTION__ ); + return 0; +} + +static void +swrast_delete_renderbuffer(struct gl_renderbuffer *rb) +{ + TRACE; + + _mesa_free(rb->Data); + _mesa_free(rb); +} + +static GLboolean +swrast_alloc_front_storage(GLcontext *ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, GLuint width, GLuint height) +{ + struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb); + int bpp; + unsigned mask = PITCH_ALIGN_BITS - 1; + + TRACE; + + rb->Data = NULL; + rb->Width = width; + rb->Height = height; + + switch (internalFormat) { + case GL_RGB: + bpp = rb->RedBits + rb->GreenBits + rb->BlueBits; + break; + case GL_RGBA: + bpp = rb->RedBits + rb->GreenBits + rb->BlueBits 
+ rb->AlphaBits; + break; + case GL_COLOR_INDEX8_EXT: + bpp = rb->IndexBits; + break; + default: + _mesa_problem( NULL, "unexpected format in %s", __FUNCTION__ ); + return GL_FALSE; + } + + /* always pad to PITCH_ALIGN_BITS */ + xrb->pitch = ((width * bpp + mask) & ~mask) / 8; + + return GL_TRUE; +} + +static GLboolean +swrast_alloc_back_storage(GLcontext *ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, GLuint width, GLuint height) +{ + struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb); + + TRACE; + + _mesa_free(rb->Data); + + (void) swrast_alloc_front_storage(ctx, rb, internalFormat, width, height); + + rb->Data = _mesa_malloc(height * xrb->pitch); + + return GL_TRUE; +} + +static struct swrast_renderbuffer * +swrast_new_renderbuffer(const GLvisual *visual, GLboolean front) +{ + struct swrast_renderbuffer *xrb = _mesa_calloc(sizeof *xrb); + GLuint pixel_format; + + TRACE; + + if (xrb) { + _mesa_init_renderbuffer(&xrb->Base, 0); + + pixel_format = choose_pixel_format(visual); + + xrb->Base.Delete = swrast_delete_renderbuffer; + if (front) { + xrb->Base.AllocStorage = swrast_alloc_front_storage; + swrast_set_span_funcs_front(xrb, pixel_format); + } + else { + xrb->Base.AllocStorage = swrast_alloc_back_storage; + swrast_set_span_funcs_back(xrb, pixel_format); + } + + switch (pixel_format) { + case PF_A8R8G8B8: + xrb->Base.InternalFormat = GL_RGBA; + xrb->Base._BaseFormat = GL_RGBA; + xrb->Base.DataType = GL_UNSIGNED_BYTE; + xrb->Base.RedBits = 8 * sizeof(GLubyte); + xrb->Base.GreenBits = 8 * sizeof(GLubyte); + xrb->Base.BlueBits = 8 * sizeof(GLubyte); + xrb->Base.AlphaBits = 8 * sizeof(GLubyte); + break; + case PF_R5G6B5: + xrb->Base.InternalFormat = GL_RGB; + xrb->Base._BaseFormat = GL_RGB; + xrb->Base.DataType = GL_UNSIGNED_BYTE; + xrb->Base.RedBits = 5 * sizeof(GLubyte); + xrb->Base.GreenBits = 6 * sizeof(GLubyte); + xrb->Base.BlueBits = 5 * sizeof(GLubyte); + xrb->Base.AlphaBits = 0; + break; + case PF_R3G3B2: + xrb->Base.InternalFormat = 
GL_RGB; + xrb->Base._BaseFormat = GL_RGB; + xrb->Base.DataType = GL_UNSIGNED_BYTE; + xrb->Base.RedBits = 3 * sizeof(GLubyte); + xrb->Base.GreenBits = 3 * sizeof(GLubyte); + xrb->Base.BlueBits = 2 * sizeof(GLubyte); + xrb->Base.AlphaBits = 0; + break; + case PF_CI8: + xrb->Base.InternalFormat = GL_COLOR_INDEX8_EXT; + xrb->Base._BaseFormat = GL_COLOR_INDEX; + xrb->Base.DataType = GL_UNSIGNED_BYTE; + xrb->Base.IndexBits = 8 * sizeof(GLubyte); + break; + default: + /* unsupported pixel format: release the wrapper instead of leaking it */ + _mesa_free(xrb); + return NULL; + } + } + return xrb; +} + +/* Create a drawable: allocate the wrapper and its scratch row, initialise the + * Mesa framebuffer from config->modes, then attach the front renderbuffer, + * the back renderbuffer (double-buffered configs only) and the software + * depth/stencil/accum buffers. Returns NULL if the wrapper or the scratch + * row cannot be allocated. + */ +static __DRIdrawable * +driCreateNewDrawable(__DRIscreen *screen, + const __DRIconfig *config, void *data) +{ + __DRIdrawable *buf; + struct swrast_renderbuffer *frontrb, *backrb; + + TRACE; + + buf = _mesa_calloc(sizeof *buf); + if (!buf) + return NULL; + + buf->loaderPrivate = data; + + buf->driScreenPriv = screen; + + buf->row = _mesa_malloc(MAX_WIDTH * 4); + /* fail before the framebuffer is initialised, so only buf needs freeing */ + if (!buf->row) { + _mesa_free(buf); + return NULL; + } + + /* basic framebuffer setup */ + _mesa_initialize_framebuffer(&buf->Base, &config->modes); + + /* add front renderbuffer */ + frontrb = swrast_new_renderbuffer(&config->modes, GL_TRUE); + _mesa_add_renderbuffer(&buf->Base, BUFFER_FRONT_LEFT, &frontrb->Base); + + /* add back renderbuffer */ + if (config->modes.doubleBufferMode) { + backrb = swrast_new_renderbuffer(&config->modes, GL_FALSE); + _mesa_add_renderbuffer(&buf->Base, BUFFER_BACK_LEFT, &backrb->Base); + } + + /* add software renderbuffers */ + _mesa_add_soft_renderbuffers(&buf->Base, + GL_FALSE, /* color */ + config->modes.haveDepthBuffer, + config->modes.haveStencilBuffer, + config->modes.haveAccumBuffer, + GL_FALSE, /* alpha */ + GL_FALSE /* aux bufs */); + + return buf; +} + +static void +driDestroyDrawable(__DRIdrawable *buf) +{ + TRACE; + + if (buf) { + struct gl_framebuffer *fb = &buf->Base; + + _mesa_free(buf->row); + + fb->DeletePending = GL_TRUE; + _mesa_unreference_framebuffer(&fb); + } +} + +static void driSwapBuffers(__DRIdrawable *buf) +{ + GET_CURRENT_CONTEXT(ctx); + + struct swrast_renderbuffer *frontrb = + 
swrast_renderbuffer(buf->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer); + struct swrast_renderbuffer *backrb = + swrast_renderbuffer(buf->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer); + + __DRIscreen *screen = buf->driScreenPriv; + + TRACE; + + /* nothing to swap when the drawable is single-buffered */ + if (backrb == NULL) + return; + + /* check if swapping currently bound buffer */ + if (ctx && ctx->DrawBuffer == &(buf->Base)) { + /* flush pending rendering */ + _mesa_notifySwapBuffers(ctx); + } + + /* hand the back buffer's pixel data to the loader, using the front + * renderbuffer's width and height as the image size + */ + screen->swrast_loader->putImage(buf, __DRI_SWRAST_IMAGE_OP_SWAP, + 0, 0, + frontrb->Base.Width, + frontrb->Base.Height, + backrb->Base.Data, + buf->loaderPrivate); +} + + +/** + * General device driver functions. + */ + +/* Ask the loader for the drawable's current size; the x/y origin it reports + * is fetched but discarded. + */ +static void +get_window_size( GLframebuffer *fb, GLsizei *w, GLsizei *h ) +{ + __DRIdrawable *buf = swrast_drawable(fb); + __DRIscreen *screen = buf->driScreenPriv; + int x, y; + + screen->swrast_loader->getDrawableInfo(buf, + &x, &y, w, h, + buf->loaderPrivate); +} + +/* Resize fb when the loader-reported size differs from fb->Width/fb->Height. */ +static void +swrast_check_and_update_window_size( GLcontext *ctx, GLframebuffer *fb ) +{ + GLsizei width, height; + + get_window_size(fb, &width, &height); + if (fb->Width != width || fb->Height != height) { + _mesa_resize_framebuffer(ctx, fb, width, height); + } +} + +/* Only GL_VENDOR and GL_RENDERER are answered here; any other name returns NULL. */ +static const GLubyte * +get_string(GLcontext *ctx, GLenum pname) +{ + (void) ctx; + switch (pname) { + case GL_VENDOR: + return (const GLubyte *) "Mesa Project"; + case GL_RENDERER: + return (const GLubyte *) "Software Rasterizer"; + default: + return NULL; + } +} + +/* Forward the state-change bitmask to the swrast, swsetup, vbo and tnl modules. */ +static void +update_state( GLcontext *ctx, GLuint new_state ) +{ + /* not much to do here - pass it on */ + _swrast_InvalidateState( ctx, new_state ); + _swsetup_InvalidateState( ctx, new_state ); + _vbo_InvalidateState( ctx, new_state ); + _tnl_InvalidateState( ctx, new_state ); +} + +static void +viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) +{ + GLframebuffer *draw = ctx->WinSysDrawBuffer; + GLframebuffer *read = ctx->WinSysReadBuffer; + + 
swrast_check_and_update_window_size(ctx, draw); + swrast_check_and_update_window_size(ctx, read); +} + +static void +swrast_init_driver_functions(struct dd_function_table *driver) +{ + driver->GetString = get_string; + driver->UpdateState = update_state; + driver->GetBufferSize = NULL; + driver->Viewport = viewport; +} + + +/** + * Context-related functions. + */ + +static __DRIcontext * +driCreateNewContext(__DRIscreen *screen, const __DRIconfig *config, + __DRIcontext *shared, void *data) +{ + __DRIcontext *ctx; + GLcontext *mesaCtx; + struct dd_function_table functions; + + TRACE; + + ctx = _mesa_calloc(sizeof *ctx); + if (!ctx) + return NULL; + + ctx->loaderPrivate = data; + + ctx->driScreenPriv = screen; + + /* build table of device driver functions */ + _mesa_init_driver_functions(&functions); + swrast_init_driver_functions(&functions); + + if (!_mesa_initialize_context(&ctx->Base, &config->modes, + shared ? &shared->Base : NULL, + &functions, (void *) ctx)) { + _mesa_free(ctx); + return NULL; + } + + mesaCtx = &ctx->Base; + + /* do bounds checking to prevent segfaults and server crashes! 
*/ + mesaCtx->Const.CheckArrayBounds = GL_TRUE; + + /* create module contexts */ + _swrast_CreateContext( mesaCtx ); + _vbo_CreateContext( mesaCtx ); + _tnl_CreateContext( mesaCtx ); + _swsetup_CreateContext( mesaCtx ); + _swsetup_Wakeup( mesaCtx ); + + /* use default TCL pipeline */ + { + TNLcontext *tnl = TNL_CONTEXT(mesaCtx); + tnl->Driver.RunPipeline = _tnl_run_pipeline; + } + + _mesa_enable_sw_extensions(mesaCtx); + _mesa_enable_1_3_extensions(mesaCtx); + _mesa_enable_1_4_extensions(mesaCtx); + _mesa_enable_1_5_extensions(mesaCtx); + _mesa_enable_2_0_extensions(mesaCtx); + _mesa_enable_2_1_extensions(mesaCtx); + + return ctx; +} + +static void +driDestroyContext(__DRIcontext *ctx) +{ + GLcontext *mesaCtx; + TRACE; + + if (ctx) { + mesaCtx = &ctx->Base; + _swsetup_DestroyContext( mesaCtx ); + _swrast_DestroyContext( mesaCtx ); + _tnl_DestroyContext( mesaCtx ); + _vbo_DestroyContext( mesaCtx ); + _mesa_destroy_context( mesaCtx ); + } +} + +static int +driCopyContext(__DRIcontext *dst, __DRIcontext *src, unsigned long mask) +{ + TRACE; + + _mesa_copy_context(&src->Base, &dst->Base, mask); + return GL_TRUE; +} + +static int driBindContext(__DRIcontext *ctx, + __DRIdrawable *draw, + __DRIdrawable *read) +{ + GLcontext *mesaCtx; + GLframebuffer *mesaDraw; + GLframebuffer *mesaRead; + TRACE; + + if (ctx) { + if (!draw || !read) + return GL_FALSE; + + mesaCtx = &ctx->Base; + mesaDraw = &draw->Base; + mesaRead = &read->Base; + + /* check for same context and buffer */ + if (mesaCtx == _mesa_get_current_context() + && mesaCtx->DrawBuffer == mesaDraw + && mesaCtx->ReadBuffer == mesaRead) { + return GL_TRUE; + } + + _glapi_check_multithread(); + + swrast_check_and_update_window_size(mesaCtx, mesaDraw); + if (read != draw) + swrast_check_and_update_window_size(mesaCtx, mesaRead); + + _mesa_make_current( mesaCtx, + mesaDraw, + mesaRead ); + } + else { + /* unbind */ + _mesa_make_current( NULL, NULL, NULL ); + } + + return GL_TRUE; +} + +static int 
driUnbindContext(__DRIcontext *ctx) +{ + TRACE; + (void) ctx; + _mesa_make_current(NULL, NULL, NULL); + return GL_TRUE; +} + + +static const __DRIcoreExtension driCoreExtension = { + { __DRI_CORE, __DRI_CORE_VERSION }, + NULL, /* driCreateNewScreen */ + driDestroyScreen, + driGetExtensions, + driGetConfigAttrib, + driIndexConfigAttrib, + NULL, /* driCreateNewDrawable */ + driDestroyDrawable, + driSwapBuffers, + driCreateNewContext, + driCopyContext, + driDestroyContext, + driBindContext, + driUnbindContext +}; + +static const __DRIswrastExtension driSWRastExtension = { + { __DRI_SWRAST, __DRI_SWRAST_VERSION }, + driCreateNewScreen, + driCreateNewDrawable +}; + +/* This is the table of extensions that the loader will dlsym() for. */ +PUBLIC const __DRIextension *__driDriverExtensions[] = { + &driCoreExtension.base, + &driSWRastExtension.base, + NULL +}; diff --git a/src/mesa/drivers/dri/swrast/swrast_priv.h b/src/mesa/drivers/dri/swrast/swrast_priv.h new file mode 100644 index 0000000000..a3e3922f12 --- /dev/null +++ b/src/mesa/drivers/dri/swrast/swrast_priv.h @@ -0,0 +1,142 @@ +/* + * Mesa 3-D graphics library + * Version: 7.1 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * George Sapountzis <gsap7@yahoo.gr> + */ + + +#ifndef _SWRAST_PRIV_H +#define _SWRAST_PRIV_H + +#include <GL/gl.h> +#include <GL/internal/dri_interface.h> +#include "mtypes.h" + + +/** + * Debugging + */ +#define DEBUG_CORE 0 +#define DEBUG_SPAN 0 + +#if DEBUG_CORE +#define TRACE _mesa_printf("--> %s\n", __FUNCTION__) +#else +#define TRACE +#endif + +#if DEBUG_SPAN +#define TRACE_SPAN _mesa_printf("--> %s\n", __FUNCTION__) +#else +#define TRACE_SPAN +#endif + + +/** + * Data types + */ +struct __DRIscreenRec { + int num; + + const __DRIextension **extensions; + + const __DRIswrastLoaderExtension *swrast_loader; +}; + +struct __DRIcontextRec { + GLcontext Base; + + void *loaderPrivate; + + __DRIscreen *driScreenPriv; +}; + +struct __DRIdrawableRec { + GLframebuffer Base; + + void *loaderPrivate; + + __DRIscreen *driScreenPriv; + + /* scratch row for optimized front-buffer rendering */ + char *row; +}; + +struct swrast_renderbuffer { + struct gl_renderbuffer Base; + + /* renderbuffer pitch (in bytes) */ + GLuint pitch; +}; + +static inline __DRIcontext * +swrast_context(GLcontext *ctx) +{ + return (__DRIcontext *) ctx; +} + +static inline __DRIdrawable * +swrast_drawable(GLframebuffer *fb) +{ + return (__DRIdrawable *) fb; +} + +static inline struct swrast_renderbuffer * +swrast_renderbuffer(struct gl_renderbuffer *rb) +{ + return (struct swrast_renderbuffer *) rb; +} + + +/** + * Pixel formats we support + */ +#define PF_CI8 1 /**< Color Index mode */ +#define PF_A8R8G8B8 2 /**< 
32-bit TrueColor: 8-A, 8-R, 8-G, 8-B bits */ +#define PF_R5G6B5 3 /**< 16-bit TrueColor: 5-R, 6-G, 5-B bits */ +#define PF_R3G3B2 4 /**< 8-bit TrueColor: 3-R, 3-G, 2-B bits */ + + +/** + * Renderbuffer pitch alignment (in bits). + * + * The xorg loader requires padding images to 32 bits. However, this should + * become a screen/drawable parameter XXX + */ +#define PITCH_ALIGN_BITS 32 + + +/* swrast_span.c */ + +extern void +swrast_set_span_funcs_back(struct swrast_renderbuffer *xrb, + GLuint pixel_format); + +extern void +swrast_set_span_funcs_front(struct swrast_renderbuffer *xrb, + GLuint pixel_format); + +#endif /* _SWRAST_PRIV_H_ */ diff --git a/src/mesa/drivers/dri/swrast/swrast_span.c b/src/mesa/drivers/dri/swrast/swrast_span.c new file mode 100644 index 0000000000..5e990368b2 --- /dev/null +++ b/src/mesa/drivers/dri/swrast/swrast_span.c @@ -0,0 +1,367 @@ +/* + * Mesa 3-D graphics library + * Version: 7.1 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * George Sapountzis <gsap7@yahoo.gr> + */ + +#include "swrast_priv.h" + +#define YFLIP(_xrb, Y) ((_xrb)->Base.Height - (Y) - 1) + +/* + * Dithering support takes the "computation" extreme in the "computation vs. + * storage" trade-off. This approach is very simple to implement and any + * computational overhead should be acceptable. XMesa uses table lookups for + * around 8KB of storage overhead per visual. + */ +#define DITHER 1 + +static const GLubyte kernel[16] = { + 0*16, 8*16, 2*16, 10*16, + 12*16, 4*16, 14*16, 6*16, + 3*16, 11*16, 1*16, 9*16, + 15*16, 7*16, 13*16, 5*16, +}; + +#if DITHER +#define DITHER_COMP(X, Y) kernel[((X) & 0x3) | (((Y) & 0x3) << 2)] + +#define DITHER_CLAMP(X) (((X) < CHAN_MAX) ? (X) : CHAN_MAX) +#else +#define DITHER_COMP(X, Y) 0 + +#define DITHER_CLAMP(X) (X) +#endif + + +/* + * Pixel macros shared across front/back buffer span functions. 
+ */ + +/* 32-bit BGRA */ +#define STORE_PIXEL_A8R8G8B8(DST, X, Y, VALUE) \ + DST[3] = VALUE[ACOMP]; \ + DST[2] = VALUE[RCOMP]; \ + DST[1] = VALUE[GCOMP]; \ + DST[0] = VALUE[BCOMP] +#define STORE_PIXEL_RGB_A8R8G8B8(DST, X, Y, VALUE) \ + DST[3] = 0xff; \ + DST[2] = VALUE[RCOMP]; \ + DST[1] = VALUE[GCOMP]; \ + DST[0] = VALUE[BCOMP] +#define FETCH_PIXEL_A8R8G8B8(DST, SRC) \ + DST[ACOMP] = SRC[3]; \ + DST[RCOMP] = SRC[2]; \ + DST[GCOMP] = SRC[1]; \ + DST[BCOMP] = SRC[0] + + +/* 16-bit BGR */ +#define STORE_PIXEL_R5G6B5(DST, X, Y, VALUE) \ + do { \ + int d = DITHER_COMP(X, Y) >> 6; \ + GLushort *p = (GLushort *)DST; \ + *p = ( ((DITHER_CLAMP((VALUE[RCOMP]) + d) & 0xf8) << 8) | \ + ((DITHER_CLAMP((VALUE[GCOMP]) + d) & 0xfc) << 3) | \ + ((DITHER_CLAMP((VALUE[BCOMP]) + d) & 0xf8) >> 3) ); \ + } while(0) +#define FETCH_PIXEL_R5G6B5(DST, SRC) \ + do { \ + GLushort p = *(GLushort *)SRC; \ + DST[ACOMP] = 0xff; \ + DST[RCOMP] = ((p >> 8) & 0xf8) * 255 / 0xf8; \ + DST[GCOMP] = ((p >> 3) & 0xfc) * 255 / 0xfc; \ + DST[BCOMP] = ((p << 3) & 0xf8) * 255 / 0xf8; \ + } while(0) + + +/* 8-bit BGR */ +#define STORE_PIXEL_R3G3B2(DST, X, Y, VALUE) \ + do { \ + int d = DITHER_COMP(X, Y) >> 3; \ + GLubyte *p = (GLubyte *)DST; \ + *p = ( ((DITHER_CLAMP((VALUE[RCOMP]) + d) & 0xe0) >> 5) | \ + ((DITHER_CLAMP((VALUE[GCOMP]) + d) & 0xe0) >> 2) | \ + ((DITHER_CLAMP((VALUE[BCOMP]) + d) & 0xc0) >> 0) ); \ + } while(0) +#define FETCH_PIXEL_R3G3B2(DST, SRC) \ + do { \ + GLubyte p = *(GLubyte *)SRC; \ + DST[ACOMP] = 0xff; \ + DST[RCOMP] = ((p << 5) & 0xe0) * 255 / 0xe0; \ + DST[GCOMP] = ((p << 2) & 0xe0) * 255 / 0xe0; \ + DST[BCOMP] = ((p << 0) & 0xc0) * 255 / 0xc0; \ + } while(0) + + +/* + * Generate code for back-buffer span functions. 
+ */ + +/* 32-bit BGRA */ +#define NAME(FUNC) FUNC##_A8R8G8B8 +#define RB_TYPE GLubyte +#define SPAN_VARS \ + struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb); +#define INIT_PIXEL_PTR(P, X, Y) \ + GLubyte *P = (GLubyte *)xrb->Base.Data + YFLIP(xrb, Y) * xrb->pitch + (X) * 4; +#define INC_PIXEL_PTR(P) P += 4 +#define STORE_PIXEL(DST, X, Y, VALUE) \ + STORE_PIXEL_A8R8G8B8(DST, X, Y, VALUE) +#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \ + STORE_PIXEL_RGB_A8R8G8B8(DST, X, Y, VALUE) +#define FETCH_PIXEL(DST, SRC) \ + FETCH_PIXEL_A8R8G8B8(DST, SRC) + +#include "swrast/s_spantemp.h" + + +/* 16-bit BGR */ +#define NAME(FUNC) FUNC##_R5G6B5 +#define RB_TYPE GLubyte +#define SPAN_VARS \ + struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb); +#define INIT_PIXEL_PTR(P, X, Y) \ + GLubyte *P = (GLubyte *)xrb->Base.Data + YFLIP(xrb, Y) * xrb->pitch + (X) * 2; +#define INC_PIXEL_PTR(P) P += 2 +#define STORE_PIXEL(DST, X, Y, VALUE) \ + STORE_PIXEL_R5G6B5(DST, X, Y, VALUE) +#define FETCH_PIXEL(DST, SRC) \ + FETCH_PIXEL_R5G6B5(DST, SRC) + +#include "swrast/s_spantemp.h" + + +/* 8-bit BGR */ +#define NAME(FUNC) FUNC##_R3G3B2 +#define RB_TYPE GLubyte +#define SPAN_VARS \ + struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb); +#define INIT_PIXEL_PTR(P, X, Y) \ + GLubyte *P = (GLubyte *)xrb->Base.Data + YFLIP(xrb, Y) * xrb->pitch + (X) * 1; +#define INC_PIXEL_PTR(P) P += 1 +#define STORE_PIXEL(DST, X, Y, VALUE) \ + STORE_PIXEL_R3G3B2(DST, X, Y, VALUE) +#define FETCH_PIXEL(DST, SRC) \ + FETCH_PIXEL_R3G3B2(DST, SRC) + +#include "swrast/s_spantemp.h" + + +/* 8-bit color index */ +#define NAME(FUNC) FUNC##_CI8 +#define CI_MODE +#define RB_TYPE GLubyte +#define SPAN_VARS \ + struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb); +#define INIT_PIXEL_PTR(P, X, Y) \ + GLubyte *P = (GLubyte *)xrb->Base.Data + YFLIP(xrb, Y) * xrb->pitch + (X); +#define INC_PIXEL_PTR(P) P += 1 +#define STORE_PIXEL(DST, X, Y, VALUE) \ + *DST = VALUE[0] +#define FETCH_PIXEL(DST, SRC) \ + DST = 
SRC[0] + +#include "swrast/s_spantemp.h" + + +/* + * Generate code for front-buffer span functions. + */ + +/* 32-bit BGRA */ +#define NAME(FUNC) FUNC##_A8R8G8B8_front +#define RB_TYPE GLubyte +#define SPAN_VARS \ + struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb); +#define INIT_PIXEL_PTR(P, X, Y) \ + GLubyte *P = (GLubyte *)row; +#define INC_PIXEL_PTR(P) P += 4 +#define STORE_PIXEL(DST, X, Y, VALUE) \ + STORE_PIXEL_A8R8G8B8(DST, X, Y, VALUE) +#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \ + STORE_PIXEL_RGB_A8R8G8B8(DST, X, Y, VALUE) +#define FETCH_PIXEL(DST, SRC) \ + FETCH_PIXEL_A8R8G8B8(DST, SRC) + +#include "swrast_spantemp.h" + + +/* 16-bit BGR */ +#define NAME(FUNC) FUNC##_R5G6B5_front +#define RB_TYPE GLubyte +#define SPAN_VARS \ + struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb); +#define INIT_PIXEL_PTR(P, X, Y) \ + GLubyte *P = (GLubyte *)row; +#define INC_PIXEL_PTR(P) P += 2 +#define STORE_PIXEL(DST, X, Y, VALUE) \ + STORE_PIXEL_R5G6B5(DST, X, Y, VALUE) +#define FETCH_PIXEL(DST, SRC) \ + FETCH_PIXEL_R5G6B5(DST, SRC) + +#include "swrast_spantemp.h" + + +/* 8-bit BGR */ +#define NAME(FUNC) FUNC##_R3G3B2_front +#define RB_TYPE GLubyte +#define SPAN_VARS \ + struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb); +#define INIT_PIXEL_PTR(P, X, Y) \ + GLubyte *P = (GLubyte *)row; +#define INC_PIXEL_PTR(P) P += 1 +#define STORE_PIXEL(DST, X, Y, VALUE) \ + STORE_PIXEL_R3G3B2(DST, X, Y, VALUE) +#define FETCH_PIXEL(DST, SRC) \ + FETCH_PIXEL_R3G3B2(DST, SRC) + +#include "swrast_spantemp.h" + + +/* 8-bit color index */ +#define NAME(FUNC) FUNC##_CI8_front +#define CI_MODE +#define RB_TYPE GLubyte +#define SPAN_VARS \ + struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb); +#define INIT_PIXEL_PTR(P, X, Y) \ + GLubyte *P = (GLubyte *)row; +#define INC_PIXEL_PTR(P) P += 1 +#define STORE_PIXEL(DST, X, Y, VALUE) \ + *DST = VALUE[0] +#define FETCH_PIXEL(DST, SRC) \ + DST = SRC[0] + +#include "swrast_spantemp.h" + + +/* + * Back-buffers are malloced 
memory and always private. + * + * BACK_PIXMAP (not supported) + * BACK_XIMAGE + */ +void +swrast_set_span_funcs_back(struct swrast_renderbuffer *xrb, + GLuint pixel_format) +{ + switch (pixel_format) { + case PF_A8R8G8B8: + xrb->Base.GetRow = get_row_A8R8G8B8; + xrb->Base.GetValues = get_values_A8R8G8B8; + xrb->Base.PutRow = put_row_A8R8G8B8; + xrb->Base.PutRowRGB = put_row_rgb_A8R8G8B8; + xrb->Base.PutMonoRow = put_mono_row_A8R8G8B8; + xrb->Base.PutValues = put_values_A8R8G8B8; + xrb->Base.PutMonoValues = put_mono_values_A8R8G8B8; + break; + case PF_R5G6B5: + xrb->Base.GetRow = get_row_R5G6B5; + xrb->Base.GetValues = get_values_R5G6B5; + xrb->Base.PutRow = put_row_R5G6B5; + xrb->Base.PutRowRGB = put_row_rgb_R5G6B5; + xrb->Base.PutMonoRow = put_mono_row_R5G6B5; + xrb->Base.PutValues = put_values_R5G6B5; + xrb->Base.PutMonoValues = put_mono_values_R5G6B5; + break; + case PF_R3G3B2: + xrb->Base.GetRow = get_row_R3G3B2; + xrb->Base.GetValues = get_values_R3G3B2; + xrb->Base.PutRow = put_row_R3G3B2; + xrb->Base.PutRowRGB = put_row_rgb_R3G3B2; + xrb->Base.PutMonoRow = put_mono_row_R3G3B2; + xrb->Base.PutValues = put_values_R3G3B2; + xrb->Base.PutMonoValues = put_mono_values_R3G3B2; + break; + case PF_CI8: + xrb->Base.GetRow = get_row_CI8; + xrb->Base.GetValues = get_values_CI8; + xrb->Base.PutRow = put_row_CI8; + xrb->Base.PutMonoRow = put_mono_row_CI8; + xrb->Base.PutValues = put_values_CI8; + xrb->Base.PutMonoValues = put_mono_values_CI8; + break; + default: + assert(0); + return; + } +} + + +/* + * Front-buffers are provided by the loader, the xorg loader uses pixmaps. 
+ * + * WINDOW, An X window + * GLXWINDOW, GLX window + * PIXMAP, GLX pixmap + * PBUFFER GLX Pbuffer + */ +void +swrast_set_span_funcs_front(struct swrast_renderbuffer *xrb, + GLuint pixel_format) +{ + switch (pixel_format) { + case PF_A8R8G8B8: + xrb->Base.GetRow = get_row_A8R8G8B8_front; + xrb->Base.GetValues = get_values_A8R8G8B8_front; + xrb->Base.PutRow = put_row_A8R8G8B8_front; + xrb->Base.PutRowRGB = put_row_rgb_A8R8G8B8_front; + xrb->Base.PutMonoRow = put_mono_row_A8R8G8B8_front; + xrb->Base.PutValues = put_values_A8R8G8B8_front; + xrb->Base.PutMonoValues = put_mono_values_A8R8G8B8_front; + break; + case PF_R5G6B5: + xrb->Base.GetRow = get_row_R5G6B5_front; + xrb->Base.GetValues = get_values_R5G6B5_front; + xrb->Base.PutRow = put_row_R5G6B5_front; + xrb->Base.PutRowRGB = put_row_rgb_R5G6B5_front; + xrb->Base.PutMonoRow = put_mono_row_R5G6B5_front; + xrb->Base.PutValues = put_values_R5G6B5_front; + xrb->Base.PutMonoValues = put_mono_values_R5G6B5_front; + break; + case PF_R3G3B2: + xrb->Base.GetRow = get_row_R3G3B2_front; + xrb->Base.GetValues = get_values_R3G3B2_front; + xrb->Base.PutRow = put_row_R3G3B2_front; + xrb->Base.PutRowRGB = put_row_rgb_R3G3B2_front; + xrb->Base.PutMonoRow = put_mono_row_R3G3B2_front; + xrb->Base.PutValues = put_values_R3G3B2_front; + xrb->Base.PutMonoValues = put_mono_values_R3G3B2_front; + break; + case PF_CI8: + xrb->Base.GetRow = get_row_CI8_front; + xrb->Base.GetValues = get_values_CI8_front; + xrb->Base.PutRow = put_row_CI8_front; + xrb->Base.PutMonoRow = put_mono_row_CI8_front; + xrb->Base.PutValues = put_values_CI8_front; + xrb->Base.PutMonoValues = put_mono_values_CI8_front; + break; + default: + assert(0); + return; + } +} diff --git a/src/mesa/drivers/dri/swrast/swrast_spantemp.h b/src/mesa/drivers/dri/swrast/swrast_spantemp.h new file mode 100644 index 0000000000..e7a9c86d7d --- /dev/null +++ b/src/mesa/drivers/dri/swrast/swrast_spantemp.h @@ -0,0 +1,328 @@ +/* + * Mesa 3-D graphics library + * Version: 6.5.1 + * + * 
Copyright (C) 1999-2006 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/* + * Modified version of swrast/s_spantemp.h for front-buffer rendering. The + * no-mask paths use a scratch row to avoid repeated calls to the loader. + * + * For the mask paths we always use an array of 4 elements of RB_TYPE. This is + * to satisfy the xorg loader requirement of an image pitch of 32 bits and + * should be ok for other loaders also. 
+ */ + + +#ifndef _SWRAST_SPANTEMP_ONCE +#define _SWRAST_SPANTEMP_ONCE + +static inline void +PUT_PIXEL( GLcontext *glCtx, GLint x, GLint y, GLubyte *p ) +{ + __DRIcontext *ctx = swrast_context(glCtx); + __DRIdrawable *draw = swrast_drawable(glCtx->DrawBuffer); + + __DRIscreen *screen = ctx->driScreenPriv; + + screen->swrast_loader->putImage(draw, __DRI_SWRAST_IMAGE_OP_DRAW, + x, y, 1, 1, (char *)p, + draw->loaderPrivate); +} + + +static inline void +GET_PIXEL( GLcontext *glCtx, GLint x, GLint y, GLubyte *p ) +{ + __DRIcontext *ctx = swrast_context(glCtx); + __DRIdrawable *read = swrast_drawable(glCtx->ReadBuffer); + + __DRIscreen *screen = ctx->driScreenPriv; + + screen->swrast_loader->getImage(read, x, y, 1, 1, (char *)p, + read->loaderPrivate); +} + +static inline void +PUT_ROW( GLcontext *glCtx, GLint x, GLint y, GLuint n, char *row ) +{ + __DRIcontext *ctx = swrast_context(glCtx); + __DRIdrawable *draw = swrast_drawable(glCtx->DrawBuffer); + + __DRIscreen *screen = ctx->driScreenPriv; + + screen->swrast_loader->putImage(draw, __DRI_SWRAST_IMAGE_OP_DRAW, + x, y, n, 1, row, + draw->loaderPrivate); +} + +static inline void +GET_ROW( GLcontext *glCtx, GLint x, GLint y, GLuint n, char *row ) +{ + __DRIcontext *ctx = swrast_context(glCtx); + __DRIdrawable *read = swrast_drawable(glCtx->ReadBuffer); + + __DRIscreen *screen = ctx->driScreenPriv; + + screen->swrast_loader->getImage(read, x, y, n, 1, row, + read->loaderPrivate); +} + +#endif /* _SWRAST_SPANTEMP_ONCE */ + + +/* + * Templates for the span/pixel-array write/read functions called via + * the gl_renderbuffer's GetRow, GetValues, PutRow, PutMonoRow, PutValues + * and PutMonoValues functions. + * + * Define the following macros before including this file: + * NAME(BASE) to generate the function name (i.e. 
add prefix or suffix) + * RB_TYPE the renderbuffer DataType + * CI_MODE if set, color index mode, else RGBA + * SPAN_VARS to declare any local variables + * INIT_PIXEL_PTR(P, X, Y) to initialize a pointer to a pixel + * INC_PIXEL_PTR(P) to increment a pixel pointer by one pixel + * STORE_PIXEL(DST, X, Y, VALUE) to store pixel values in buffer + * FETCH_PIXEL(DST, SRC) to fetch pixel values from buffer + * + * Note that in the STORE_PIXEL macros, we also pass in the (X,Y) coordinates + * for the pixels to be stored. This is useful when dithering and probably + * ignored otherwise. + */ + +#include "macros.h" + + +#ifdef CI_MODE +#define RB_COMPONENTS 1 +#elif !defined(RB_COMPONENTS) +#define RB_COMPONENTS 4 +#endif + + +static void +NAME(get_row)( GLcontext *ctx, struct gl_renderbuffer *rb, + GLuint count, GLint x, GLint y, void *values ) +{ +#ifdef SPAN_VARS + SPAN_VARS +#endif +#ifdef CI_MODE + RB_TYPE *dest = (RB_TYPE *) values; +#else + RB_TYPE (*dest)[RB_COMPONENTS] = (RB_TYPE (*)[RB_COMPONENTS]) values; +#endif + GLuint i; + char *row = swrast_drawable(ctx->ReadBuffer)->row; + INIT_PIXEL_PTR(pixel, x, y); + GET_ROW( ctx, x, YFLIP(xrb, y), count, row ); + for (i = 0; i < count; i++) { + FETCH_PIXEL(dest[i], pixel); + INC_PIXEL_PTR(pixel); + } + (void) rb; +} + + +static void +NAME(get_values)( GLcontext *ctx, struct gl_renderbuffer *rb, + GLuint count, const GLint x[], const GLint y[], void *values ) +{ +#ifdef SPAN_VARS + SPAN_VARS +#endif +#ifdef CI_MODE + RB_TYPE *dest = (RB_TYPE *) values; +#else + RB_TYPE (*dest)[RB_COMPONENTS] = (RB_TYPE (*)[RB_COMPONENTS]) values; +#endif + GLuint i; + for (i = 0; i < count; i++) { + RB_TYPE pixel[4]; + GET_PIXEL(ctx, x[i], YFLIP(xrb, y[i]), pixel); + FETCH_PIXEL(dest[i], pixel); + } + (void) rb; +} + + +static void +NAME(put_row)( GLcontext *ctx, struct gl_renderbuffer *rb, + GLuint count, GLint x, GLint y, + const void *values, const GLubyte mask[] ) +{ +#ifdef SPAN_VARS + SPAN_VARS +#endif + const RB_TYPE 
(*src)[RB_COMPONENTS] = (const RB_TYPE (*)[RB_COMPONENTS]) values; + GLuint i; + if (mask) { + for (i = 0; i < count; i++) { + if (mask[i]) { + RB_TYPE pixel[4]; + STORE_PIXEL(pixel, x + i, y, src[i]); + PUT_PIXEL(ctx, x + i, YFLIP(xrb, y), pixel); + } + } + } + else { + char *row = swrast_drawable(ctx->DrawBuffer)->row; + INIT_PIXEL_PTR(pixel, x, y); + for (i = 0; i < count; i++) { + STORE_PIXEL(pixel, x + i, y, src[i]); + INC_PIXEL_PTR(pixel); + } + PUT_ROW( ctx, x, YFLIP(xrb, y), count, row ); + } + (void) rb; +} + + +#if !defined(CI_MODE) +static void +NAME(put_row_rgb)( GLcontext *ctx, struct gl_renderbuffer *rb, + GLuint count, GLint x, GLint y, + const void *values, const GLubyte mask[] ) +{ +#ifdef SPAN_VARS + SPAN_VARS +#endif + const RB_TYPE (*src)[3] = (const RB_TYPE (*)[3]) values; + GLuint i; + if (mask) { + for (i = 0; i < count; i++) { + if (mask[i]) { + RB_TYPE pixel[4]; +#ifdef STORE_PIXEL_RGB + STORE_PIXEL_RGB(pixel, x + i, y, src[i]); +#else + STORE_PIXEL(pixel, x + i, y, src[i]); +#endif + PUT_PIXEL(ctx, x + i, YFLIP(xrb, y), pixel); + } + } + } + else { + char *row = swrast_drawable(ctx->DrawBuffer)->row; + INIT_PIXEL_PTR(pixel, x, y); + for (i = 0; i < count; i++) { +#ifdef STORE_PIXEL_RGB + STORE_PIXEL_RGB(pixel, x + i, y, src[i]); +#else + STORE_PIXEL(pixel, x + i, y, src[i]); +#endif + INC_PIXEL_PTR(pixel); + } + PUT_ROW( ctx, x, YFLIP(xrb, y), count, row ); + } + (void) rb; +} +#endif + + +static void +NAME(put_mono_row)( GLcontext *ctx, struct gl_renderbuffer *rb, + GLuint count, GLint x, GLint y, + const void *value, const GLubyte mask[] ) +{ +#ifdef SPAN_VARS + SPAN_VARS +#endif + const RB_TYPE *src = (const RB_TYPE *) value; + GLuint i; + if (mask) { + for (i = 0; i < count; i++) { + if (mask[i]) { + RB_TYPE pixel[4]; + STORE_PIXEL(pixel, x + i, y, src); + PUT_PIXEL(ctx, x + i, YFLIP(xrb, y), pixel); + } + } + } + else { + char *row = swrast_drawable(ctx->DrawBuffer)->row; + INIT_PIXEL_PTR(pixel, x, y); + for (i = 0; i < count; i++) { 
+ STORE_PIXEL(pixel, x + i, y, src); + INC_PIXEL_PTR(pixel); + } + PUT_ROW( ctx, x, YFLIP(xrb, y), count, row ); + } + (void) rb; +} + + +static void +NAME(put_values)( GLcontext *ctx, struct gl_renderbuffer *rb, + GLuint count, const GLint x[], const GLint y[], + const void *values, const GLubyte mask[] ) +{ +#ifdef SPAN_VARS + SPAN_VARS +#endif + const RB_TYPE (*src)[RB_COMPONENTS] = (const RB_TYPE (*)[RB_COMPONENTS]) values; + GLuint i; + ASSERT(mask); + for (i = 0; i < count; i++) { + if (mask[i]) { + RB_TYPE pixel[4]; + STORE_PIXEL(pixel, x[i], y[i], src[i]); + PUT_PIXEL(ctx, x[i], YFLIP(xrb, y[i]), pixel); + } + } + (void) rb; +} + + +static void +NAME(put_mono_values)( GLcontext *ctx, struct gl_renderbuffer *rb, + GLuint count, const GLint x[], const GLint y[], + const void *value, const GLubyte mask[] ) +{ +#ifdef SPAN_VARS + SPAN_VARS +#endif + const RB_TYPE *src = (const RB_TYPE *) value; + GLuint i; + ASSERT(mask); + for (i = 0; i < count; i++) { + if (mask[i]) { + RB_TYPE pixel[4]; + STORE_PIXEL(pixel, x[i], y[i], src); + PUT_PIXEL(ctx, x[i], YFLIP(xrb, y[i]), pixel); + } + } + (void) rb; +} + + +#undef NAME +#undef RB_TYPE +#undef RB_COMPONENTS +#undef CI_MODE +#undef SPAN_VARS +#undef INIT_PIXEL_PTR +#undef INC_PIXEL_PTR +#undef STORE_PIXEL +#undef STORE_PIXEL_RGB +#undef FETCH_PIXEL diff --git a/src/mesa/drivers/dri/unichrome/via_context.c b/src/mesa/drivers/dri/unichrome/via_context.c index bbb198bf0e..d9b4f2d112 100644 --- a/src/mesa/drivers/dri/unichrome/via_context.c +++ b/src/mesa/drivers/dri/unichrome/via_context.c @@ -679,46 +679,48 @@ void viaDestroyContext(__DRIcontextPrivate *driContextPriv) { GET_CURRENT_CONTEXT(ctx); - struct via_context *vmesa = + struct via_context *vmesa = (struct via_context *)driContextPriv->driverPrivate; struct via_context *current = ctx ? 
VIA_CONTEXT(ctx) : NULL; + assert(vmesa); /* should never be null */ + if (vmesa->driDrawable) { + viaWaitIdle(vmesa, GL_FALSE); + + if (vmesa->doPageFlip) { + LOCK_HARDWARE(vmesa); + if (vmesa->pfCurrentOffset != 0) { + fprintf(stderr, "%s - reset pf\n", __FUNCTION__); + viaResetPageFlippingLocked(vmesa); + } + UNLOCK_HARDWARE(vmesa); + } + } + /* check if we're deleting the currently bound context */ if (vmesa == current) { VIA_FLUSH_DMA(vmesa); _mesa_make_current(NULL, NULL, NULL); } - if (vmesa) { - viaWaitIdle(vmesa, GL_FALSE); - if (vmesa->doPageFlip) { - LOCK_HARDWARE(vmesa); - if (vmesa->pfCurrentOffset != 0) { - fprintf(stderr, "%s - reset pf\n", __FUNCTION__); - viaResetPageFlippingLocked(vmesa); - } - UNLOCK_HARDWARE(vmesa); - } - - _swsetup_DestroyContext(vmesa->glCtx); - _tnl_DestroyContext(vmesa->glCtx); - _vbo_DestroyContext(vmesa->glCtx); - _swrast_DestroyContext(vmesa->glCtx); - /* free the Mesa context */ - _mesa_destroy_context(vmesa->glCtx); - /* release our data */ - FreeBuffer(vmesa); + _swsetup_DestroyContext(vmesa->glCtx); + _tnl_DestroyContext(vmesa->glCtx); + _vbo_DestroyContext(vmesa->glCtx); + _swrast_DestroyContext(vmesa->glCtx); + /* free the Mesa context */ + _mesa_destroy_context(vmesa->glCtx); + /* release our data */ + FreeBuffer(vmesa); - assert (is_empty_list(&vmesa->tex_image_list[VIA_MEM_AGP])); - assert (is_empty_list(&vmesa->tex_image_list[VIA_MEM_VIDEO])); - assert (is_empty_list(&vmesa->tex_image_list[VIA_MEM_SYSTEM])); - assert (is_empty_list(&vmesa->freed_tex_buffers)); + assert (is_empty_list(&vmesa->tex_image_list[VIA_MEM_AGP])); + assert (is_empty_list(&vmesa->tex_image_list[VIA_MEM_VIDEO])); + assert (is_empty_list(&vmesa->tex_image_list[VIA_MEM_SYSTEM])); + assert (is_empty_list(&vmesa->freed_tex_buffers)); - driDestroyOptionCache(&vmesa->optionCache); + driDestroyOptionCache(&vmesa->optionCache); - FREE(vmesa); - } + FREE(vmesa); } diff --git a/src/mesa/drivers/dri/unichrome/via_screen.c 
b/src/mesa/drivers/dri/unichrome/via_screen.c index ca193bfa53..3648710533 100644 --- a/src/mesa/drivers/dri/unichrome/via_screen.c +++ b/src/mesa/drivers/dri/unichrome/via_screen.c @@ -24,8 +24,8 @@ #include <stdio.h> -#include "utils.h" #include "dri_util.h" +#include "utils.h" #include "glheader.h" #include "context.h" #include "framebuffer.h" diff --git a/src/mesa/drivers/dri/unichrome/via_tex.c b/src/mesa/drivers/dri/unichrome/via_tex.c index 0261a3ff17..15f15a89a6 100644 --- a/src/mesa/drivers/dri/unichrome/via_tex.c +++ b/src/mesa/drivers/dri/unichrome/via_tex.c @@ -820,9 +820,7 @@ static void viaTexImage(GLcontext *ctx, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + _mesa_generate_mipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); diff --git a/src/mesa/drivers/glslcompiler/Makefile b/src/mesa/drivers/glslcompiler/Makefile index 858457ddd4..dc4abd44d4 100644 --- a/src/mesa/drivers/glslcompiler/Makefile +++ b/src/mesa/drivers/glslcompiler/Makefile @@ -41,4 +41,4 @@ glslcompiler.o: glslcompiler.c clean: - rm -f *.o *~ $(PROGRAM) + -rm -f *.o *~ $(PROGRAM) diff --git a/src/mesa/drivers/windows/gdi/mesa.def b/src/mesa/drivers/windows/gdi/mesa.def index c525945426..3f2d644e86 100644 --- a/src/mesa/drivers/windows/gdi/mesa.def +++ b/src/mesa/drivers/windows/gdi/mesa.def @@ -867,8 +867,6 @@ EXPORTS _glapi_get_proc_address _mesa_add_soft_renderbuffers _mesa_add_renderbuffer - _mesa_attach_shader - _mesa_bind_attrib_location _mesa_buffer_data _mesa_buffer_get_subdata _mesa_buffer_map @@ -877,24 +875,20 @@ EXPORTS _mesa_bzero _mesa_calloc _mesa_choose_tex_format - _mesa_compile_shader _mesa_compressed_texture_size _mesa_create_framebuffer - _mesa_create_program - _mesa_create_shader _mesa_create_visual _mesa_delete_array_object _mesa_delete_buffer_object _mesa_delete_program - _mesa_delete_program2 - 
_mesa_delete_shader _mesa_delete_texture_object _mesa_destroy_framebuffer _mesa_destroy_visual - _mesa_detach_shader _mesa_enable_1_3_extensions _mesa_enable_1_4_extensions _mesa_enable_1_5_extensions + _mesa_enable_2_0_extensions + _mesa_enable_2_1_extensions _mesa_enable_sw_extensions _mesa_error _mesa_finish_render_texture @@ -902,28 +896,15 @@ EXPORTS _mesa_free _mesa_free_context_data _mesa_free_texture_image_data - _mesa_get_active_attrib - _mesa_get_active_uniform - _mesa_get_attached_shaders - _mesa_get_attrib_location + _mesa_generate_mipmap _mesa_get_compressed_teximage _mesa_get_current_context - _mesa_get_handle - _mesa_get_programiv - _mesa_get_program_info_log _mesa_get_program_register - _mesa_get_shaderiv - _mesa_get_shader_info_log - _mesa_get_shader_source _mesa_get_teximage - _mesa_get_uniformfv - _mesa_get_uniform_location _mesa_init_driver_functions + _mesa_init_glsl_driver_functions _mesa_init_renderbuffer _mesa_initialize_context - _mesa_is_program - _mesa_is_shader - _mesa_link_program _mesa_make_current _mesa_memcpy _mesa_memset @@ -942,7 +923,6 @@ EXPORTS _mesa_render_texture _mesa_ResizeBuffersMESA _mesa_resize_framebuffer - _mesa_shader_source _mesa_store_compressed_teximage1d _mesa_store_compressed_teximage2d _mesa_store_compressed_teximage3d @@ -957,12 +937,9 @@ EXPORTS _mesa_store_texsubimage3d _mesa_strcmp _mesa_test_proxy_teximage - _mesa_uniform - _mesa_uniform_matrix _mesa_unreference_framebuffer _mesa_update_framebuffer_visual _mesa_use_program - _mesa_validate_program _mesa_Viewport _swrast_Accum _swrast_Bitmap diff --git a/src/mesa/drivers/windows/gdi/wgl.c b/src/mesa/drivers/windows/gdi/wgl.c index f7028d01a3..0e2d2b3ca8 100644 --- a/src/mesa/drivers/windows/gdi/wgl.c +++ b/src/mesa/drivers/windows/gdi/wgl.c @@ -581,6 +581,13 @@ WINGDIAPI BOOL GLAPIENTRY wglUseFontBitmapsA(HDC hdc, DWORD first, return success; } +WINGDIAPI BOOL GLAPIENTRY wglShareLists(HGLRC hglrc1, + HGLRC hglrc2) +{ + WMesaShareLists((WMesaContext)hglrc1, 
(WMesaContext)hglrc2); + return(TRUE); +} + /* NOT IMPLEMENTED YET */ @@ -600,13 +607,6 @@ WINGDIAPI HGLRC GLAPIENTRY wglCreateLayerContext(HDC hdc, return(NULL); } -WINGDIAPI BOOL GLAPIENTRY wglShareLists(HGLRC hglrc1, - HGLRC hglrc2) -{ - WMesaShareLists(hglrc1, hglrc2); - return(TRUE); -} - WINGDIAPI BOOL GLAPIENTRY wglUseFontBitmapsW(HDC hdc, DWORD first, diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index 7a170b4d3d..e5cd8f0b80 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -1183,11 +1183,12 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) * GLX_ARB_multisample */ case GLX_SAMPLE_BUFFERS_ARB: - /* ms not supported */ - return NULL; case GLX_SAMPLES_ARB: - /* ms not supported */ - return NULL; + parselist++; + if (*parselist++ != 0) + /* ms not supported */ + return NULL; + break; /* * FBConfig attribs. diff --git a/src/mesa/drivers/xorg/.gitignore b/src/mesa/drivers/xorg/.gitignore deleted file mode 100644 index 18a777939c..0000000000 --- a/src/mesa/drivers/xorg/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -glxheader.h -xmesaP.h -xm_* diff --git a/src/mesa/drivers/xorg/Makefile b/src/mesa/drivers/xorg/Makefile deleted file mode 100644 index a1b417447b..0000000000 --- a/src/mesa/drivers/xorg/Makefile +++ /dev/null @@ -1,95 +0,0 @@ -# src/mesa/drivers/xorg/Makefile - -TOP = ../../../.. 
-include $(TOP)/configs/current - -LIBNAME = libGLcore.so - -SYMLINKS = \ - glxheader.h \ - xmesaP.h \ - xm_api.c \ - xm_buffer.c \ - xm_dd.c \ - xm_image.c \ - xm_image.h \ - xm_line.c \ - xm_span.c \ - xm_tri.c - -C_SOURCES = \ - xm_api.c \ - xm_buffer.c \ - xm_dd.c \ - xm_image.c \ - xm_line.c \ - xm_span.c \ - xm_tri.c \ - glcore.c - -######################################## - -MESA_MODULES = $(TOP)/src/mesa/libmesa.a - -C_SOURCES += ../common/driverfuncs.c -ifeq ("${DRIVER_DIRS}", "dri") -C_SOURCES += ../dri/common/utils.c -endif - -OBJECTS = $(C_SOURCES:.c=.o) - -### Include directories -INCLUDES = \ - -I. \ - -I.. \ - -I$(TOP)/include \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main \ - -I$(TOP)/src/mesa/glapi \ - `pkg-config --cflags xorg-server` - -ifeq ("${DRIVER_DIRS}", "dri") -INCLUDES += \ - -I$(TOP)/src/mesa/drivers/dri/common \ - `pkg-config --cflags libdrm` -endif - -# undef 'USE_XSHM' to make it explicit that 'XFree86Server' takes precedence -DRIVER_DEFINES = -UUSE_XSHM -DXFree86Server - -##### RULES ##### - -.c.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ - - -##### TARGETS ##### - -default: depend symlinks $(LIBNAME) - - -$(LIBNAME): $(OBJECTS) $(MESA_MODULES) Makefile - $(TOP)/bin/mklib -noprefix -o $@ \ - $(OBJECTS) $(MESA_MODULES) $(GLCORE_LIB_DEPS) - - -depend: $(C_SOURCES) $(SYMLINKS) - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) \ - > /dev/null - - -clean: - -rm -f *.o *.so $(SYMLINKS) - -rm -f depend depend.bak - -install: $(LIBNAME) - $(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) - $(INSTALL) -m 755 $(LIBNAME) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) - -$(SYMLINKS): - @[ -e $@ ] || ln -sf ../x11/$@ ./ - -symlinks: $(SYMLINKS) - -include depend diff --git a/src/mesa/drivers/xorg/glcore.c b/src/mesa/drivers/xorg/glcore.c deleted file mode 100644 index a0199117c6..0000000000 --- a/src/mesa/drivers/xorg/glcore.c +++ /dev/null @@ -1,24 +0,0 @@ - -#define _NEED_GL_CORE_IF -#include 
<GL/xmesa.h> -#include <GL/internal/glcore.h> -#include "xmesaP.h" - -PUBLIC -__GLcoreModule GL_Core = { - XMesaCreateVisual, - XMesaDestroyVisual, - - XMesaCreateWindowBuffer, - XMesaCreatePixmapBuffer, - XMesaDestroyBuffer, - XMesaSwapBuffers, - XMesaResizeBuffers, - - XMesaCreateContext, - XMesaDestroyContext, - XMesaCopyContext, - XMesaMakeCurrent2, - XMesaForceCurrent, - XMesaLoseCurrent -}; diff --git a/src/mesa/glapi/Makefile b/src/mesa/glapi/Makefile index 6520f75e13..adc53d9249 100644 --- a/src/mesa/glapi/Makefile +++ b/src/mesa/glapi/Makefile @@ -7,27 +7,43 @@ TOP = ../../.. include $(TOP)/configs/current -GLX_DIR = ../../glx/x11 - OUTPUTS = glprocs.h glapitemp.h glapioffsets.h glapitable.h dispatch.h \ ../main/enums.c \ ../x86/glapi_x86.S \ ../x86-64/glapi_x86-64.S \ ../sparc/glapi_sparc.S \ ../drivers/dri/common/extension_helper.h \ - $(GLX_DIR)/indirect.c \ - $(GLX_DIR)/indirect.h \ - $(GLX_DIR)/indirect_init.c \ - $(GLX_DIR)/indirect_size.h \ - $(GLX_DIR)/indirect_size.c \ + ../../glx/x11/indirect.c \ + ../../glx/x11/indirect.h \ + ../../glx/x11/indirect_init.c \ + ../../glx/x11/indirect_size.h \ + ../../glx/x11/indirect_size.c + + +GLX_DIR = $(XORG_BASE)/glx + +SERVER_GLAPI_FILES = \ + $(GLX_DIR)/glapi.h \ + $(GLX_DIR)/glapi.c \ + $(GLX_DIR)/glthread.c \ + $(GLX_DIR)/glthread.h + +SERVER_OUTPUTS = \ $(GLX_DIR)/indirect_dispatch.c \ $(GLX_DIR)/indirect_dispatch_swap.c \ $(GLX_DIR)/indirect_dispatch.h \ $(GLX_DIR)/indirect_reqsize.c \ $(GLX_DIR)/indirect_reqsize.h \ + $(GLX_DIR)/indirect_size.h \ $(GLX_DIR)/indirect_size_get.c \ $(GLX_DIR)/indirect_size_get.h \ - $(GLX_DIR)/indirect_table.c + $(GLX_DIR)/indirect_table.c \ + $(GLX_DIR)/glapitemp.h \ + $(GLX_DIR)/glapitable.h \ + $(GLX_DIR)/glapioffsets.h \ + $(GLX_DIR)/glprocs.h \ + $(GLX_DIR)/dispatch.h \ + $(SERVER_GLAPI_FILES) API_XML = gl_API.xml \ EXT_framebuffer_object.xml \ @@ -36,21 +52,33 @@ API_XML = gl_API.xml \ COMMON = gl_XML.py glX_XML.py license.py $(API_XML) typeexpr.py COMMON_GLX = 
$(COMMON) glX_API.xml glX_XML.py glX_proto_common.py -all: $(OUTPUTS) +all: check-xorg-source $(OUTPUTS) $(SERVER_OUTPUTS) + +check-xorg-source: + @if ! test -d $(GLX_DIR); then \ + echo "ERROR: Must specify path to xserver checkout; set XORG_BASE."; \ + exit 1; \ + fi + +$(GLX_DIR)/%.c: %.c + cp $< $@ + +$(GLX_DIR)/%.h: %.h + cp $< $@ -glprocs.h: gl_procs.py $(COMMON) +glprocs.h $(GLX_DIR)/glprocs.h: gl_procs.py $(COMMON) $(PYTHON2) $(PYTHON_FLAGS) $< > $@ -glapitemp.h: gl_apitemp.py $(COMMON) +glapitemp.h $(GLX_DIR)/glapitemp.h: gl_apitemp.py $(COMMON) $(PYTHON2) $(PYTHON_FLAGS) $< > $@ -glapioffsets.h: gl_offsets.py $(COMMON) +glapioffsets.h $(GLX_DIR)/glapioffsets.h: gl_offsets.py $(COMMON) $(PYTHON2) $(PYTHON_FLAGS) $< > $@ -glapitable.h: gl_table.py $(COMMON) +glapitable.h $(GLX_DIR)/glapitable.h: gl_table.py $(COMMON) $(PYTHON2) $(PYTHON_FLAGS) $< > $@ -dispatch.h: gl_table.py $(COMMON) +dispatch.h $(GLX_DIR)/dispatch.h: gl_table.py $(COMMON) $(PYTHON2) $(PYTHON_FLAGS) $< -m remap_table > $@ ../main/enums.c: gl_enums.py $(COMMON) @@ -69,20 +97,20 @@ dispatch.h: gl_table.py $(COMMON) ../drivers/dri/common/extension_helper.h: extension_helper.py $(COMMON) $(PYTHON2) $(PYTHON_FLAGS) $< > $@ -$(GLX_DIR)/indirect.c: glX_proto_send.py $(COMMON_GLX) +../../glx/x11/indirect.c: glX_proto_send.py $(COMMON_GLX) $(PYTHON2) $(PYTHON_FLAGS) $< -m proto | $(INDENT) $(INDENT_FLAGS) > $@ -$(GLX_DIR)/indirect.h: glX_proto_send.py $(COMMON_GLX) +../../glx/x11/indirect.h: glX_proto_send.py $(COMMON_GLX) $(PYTHON2) $(PYTHON_FLAGS) $< -m init_h > $@ -$(GLX_DIR)/indirect_init.c: glX_proto_send.py $(COMMON_GLX) +../../glx/x11/indirect_init.c: glX_proto_send.py $(COMMON_GLX) $(PYTHON2) $(PYTHON_FLAGS) $< -m init_c > $@ -$(GLX_DIR)/indirect_size.h: glX_proto_size.py $(COMMON_GLX) +../../glx/x11/indirect_size.h $(GLX_DIR)/indirect_size.h: glX_proto_size.py $(COMMON_GLX) $(PYTHON2) $(PYTHON_FLAGS) $< -m size_h --only-set -h _INDIRECT_SIZE_H_ \ | $(INDENT) $(INDENT_FLAGS) > $@ 
-$(GLX_DIR)/indirect_size.c: glX_proto_size.py $(COMMON_GLX) +../../glx/x11/indirect_size.c: glX_proto_size.py $(COMMON_GLX) $(PYTHON2) $(PYTHON_FLAGS) $< -m size_c --only-set \ | $(INDENT) $(INDENT_FLAGS) > $@ @@ -113,5 +141,5 @@ $(GLX_DIR)/indirect_table.c: glX_server_table.py gl_and_glX_API.xml $(COMMON_GLX $(PYTHON2) $(PYTHON_FLAGS) $< -f gl_and_glX_API.xml > $@ clean: - rm -f *~ *.pyo - rm -f $(OUTPUTS) + -rm -f *~ *.pyo + -rm -f $(OUTPUTS) diff --git a/src/mesa/glapi/glapi.c b/src/mesa/glapi/glapi.c index 36b09e68e5..c4d101aee5 100644 --- a/src/mesa/glapi/glapi.c +++ b/src/mesa/glapi/glapi.c @@ -51,10 +51,19 @@ #ifdef HAVE_DIX_CONFIG_H + #include <dix-config.h> -#endif +#define PUBLIC + +#else #include "glheader.h" + +#endif + +#include <stdlib.h> +#include <string.h> + #include "glapi.h" #include "glapioffsets.h" #include "glapitable.h" diff --git a/src/mesa/glapi/glthread.c b/src/mesa/glapi/glthread.c index 92f2e5bf56..813d6f9dbc 100644 --- a/src/mesa/glapi/glthread.c +++ b/src/mesa/glapi/glthread.c @@ -25,7 +25,7 @@ /* * XXX There's probably some work to do in order to make this file - * truly reusable outside of Mesa. First, the glheader.h include must go. + * truly reusable outside of Mesa. 
*/ @@ -33,7 +33,7 @@ #include <dix-config.h> #endif -#include "glheader.h" +#include <stdlib.h> #include "glthread.h" diff --git a/src/mesa/glapi/glthread.h b/src/mesa/glapi/glthread.h index a61086d0dc..e2765cebb1 100644 --- a/src/mesa/glapi/glthread.h +++ b/src/mesa/glapi/glthread.h @@ -259,11 +259,11 @@ typedef benaphore _glthread_Mutex; * THREADS not defined */ -typedef GLuint _glthread_TSD; +typedef int _glthread_TSD; -typedef GLuint _glthread_Thread; +typedef int _glthread_Thread; -typedef GLuint _glthread_Mutex; +typedef int _glthread_Mutex; #define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = 0 diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index ab0f035b45..58a39d11d4 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -188,6 +188,7 @@ #define MAX_PROGRAM_ADDRESS_REGS 2 #define MAX_UNIFORMS 128 #define MAX_VARYING 8 +#define MAX_SAMPLERS 8 /*@}*/ /** For GL_NV_vertex_program */ diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 733aaad030..2c387d8e2c 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -149,8 +149,6 @@ int MESA_DEBUG_FLAGS = 0; /* ubyte -> float conversion */ GLfloat _mesa_ubyte_to_float_color_tab[256]; -static void -free_shared_state( GLcontext *ctx, struct gl_shared_state *ss ); /** @@ -420,12 +418,14 @@ alloc_shared_state( GLcontext *ctx ) #endif #if FEATURE_ARB_vertex_program - ss->DefaultVertexProgram = ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); + ss->DefaultVertexProgram = (struct gl_vertex_program *) + ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); if (!ss->DefaultVertexProgram) goto cleanup; #endif #if FEATURE_ARB_fragment_program - ss->DefaultFragmentProgram = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + ss->DefaultFragmentProgram = (struct gl_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); if (!ss->DefaultFragmentProgram) goto cleanup; #endif @@ -502,12 +502,10 @@ cleanup: 
_mesa_DeleteHashTable(ss->Programs); #endif #if FEATURE_ARB_vertex_program - if (ss->DefaultVertexProgram) - ctx->Driver.DeleteProgram(ctx, ss->DefaultVertexProgram); + _mesa_reference_vertprog(ctx, &ss->DefaultVertexProgram, NULL); #endif #if FEATURE_ARB_fragment_program - if (ss->DefaultFragmentProgram) - ctx->Driver.DeleteProgram(ctx, ss->DefaultFragmentProgram); + _mesa_reference_fragprog(ctx, &ss->DefaultFragmentProgram, NULL); #endif #if FEATURE_ATI_fragment_shader if (ss->DefaultFragmentShader) @@ -584,6 +582,8 @@ delete_program_cb(GLuint id, void *data, void *userData) { struct gl_program *prog = (struct gl_program *) data; GLcontext *ctx = (GLcontext *) userData; + ASSERT(prog->RefCount == 1); /* should only be referenced by hash table */ + prog->RefCount = 0; /* now going away */ ctx->Driver.DeleteProgram(ctx, prog); } @@ -709,15 +709,21 @@ free_shared_state( GLcontext *ctx, struct gl_shared_state *ss ) _mesa_HashDeleteAll(ss->DisplayList, delete_displaylist_cb, ctx); _mesa_DeleteHashTable(ss->DisplayList); +#if FEATURE_ARB_shader_objects + _mesa_HashWalk(ss->ShaderObjects, free_shader_program_data_cb, ctx); + _mesa_HashDeleteAll(ss->ShaderObjects, delete_shader_cb, ctx); + _mesa_DeleteHashTable(ss->ShaderObjects); +#endif + #if defined(FEATURE_NV_vertex_program) || defined(FEATURE_NV_fragment_program) _mesa_HashDeleteAll(ss->Programs, delete_program_cb, ctx); _mesa_DeleteHashTable(ss->Programs); #endif #if FEATURE_ARB_vertex_program - ctx->Driver.DeleteProgram(ctx, ss->DefaultVertexProgram); + _mesa_reference_vertprog(ctx, &ss->DefaultVertexProgram, NULL); #endif #if FEATURE_ARB_fragment_program - ctx->Driver.DeleteProgram(ctx, ss->DefaultFragmentProgram); + _mesa_reference_fragprog(ctx, &ss->DefaultFragmentProgram, NULL); #endif #if FEATURE_ATI_fragment_shader @@ -734,12 +740,6 @@ free_shared_state( GLcontext *ctx, struct gl_shared_state *ss ) _mesa_HashDeleteAll(ss->ArrayObjects, delete_arrayobj_cb, ctx); _mesa_DeleteHashTable(ss->ArrayObjects); -#if 
FEATURE_ARB_shader_objects - _mesa_HashWalk(ss->ShaderObjects, free_shader_program_data_cb, ctx); - _mesa_HashDeleteAll(ss->ShaderObjects, delete_shader_cb, ctx); - _mesa_DeleteHashTable(ss->ShaderObjects); -#endif - #if FEATURE_EXT_framebuffer_object _mesa_HashDeleteAll(ss->FrameBuffers, delete_framebuffer_cb, ctx); _mesa_DeleteHashTable(ss->FrameBuffers); @@ -1251,6 +1251,14 @@ _mesa_free_context_data( GLcontext *ctx ) _mesa_unreference_framebuffer(&ctx->DrawBuffer); _mesa_unreference_framebuffer(&ctx->ReadBuffer); + _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, NULL); + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL); + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, NULL); + + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram, NULL); + _mesa_free_attrib_data(ctx); _mesa_free_lighting_data( ctx ); _mesa_free_eval_data( ctx ); diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index ce33905af1..e3ded41aca 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -328,6 +328,12 @@ struct dd_function_table { GLsizei width, GLsizei height ); /** + * Called by glGenerateMipmap() or when GL_GENERATE_MIPMAP_SGIS is enabled. + */ + void (*GenerateMipmap)(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj); + + /** * Called by glTexImage[123]D when user specifies a proxy texture * target. 
* diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index 23ede7bb68..63a00e04f5 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -3248,6 +3248,36 @@ save_StencilFuncSeparate(GLenum face, GLenum func, GLint ref, GLuint mask) static void GLAPIENTRY +save_StencilFuncSeparateATI(GLenum frontfunc, GLenum backfunc, GLint ref, + GLuint mask) +{ + GET_CURRENT_CONTEXT(ctx); + Node *n; + ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx); + /* GL_FRONT */ + n = ALLOC_INSTRUCTION(ctx, OPCODE_STENCIL_FUNC_SEPARATE, 4); + if (n) { + n[1].e = GL_FRONT; + n[2].e = frontfunc; + n[3].i = ref; + n[4].ui = mask; + } + /* GL_BACK */ + n = ALLOC_INSTRUCTION(ctx, OPCODE_STENCIL_FUNC_SEPARATE, 4); + if (n) { + n[1].e = GL_BACK; + n[2].e = backfunc; + n[3].i = ref; + n[4].ui = mask; + } + if (ctx->ExecuteFlag) { + CALL_StencilFuncSeparate(ctx->Exec, (GL_FRONT, frontfunc, ref, mask)); + CALL_StencilFuncSeparate(ctx->Exec, (GL_BACK, backfunc, ref, mask)); + } +} + + +static void GLAPIENTRY save_StencilMaskSeparate(GLenum face, GLuint mask) { GET_CURRENT_CONTEXT(ctx); @@ -7838,6 +7868,9 @@ _mesa_init_dlist_table(struct _glapi_table *table) SET_StencilMaskSeparate(table, save_StencilMaskSeparate); SET_StencilOpSeparate(table, save_StencilOpSeparate); + /* ATI_separate_stencil */ + SET_StencilFuncSeparateATI(table, save_StencilFuncSeparateATI); + /* GL_ARB_imaging */ /* Not all are supported */ SET_BlendColor(table, save_BlendColor); diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 9b60c73294..8e9948cb45 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1544,7 +1544,7 @@ _mesa_GenerateMipmapEXT(GLenum target) /* XXX this might not handle cube maps correctly */ _mesa_lock_texture(ctx, texObj); - _mesa_generate_mipmap(ctx, target, texUnit, texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); _mesa_unlock_texture(ctx, texObj); } diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index 44357fbd6a..8ca912b3a9 100644 
--- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -934,7 +934,6 @@ make_2d_stack_mipmap(const struct gl_texture_format *format, GLint border, */ void _mesa_generate_mipmap(GLcontext *ctx, GLenum target, - const struct gl_texture_unit *texUnit, struct gl_texture_object *texObj) { const struct gl_texture_image *srcImage; diff --git a/src/mesa/main/mipmap.h b/src/mesa/main/mipmap.h index df78603283..46e16902c8 100644 --- a/src/mesa/main/mipmap.h +++ b/src/mesa/main/mipmap.h @@ -30,7 +30,6 @@ extern void _mesa_generate_mipmap(GLcontext *ctx, GLenum target, - const struct gl_texture_unit *texUnit, struct gl_texture_object *texObj); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index c8718a7f63..04da767ec9 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1544,19 +1544,17 @@ struct gl_texture_unit /*@}*/ }; -struct texenvprog_cache_item { - GLuint hash; - void *key; - struct gl_fragment_program *data; - struct texenvprog_cache_item *next; -}; -struct texenvprog_cache { +struct texenvprog_cache_item; + +struct texenvprog_cache +{ struct texenvprog_cache_item **items; GLuint size, n_items; GLcontext *ctx; }; + /** * Texture attribute group (GL_TEXTURE_BIT). */ @@ -1865,6 +1863,7 @@ enum register_file /** Vertex and fragment instructions */ struct prog_instruction; struct gl_program_parameter_list; +struct gl_uniform_list; /** @@ -1884,6 +1883,7 @@ struct gl_program GLbitfield InputsRead; /**< Bitmask of which input regs are read */ GLbitfield OutputsWritten; /**< Bitmask of which output regs are written to */ GLbitfield TexturesUsed[MAX_TEXTURE_IMAGE_UNITS]; /**< TEXTURE_x_BIT bitmask */ + GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */ GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */ /** Named parameters, constants, etc. 
from program text */ @@ -1896,6 +1896,11 @@ struct gl_program /** Vertex program user-defined attributes */ struct gl_program_parameter_list *Attributes; + /** Map from sampler unit to texture unit (set by glUniform1i()) */ + GLubyte SamplerUnits[MAX_SAMPLERS]; + /** Which texture target is being sampled (TEXTURE_1D/2D/3D/etc_INDEX) */ + GLubyte SamplerTargets[MAX_SAMPLERS]; + /** Logical counts */ /*@{*/ GLuint NumInstructions; @@ -2088,7 +2093,7 @@ struct gl_query_state /** - * A GLSL shader object. + * A GLSL vertex or fragment shader object. */ struct gl_shader { @@ -2106,7 +2111,8 @@ struct gl_shader /** - * A GLSL program object. Basically a linked collection of "shaders". + * A GLSL program object. + * Basically a linked collection of vertex and fragment shaders. */ struct gl_shader_program { @@ -2121,7 +2127,7 @@ struct gl_shader_program /* post-link info: */ struct gl_vertex_program *VertexProgram; /**< Linked vertex program */ struct gl_fragment_program *FragmentProgram; /**< Linked fragment prog */ - struct gl_program_parameter_list *Uniforms; /**< Plus constants, etc */ + struct gl_uniform_list *Uniforms; struct gl_program_parameter_list *Varying; struct gl_program_parameter_list *Attributes; /**< Vertex attributes */ GLboolean LinkStatus; /**< GL_LINK_STATUS */ @@ -2185,10 +2191,10 @@ struct gl_shared_state /*@{*/ struct _mesa_HashTable *Programs; /**< All vertex/fragment programs */ #if FEATURE_ARB_vertex_program - struct gl_program *DefaultVertexProgram; + struct gl_vertex_program *DefaultVertexProgram; #endif #if FEATURE_ARB_fragment_program - struct gl_program *DefaultFragmentProgram; + struct gl_fragment_program *DefaultFragmentProgram; #endif /*@}*/ diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 5ff67b654e..1c73c5c462 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -978,50 +978,60 @@ update_program(GLcontext *ctx) * 3. Programs derived from fixed-function state. 
*/ - ctx->FragmentProgram._Current = NULL; + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL); if (shProg && shProg->LinkStatus) { /* Use shader programs */ /* XXX this isn't quite right, since we may have either a vertex * _or_ fragment shader (not always both). */ - ctx->VertexProgram._Current = shProg->VertexProgram; - ctx->FragmentProgram._Current = shProg->FragmentProgram; + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, + shProg->VertexProgram); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, + shProg->FragmentProgram); } else { if (ctx->VertexProgram._Enabled) { /* use user-defined vertex program */ - ctx->VertexProgram._Current = ctx->VertexProgram.Current; + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, + ctx->VertexProgram.Current); } else if (ctx->VertexProgram._MaintainTnlProgram) { /* Use vertex program generated from fixed-function state. * The _Current pointer will get set in * _tnl_UpdateFixedFunctionProgram() later if appropriate. */ - ctx->VertexProgram._Current = NULL; + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL); } else { /* no vertex program */ - ctx->VertexProgram._Current = NULL; + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL); } if (ctx->FragmentProgram._Enabled) { /* use user-defined vertex program */ - ctx->FragmentProgram._Current = ctx->FragmentProgram.Current; + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, + ctx->FragmentProgram.Current); } else if (ctx->FragmentProgram._MaintainTexEnvProgram) { /* Use fragment program generated from fixed-function state. * The _Current pointer will get set in _mesa_UpdateTexEnvProgram() * later if appropriate. 
*/ - ctx->FragmentProgram._Current = NULL; + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL); } else { /* no fragment program */ - ctx->FragmentProgram._Current = NULL; + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL); } } + if (ctx->VertexProgram._Current) + assert(ctx->VertexProgram._Current->Base.Parameters); + if (ctx->FragmentProgram._Current) + assert(ctx->FragmentProgram._Current->Base.Parameters); + + ctx->FragmentProgram._Active = ctx->FragmentProgram._Enabled; if (ctx->FragmentProgram._MaintainTexEnvProgram && !ctx->FragmentProgram._Enabled) { diff --git a/src/mesa/main/texcompress_fxt1.c b/src/mesa/main/texcompress_fxt1.c index b6991f45ed..45f344b0c5 100644 --- a/src/mesa/main/texcompress_fxt1.c +++ b/src/mesa/main/texcompress_fxt1.c @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 7.1 * - * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -298,22 +298,17 @@ const struct gl_texture_format _mesa_texformat_rgba_fxt1 = { /* * Define a 64-bit unsigned integer type and macros */ -#if defined(__GNUC__) && !defined(__cplusplus) +#ifdef GL_EXT_timer_query /* this extensions defines the GLuint64EXT type */ #define FX64_NATIVE 1 -#ifdef __MINGW32__ -typedef unsigned long Fx64; -#else -typedef unsigned long long Fx64; -#endif - +typedef GLuint64EXT Fx64; #define FX64_MOV32(a, b) a = b #define FX64_OR32(a, b) a |= b #define FX64_SHL(a, c) a <<= c -#else /* !__GNUC__ */ +#else /* !GL_EXT_timer_query */ #define FX64_NATIVE 0 @@ -335,7 +330,7 @@ typedef struct { } \ } while (0) -#endif /* !__GNUC__ */ +#endif /* !GL_EXT_timer_query */ #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */ diff --git a/src/mesa/main/texenvprogram.c 
b/src/mesa/main/texenvprogram.c index fb68bf0720..af19a38c42 100644 --- a/src/mesa/main/texenvprogram.c +++ b/src/mesa/main/texenvprogram.c @@ -28,12 +28,23 @@ #include "glheader.h" #include "macros.h" #include "enums.h" +#include "shader/program.h" #include "shader/prog_parameter.h" #include "shader/prog_instruction.h" #include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "texenvprogram.h" + +struct texenvprog_cache_item +{ + GLuint hash; + void *key; + struct gl_fragment_program *data; + struct texenvprog_cache_item *next; +}; + + /** * This MAX is probably a bit generous, but that's OK. There can be * up to four instructions per texture unit (TEX + 3 for combine), @@ -1133,7 +1144,7 @@ search_cache(const struct texenvprog_cache *cache, for (c = cache->items[hash % cache->size]; c; c = c->next) { if (c->hash == hash && memcmp(c->key, key, keysize) == 0) - return (struct gl_fragment_program *) c->data; + return c->data; } return NULL; @@ -1161,7 +1172,7 @@ static void rehash( struct texenvprog_cache *cache ) cache->size = size; } -static void clear_cache( struct texenvprog_cache *cache ) +static void clear_cache(GLcontext *ctx, struct texenvprog_cache *cache) { struct texenvprog_cache_item *c, *next; GLuint i; @@ -1170,8 +1181,7 @@ static void clear_cache( struct texenvprog_cache *cache ) for (c = cache->items[i]; c; c = next) { next = c->next; _mesa_free(c->key); - cache->ctx->Driver.DeleteProgram(cache->ctx, - (struct gl_program *) c->data); + _mesa_reference_fragprog(ctx, &c->data, NULL); _mesa_free(c); } cache->items[i] = NULL; @@ -1182,25 +1192,25 @@ static void clear_cache( struct texenvprog_cache *cache ) } -static void cache_item( struct texenvprog_cache *cache, +static void cache_item( GLcontext *ctx, + struct texenvprog_cache *cache, GLuint hash, const struct state_key *key, - void *data ) + struct gl_fragment_program *prog) { - struct texenvprog_cache_item *c - = (struct texenvprog_cache_item *) MALLOC(sizeof(*c)); + struct 
texenvprog_cache_item *c = CALLOC_STRUCT(texenvprog_cache_item); c->hash = hash; c->key = _mesa_malloc(sizeof(*key)); memcpy(c->key, key, sizeof(*key)); - c->data = (struct gl_fragment_program *) data; + c->data = prog; if (cache->n_items > cache->size * 1.5) { if (cache->size < 1000) rehash(cache); else - clear_cache(cache); + clear_cache(ctx, cache); } cache->n_items++; @@ -1243,32 +1253,30 @@ _mesa_UpdateTexEnvProgram( GLcontext *ctx ) /* If a conventional fragment program/shader isn't in effect... */ if (!ctx->FragmentProgram._Enabled && (!ctx->Shader.CurrentProgram || !ctx->Shader.CurrentProgram->FragmentProgram)) { + struct gl_fragment_program *newProg; + make_state_key(ctx, &key); hash = hash_key(&key); - ctx->FragmentProgram._Current = - ctx->FragmentProgram._TexEnvProgram = - search_cache(&ctx->Texture.env_fp_cache, hash, &key, sizeof(key)); + newProg = search_cache(&ctx->Texture.env_fp_cache, hash, &key, sizeof(key)); + + if (!newProg) { + /* create new tex env program */ - if (!ctx->FragmentProgram._TexEnvProgram) { if (0) _mesa_printf("Building new texenv proggy for key %x\n", hash); - /* create new tex env program */ - ctx->FragmentProgram._Current = - ctx->FragmentProgram._TexEnvProgram = - (struct gl_fragment_program *) + newProg = (struct gl_fragment_program *) ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); - create_new_program(ctx, &key, ctx->FragmentProgram._TexEnvProgram); + create_new_program(ctx, &key, newProg); - cache_item(&ctx->Texture.env_fp_cache, hash, &key, - ctx->FragmentProgram._TexEnvProgram); - } - else { - if (0) - _mesa_printf("Found existing texenv program for key %x\n", hash); + /* Our ownership of newProg is transferred to the cache */ + cache_item(ctx, &ctx->Texture.env_fp_cache, hash, &key, newProg); } + + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, newProg); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram, newProg); } else { /* _Current pointer has been updated in 
update_program */ @@ -1298,6 +1306,6 @@ void _mesa_TexEnvProgramCacheInit( GLcontext *ctx ) void _mesa_TexEnvProgramCacheDestroy( GLcontext *ctx ) { - clear_cache(&ctx->Texture.env_fp_cache); + clear_cache(ctx, &ctx->Texture.env_fp_cache); _mesa_free(ctx->Texture.env_fp_cache.items); } diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 90edca86e8..5363e9e080 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -2918,9 +2918,7 @@ _mesa_store_teximage1d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); @@ -3004,9 +3002,7 @@ _mesa_store_teximage2d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); @@ -3080,9 +3076,7 @@ _mesa_store_teximage3d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); @@ -3128,9 +3122,7 @@ _mesa_store_texsubimage1d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); @@ -3183,9 +3175,7 @@ _mesa_store_texsubimage2d(GLcontext *ctx, GLenum 
target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); @@ -3238,9 +3228,7 @@ _mesa_store_texsubimage3d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); @@ -3314,9 +3302,7 @@ _mesa_store_compressed_teximage2d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, &ctx->Unpack); @@ -3426,9 +3412,7 @@ _mesa_store_compressed_texsubimage2d(GLcontext *ctx, GLenum target, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, &ctx->Unpack); diff --git a/src/mesa/main/version.h b/src/mesa/main/version.h index 9229077f42..2f459e517a 100644 --- a/src/mesa/main/version.h +++ b/src/mesa/main/version.h @@ -31,7 +31,7 @@ #define MESA_MAJOR 7 #define MESA_MINOR 1 #define MESA_PATCH 0 -#define MESA_VERSION_STRING "7.1" +#define MESA_VERSION_STRING "7.1 rc1" /* To make version comparison easy */ #define MESA_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c index 74004e9b13..60aaabe679 100644 --- a/src/mesa/shader/arbprogparse.c +++ 
b/src/mesa/shader/arbprogparse.c @@ -3851,8 +3851,11 @@ _mesa_parse_arb_fragment_program(GLcontext* ctx, GLenum target, program->Base.NumNativeTexIndirections = ap.Base.NumTexIndirections; program->Base.InputsRead = ap.Base.InputsRead; program->Base.OutputsWritten = ap.Base.OutputsWritten; - for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) + for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) { program->Base.TexturesUsed[i] = ap.TexturesUsed[i]; + if (ap.TexturesUsed[i]) + program->Base.SamplersUsed |= (1 << i); + } program->Base.ShadowSamplers = ap.ShadowSamplers; program->FogOption = ap.FogOption; program->UsesKill = ap.UsesKill; diff --git a/src/mesa/shader/descrip.mms b/src/mesa/shader/descrip.mms index 157c193c79..bdac946efe 100644 --- a/src/mesa/shader/descrip.mms +++ b/src/mesa/shader/descrip.mms @@ -1,6 +1,6 @@ # Makefile for core library for VMS # contributed by Jouk Jansen joukj@hrem.nano.tudelft.nl -# Last revision : 3 October 2007 +# Last revision : 27 May 2008 .first define gl [---.include.gl] define math [-.math] @@ -35,7 +35,7 @@ SOURCES = \ prog_parameter.c \ prog_print.c \ prog_statevars.c \ - shader_api.c + shader_api.c prog_uniform.c OBJECTS = \ atifragshader.obj,\ @@ -52,7 +52,7 @@ OBJECTS = \ prog_parameter.obj,\ prog_print.obj,\ prog_statevars.obj,\ - shader_api.obj + shader_api.obj,prog_uniform.obj ##### RULES ##### @@ -90,3 +90,4 @@ prog_parameter.obj : prog_parameter.c prog_print.obj : prog_print.c prog_statevars.obj : prog_statevars.c shader_api.obj : shader_api.c +prog_uniform.obj : prog_uniform.c diff --git a/src/mesa/shader/prog_execute.c b/src/mesa/shader/prog_execute.c index cb17aa501c..8ce2ca3964 100644 --- a/src/mesa/shader/prog_execute.c +++ b/src/mesa/shader/prog_execute.c @@ -310,6 +310,8 @@ fetch_texel(GLcontext *ctx, const GLfloat texcoord[4], GLfloat lodBias, GLfloat color[4]) { + const GLuint unit = machine->Samplers[inst->TexSrcUnit]; + /* Note: we only have the right derivatives for fragment input attribs. 
*/ if (machine->NumDeriv > 0 && @@ -320,12 +322,10 @@ fetch_texel(GLcontext *ctx, machine->FetchTexelDeriv(ctx, texcoord, machine->DerivX[attr], machine->DerivY[attr], - lodBias, - inst->TexSrcUnit, color); + lodBias, unit, color); } else { - machine->FetchTexelLod(ctx, texcoord, lodBias, - inst->TexSrcUnit, color); + machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color); } } @@ -1522,9 +1522,7 @@ _mesa_execute_program(GLcontext * ctx, default: _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program", inst->Opcode); - assert(0); return GL_TRUE; /* return value doesn't matter */ - } numExec++; diff --git a/src/mesa/shader/prog_execute.h b/src/mesa/shader/prog_execute.h index 3ea0ba1565..18b13e11a4 100644 --- a/src/mesa/shader/prog_execute.h +++ b/src/mesa/shader/prog_execute.h @@ -63,6 +63,8 @@ struct gl_program_machine GLuint CondCodes[4]; /**< COND_* value for x/y/z/w */ GLint AddressReg[MAX_PROGRAM_ADDRESS_REGS][4]; + const GLubyte *Samplers; /** Array mapping sampler var to tex unit */ + GLuint CallStack[MAX_PROGRAM_CALL_DEPTH]; /**< For CAL/RET instructions */ GLuint StackDepth; /**< Index/ptr to top of CallStack[] */ diff --git a/src/mesa/shader/prog_instruction.c b/src/mesa/shader/prog_instruction.c index d6b5652a27..bea5d0551e 100644 --- a/src/mesa/shader/prog_instruction.c +++ b/src/mesa/shader/prog_instruction.c @@ -119,6 +119,23 @@ _mesa_copy_instructions(struct prog_instruction *dest, /** + * Free an array of instructions + */ +void +_mesa_free_instructions(struct prog_instruction *inst, GLuint count) +{ + GLuint i; + for (i = 0; i < count; i++) { + if (inst[i].Data) + _mesa_free(inst[i].Data); + if (inst[i].Comment) + _mesa_free((char *) inst[i].Comment); + } + _mesa_free(inst); +} + + +/** * Basic info about each instruction */ struct instruction_info @@ -126,6 +143,7 @@ struct instruction_info gl_inst_opcode Opcode; const char *Name; GLuint NumSrcRegs; + GLuint NumDstRegs; }; /** @@ -133,91 +151,91 @@ struct instruction_info * \note Opcode 
should equal array index! */ static const struct instruction_info InstInfo[MAX_OPCODE] = { - { OPCODE_NOP, "NOP", 0 }, - { OPCODE_ABS, "ABS", 1 }, - { OPCODE_ADD, "ADD", 2 }, - { OPCODE_ARA, "ARA", 1 }, - { OPCODE_ARL, "ARL", 1 }, - { OPCODE_ARL_NV, "ARL", 1 }, - { OPCODE_ARR, "ARL", 1 }, - { OPCODE_BGNLOOP,"BGNLOOP", 0 }, - { OPCODE_BGNSUB, "BGNSUB", 0 }, - { OPCODE_BRA, "BRA", 0 }, - { OPCODE_BRK, "BRK", 0 }, - { OPCODE_CAL, "CAL", 0 }, - { OPCODE_CMP, "CMP", 3 }, - { OPCODE_CONT, "CONT", 0 }, - { OPCODE_COS, "COS", 1 }, - { OPCODE_DDX, "DDX", 1 }, - { OPCODE_DDY, "DDY", 1 }, - { OPCODE_DP3, "DP3", 2 }, - { OPCODE_DP4, "DP4", 2 }, - { OPCODE_DPH, "DPH", 2 }, - { OPCODE_DST, "DST", 2 }, - { OPCODE_ELSE, "ELSE", 0 }, - { OPCODE_END, "END", 0 }, - { OPCODE_ENDIF, "ENDIF", 0 }, - { OPCODE_ENDLOOP,"ENDLOOP", 0 }, - { OPCODE_ENDSUB, "ENDSUB", 0 }, - { OPCODE_EX2, "EX2", 1 }, - { OPCODE_EXP, "EXP", 1 }, - { OPCODE_FLR, "FLR", 1 }, - { OPCODE_FRC, "FRC", 1 }, - { OPCODE_IF, "IF", 0 }, - { OPCODE_INT, "INT", 1 }, - { OPCODE_KIL, "KIL", 1 }, - { OPCODE_KIL_NV, "KIL", 0 }, - { OPCODE_LG2, "LG2", 1 }, - { OPCODE_LIT, "LIT", 1 }, - { OPCODE_LOG, "LOG", 1 }, - { OPCODE_LRP, "LRP", 3 }, - { OPCODE_MAD, "MAD", 3 }, - { OPCODE_MAX, "MAX", 2 }, - { OPCODE_MIN, "MIN", 2 }, - { OPCODE_MOV, "MOV", 1 }, - { OPCODE_MUL, "MUL", 2 }, - { OPCODE_NOISE1, "NOISE1", 1 }, - { OPCODE_NOISE2, "NOISE2", 1 }, - { OPCODE_NOISE3, "NOISE3", 1 }, - { OPCODE_NOISE4, "NOISE4", 1 }, - { OPCODE_PK2H, "PK2H", 1 }, - { OPCODE_PK2US, "PK2US", 1 }, - { OPCODE_PK4B, "PK4B", 1 }, - { OPCODE_PK4UB, "PK4UB", 1 }, - { OPCODE_POW, "POW", 2 }, - { OPCODE_POPA, "POPA", 0 }, - { OPCODE_PRINT, "PRINT", 1 }, - { OPCODE_PUSHA, "PUSHA", 0 }, - { OPCODE_RCC, "RCC", 1 }, - { OPCODE_RCP, "RCP", 1 }, - { OPCODE_RET, "RET", 0 }, - { OPCODE_RFL, "RFL", 1 }, - { OPCODE_RSQ, "RSQ", 1 }, - { OPCODE_SCS, "SCS", 1 }, - { OPCODE_SEQ, "SEQ", 2 }, - { OPCODE_SFL, "SFL", 0 }, - { OPCODE_SGE, "SGE", 2 }, - { OPCODE_SGT, "SGT", 2 }, - { 
OPCODE_SIN, "SIN", 1 }, - { OPCODE_SLE, "SLE", 2 }, - { OPCODE_SLT, "SLT", 2 }, - { OPCODE_SNE, "SNE", 2 }, - { OPCODE_SSG, "SSG", 1 }, - { OPCODE_STR, "STR", 0 }, - { OPCODE_SUB, "SUB", 2 }, - { OPCODE_SWZ, "SWZ", 1 }, - { OPCODE_TEX, "TEX", 1 }, - { OPCODE_TXB, "TXB", 1 }, - { OPCODE_TXD, "TXD", 3 }, - { OPCODE_TXL, "TXL", 1 }, - { OPCODE_TXP, "TXP", 1 }, - { OPCODE_TXP_NV, "TXP", 1 }, - { OPCODE_UP2H, "UP2H", 1 }, - { OPCODE_UP2US, "UP2US", 1 }, - { OPCODE_UP4B, "UP4B", 1 }, - { OPCODE_UP4UB, "UP4UB", 1 }, - { OPCODE_X2D, "X2D", 3 }, - { OPCODE_XPD, "XPD", 2 } + { OPCODE_NOP, "NOP", 0, 0 }, + { OPCODE_ABS, "ABS", 1, 1 }, + { OPCODE_ADD, "ADD", 2, 1 }, + { OPCODE_ARA, "ARA", 1, 1 }, + { OPCODE_ARL, "ARL", 1, 1 }, + { OPCODE_ARL_NV, "ARL", 1, 1 }, + { OPCODE_ARR, "ARL", 1, 1 }, + { OPCODE_BGNLOOP,"BGNLOOP", 0, 0 }, + { OPCODE_BGNSUB, "BGNSUB", 0, 0 }, + { OPCODE_BRA, "BRA", 0, 0 }, + { OPCODE_BRK, "BRK", 0, 0 }, + { OPCODE_CAL, "CAL", 0, 0 }, + { OPCODE_CMP, "CMP", 3, 1 }, + { OPCODE_CONT, "CONT", 0, 0 }, + { OPCODE_COS, "COS", 1, 1 }, + { OPCODE_DDX, "DDX", 1, 1 }, + { OPCODE_DDY, "DDY", 1, 1 }, + { OPCODE_DP3, "DP3", 2, 1 }, + { OPCODE_DP4, "DP4", 2, 1 }, + { OPCODE_DPH, "DPH", 2, 1 }, + { OPCODE_DST, "DST", 2, 1 }, + { OPCODE_ELSE, "ELSE", 0, 0 }, + { OPCODE_END, "END", 0, 0 }, + { OPCODE_ENDIF, "ENDIF", 0, 0 }, + { OPCODE_ENDLOOP,"ENDLOOP", 0, 0 }, + { OPCODE_ENDSUB, "ENDSUB", 0, 0 }, + { OPCODE_EX2, "EX2", 1, 1 }, + { OPCODE_EXP, "EXP", 1, 1 }, + { OPCODE_FLR, "FLR", 1, 1 }, + { OPCODE_FRC, "FRC", 1, 1 }, + { OPCODE_IF, "IF", 1, 0 }, + { OPCODE_INT, "INT", 1, 1 }, + { OPCODE_KIL, "KIL", 1, 0 }, + { OPCODE_KIL_NV, "KIL", 0, 0 }, + { OPCODE_LG2, "LG2", 1, 1 }, + { OPCODE_LIT, "LIT", 1, 1 }, + { OPCODE_LOG, "LOG", 1, 1 }, + { OPCODE_LRP, "LRP", 3, 1 }, + { OPCODE_MAD, "MAD", 3, 1 }, + { OPCODE_MAX, "MAX", 2, 1 }, + { OPCODE_MIN, "MIN", 2, 1 }, + { OPCODE_MOV, "MOV", 1, 1 }, + { OPCODE_MUL, "MUL", 2, 1 }, + { OPCODE_NOISE1, "NOISE1", 1, 1 }, + { OPCODE_NOISE2, 
"NOISE2", 1, 1 }, + { OPCODE_NOISE3, "NOISE3", 1, 1 }, + { OPCODE_NOISE4, "NOISE4", 1, 1 }, + { OPCODE_PK2H, "PK2H", 1, 1 }, + { OPCODE_PK2US, "PK2US", 1, 1 }, + { OPCODE_PK4B, "PK4B", 1, 1 }, + { OPCODE_PK4UB, "PK4UB", 1, 1 }, + { OPCODE_POW, "POW", 2, 1 }, + { OPCODE_POPA, "POPA", 0, 0 }, + { OPCODE_PRINT, "PRINT", 1, 0 }, + { OPCODE_PUSHA, "PUSHA", 0, 0 }, + { OPCODE_RCC, "RCC", 1, 1 }, + { OPCODE_RCP, "RCP", 1, 1 }, + { OPCODE_RET, "RET", 0, 0 }, + { OPCODE_RFL, "RFL", 1, 1 }, + { OPCODE_RSQ, "RSQ", 1, 1 }, + { OPCODE_SCS, "SCS", 1, 1 }, + { OPCODE_SEQ, "SEQ", 2, 1 }, + { OPCODE_SFL, "SFL", 0, 1 }, + { OPCODE_SGE, "SGE", 2, 1 }, + { OPCODE_SGT, "SGT", 2, 1 }, + { OPCODE_SIN, "SIN", 1, 1 }, + { OPCODE_SLE, "SLE", 2, 1 }, + { OPCODE_SLT, "SLT", 2, 1 }, + { OPCODE_SNE, "SNE", 2, 1 }, + { OPCODE_SSG, "SSG", 1, 1 }, + { OPCODE_STR, "STR", 0, 1 }, + { OPCODE_SUB, "SUB", 2, 1 }, + { OPCODE_SWZ, "SWZ", 1, 1 }, + { OPCODE_TEX, "TEX", 1, 1 }, + { OPCODE_TXB, "TXB", 1, 1 }, + { OPCODE_TXD, "TXD", 3, 1 }, + { OPCODE_TXL, "TXL", 1, 1 }, + { OPCODE_TXP, "TXP", 1, 1 }, + { OPCODE_TXP_NV, "TXP", 1, 1 }, + { OPCODE_UP2H, "UP2H", 1, 1 }, + { OPCODE_UP2US, "UP2US", 1, 1 }, + { OPCODE_UP4B, "UP4B", 1, 1 }, + { OPCODE_UP4UB, "UP4UB", 1, 1 }, + { OPCODE_X2D, "X2D", 3, 1 }, + { OPCODE_XPD, "XPD", 2, 1 } }; @@ -234,6 +252,29 @@ _mesa_num_inst_src_regs(gl_inst_opcode opcode) /** + * Return the number of dst registers for the given instruction/opcode. + */ +GLuint +_mesa_num_inst_dst_regs(gl_inst_opcode opcode) +{ + ASSERT(opcode == InstInfo[opcode].Opcode); + ASSERT(OPCODE_XPD == InstInfo[OPCODE_XPD].Opcode); + return InstInfo[opcode].NumDstRegs; +} + + +GLboolean +_mesa_is_tex_instruction(gl_inst_opcode opcode) +{ + return (opcode == OPCODE_TEX || + opcode == OPCODE_TXB || + opcode == OPCODE_TXD || + opcode == OPCODE_TXL || + opcode == OPCODE_TXP); +} + + +/** * Return string name for given program opcode. 
*/ const char * diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h index c800757aa0..aca768376a 100644 --- a/src/mesa/shader/prog_instruction.h +++ b/src/mesa/shader/prog_instruction.h @@ -437,9 +437,18 @@ extern struct prog_instruction * _mesa_copy_instructions(struct prog_instruction *dest, const struct prog_instruction *src, GLuint n); +extern void +_mesa_free_instructions(struct prog_instruction *inst, GLuint count); + extern GLuint _mesa_num_inst_src_regs(gl_inst_opcode opcode); +extern GLuint +_mesa_num_inst_dst_regs(gl_inst_opcode opcode); + +extern GLboolean +_mesa_is_tex_instruction(gl_inst_opcode opcode); + extern const char * _mesa_opcode_string(gl_inst_opcode opcode); diff --git a/src/mesa/shader/prog_parameter.c b/src/mesa/shader/prog_parameter.c index 3ad7215755..152bd79f69 100644 --- a/src/mesa/shader/prog_parameter.c +++ b/src/mesa/shader/prog_parameter.c @@ -282,6 +282,8 @@ _mesa_add_uniform(struct gl_program_parameter_list *paramList, * Add a sampler to the parameter list. * \param name uniform's name * \param datatype GL_SAMPLER_2D, GL_SAMPLER_2D_RECT_ARB, etc. 
+ * \param index the sampler number (as seen in TEX instructions) + * \return sampler index (starting at zero) or -1 if error */ GLint _mesa_add_sampler(struct gl_program_parameter_list *paramList, @@ -292,13 +294,20 @@ _mesa_add_sampler(struct gl_program_parameter_list *paramList, ASSERT(paramList->Parameters[i].Size == 1); ASSERT(paramList->Parameters[i].DataType == datatype); /* already in list */ - return i; + return (GLint) paramList->ParameterValues[i][0]; } else { const GLint size = 1; /* a sampler is basically a texture unit number */ - i = _mesa_add_parameter(paramList, PROGRAM_SAMPLER, name, - size, datatype, NULL, NULL); - return i; + GLfloat value; + GLint numSamplers = 0; + for (i = 0; i < paramList->NumParameters; i++) { + if (paramList->Parameters[i].Type == PROGRAM_SAMPLER) + numSamplers++; + } + value = (GLfloat) numSamplers; + (void) _mesa_add_parameter(paramList, PROGRAM_SAMPLER, name, + size, datatype, &value, NULL); + return numSamplers; } } @@ -599,11 +608,46 @@ _mesa_clone_parameter_list(const struct gl_program_parameter_list *list) } } + clone->StateFlags = list->StateFlags; + return clone; } /** + * Return a new parameter list which is listA + listB. + */ +struct gl_program_parameter_list * +_mesa_combine_parameter_lists(const struct gl_program_parameter_list *listA, + const struct gl_program_parameter_list *listB) +{ + struct gl_program_parameter_list *list; + + if (listA) { + list = _mesa_clone_parameter_list(listA); + if (list && listB) { + GLuint i; + for (i = 0; i < listB->NumParameters; i++) { + struct gl_program_parameter *param = listB->Parameters + i; + _mesa_add_parameter(list, param->Type, param->Name, param->Size, + param->DataType, + listB->ParameterValues[i], + param->StateIndexes); + } + } + } + else if (listB) { + list = _mesa_clone_parameter_list(listB); + } + else { + list = NULL; + } + return list; +} + + + +/** * Find longest name of all uniform parameters in list. 
*/ GLuint diff --git a/src/mesa/shader/prog_parameter.h b/src/mesa/shader/prog_parameter.h index 09ff851ea7..105f6f24de 100644 --- a/src/mesa/shader/prog_parameter.h +++ b/src/mesa/shader/prog_parameter.h @@ -78,6 +78,16 @@ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList); extern struct gl_program_parameter_list * _mesa_clone_parameter_list(const struct gl_program_parameter_list *list); +extern struct gl_program_parameter_list * +_mesa_combine_parameter_lists(const struct gl_program_parameter_list *a, + const struct gl_program_parameter_list *b); + +static INLINE GLuint +_mesa_num_parameters(const struct gl_program_parameter_list *list) +{ + return list ? list->NumParameters : 0; +} + extern GLint _mesa_add_parameter(struct gl_program_parameter_list *paramList, enum register_file type, const char *name, diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c index 308cce2206..1c35ce3fec 100644 --- a/src/mesa/shader/prog_print.c +++ b/src/mesa/shader/prog_print.c @@ -320,7 +320,7 @@ _mesa_swizzle_string(GLuint swizzle, GLuint negateBase, GLboolean extended) if (!extended) s[i++] = '.'; - if (negateBase & 0x1) + if (negateBase & NEGATE_X) s[i++] = '-'; s[i++] = swz[GET_SWZ(swizzle, 0)]; @@ -328,7 +328,7 @@ _mesa_swizzle_string(GLuint swizzle, GLuint negateBase, GLboolean extended) s[i++] = ','; } - if (negateBase & 0x2) + if (negateBase & NEGATE_Y) s[i++] = '-'; s[i++] = swz[GET_SWZ(swizzle, 1)]; @@ -336,7 +336,7 @@ _mesa_swizzle_string(GLuint swizzle, GLuint negateBase, GLboolean extended) s[i++] = ','; } - if (negateBase & 0x4) + if (negateBase & NEGATE_Z) s[i++] = '-'; s[i++] = swz[GET_SWZ(swizzle, 2)]; @@ -344,7 +344,7 @@ _mesa_swizzle_string(GLuint swizzle, GLuint negateBase, GLboolean extended) s[i++] = ','; } - if (negateBase & 0x8) + if (negateBase & NEGATE_W) s[i++] = '-'; s[i++] = swz[GET_SWZ(swizzle, 3)]; @@ -541,7 +541,7 @@ _mesa_print_instruction_opt(const struct prog_instruction *inst, GLint indent, 
_mesa_printf("_SAT"); _mesa_printf(" "); print_dst_reg(&inst->DstReg, mode, prog); - _mesa_printf("%s[%d], %s", + _mesa_printf(", %s[%d], %s", file_string((enum register_file) inst->SrcReg[0].File, mode), inst->SrcReg[0].Index, @@ -551,6 +551,7 @@ _mesa_print_instruction_opt(const struct prog_instruction *inst, GLint indent, break; case OPCODE_TEX: case OPCODE_TXP: + case OPCODE_TXL: case OPCODE_TXB: _mesa_printf("%s", _mesa_opcode_string(inst->Opcode)); if (inst->SaturateMode == SATURATE_ZERO_ONE) @@ -571,6 +572,23 @@ _mesa_print_instruction_opt(const struct prog_instruction *inst, GLint indent, } print_comment(inst); break; + + case OPCODE_KIL: + _mesa_printf("%s", _mesa_opcode_string(inst->Opcode)); + _mesa_printf(" "); + print_src_reg(&inst->SrcReg[0], mode, prog); + print_comment(inst); + break; + case OPCODE_KIL_NV: + _mesa_printf("%s", _mesa_opcode_string(inst->Opcode)); + _mesa_printf(" "); + _mesa_printf("%s.%s", + _mesa_condcode_string(inst->DstReg.CondMask), + _mesa_swizzle_string(inst->DstReg.CondSwizzle, + GL_FALSE, GL_FALSE)); + print_comment(inst); + break; + case OPCODE_ARL: _mesa_printf("ARL addr.x, "); print_src_reg(&inst->SrcReg[0], mode, prog); @@ -735,6 +753,8 @@ _mesa_print_program_opt(const struct gl_program *prog, void _mesa_print_program_parameters(GLcontext *ctx, const struct gl_program *prog) { + GLuint i; + _mesa_printf("InputsRead: 0x%x\n", prog->InputsRead); _mesa_printf("OutputsWritten: 0x%x\n", prog->OutputsWritten); _mesa_printf("NumInstructions=%d\n", prog->NumInstructions); @@ -742,9 +762,14 @@ _mesa_print_program_parameters(GLcontext *ctx, const struct gl_program *prog) _mesa_printf("NumParameters=%d\n", prog->NumParameters); _mesa_printf("NumAttributes=%d\n", prog->NumAttributes); _mesa_printf("NumAddressRegs=%d\n", prog->NumAddressRegs); - + _mesa_printf("Samplers=[ "); + for (i = 0; i < MAX_SAMPLERS; i++) { + _mesa_printf("%d ", prog->SamplerUnits[i]); + } + _mesa_printf("]\n"); + _mesa_load_state_parameters(ctx, 
prog->Parameters); - + #if 0 _mesa_printf("Local Params:\n"); for (i = 0; i < MAX_PROGRAM_LOCAL_PARAMS; i++){ @@ -762,6 +787,9 @@ _mesa_print_parameter_list(const struct gl_program_parameter_list *list) const gl_prog_print_mode mode = PROG_PRINT_DEBUG; GLuint i; + if (!list) + return; + _mesa_printf("param list %p\n", (void *) list); for (i = 0; i < list->NumParameters; i++){ struct gl_program_parameter *param = list->Parameters + i; diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c index 4ae74c1d42..539057b438 100644 --- a/src/mesa/shader/prog_statevars.c +++ b/src/mesa/shader/prog_statevars.c @@ -253,7 +253,8 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], value[0] = ctx->Fog.Density; value[1] = ctx->Fog.Start; value[2] = ctx->Fog.End; - value[3] = 1.0F / (ctx->Fog.End - ctx->Fog.Start); + value[3] = (ctx->Fog.End == ctx->Fog.Start) + ? 1.0 : 1.0F / (ctx->Fog.End - ctx->Fog.Start); return; case STATE_CLIPPLANE: { @@ -278,6 +279,7 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], case STATE_MVP_MATRIX: case STATE_TEXTURE_MATRIX: case STATE_PROGRAM_MATRIX: + case STATE_COLOR_MATRIX: { /* state[0] = modelview, projection, texture, etc. */ /* state[1] = which texture matrix or program matrix */ @@ -311,6 +313,9 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], else if (mat == STATE_PROGRAM_MATRIX) { matrix = ctx->ProgramMatrixStack[index].Top; } + else if (mat == STATE_COLOR_MATRIX) { + matrix = ctx->ColorMatrixStack.Top; + } else { _mesa_problem(ctx, "Bad matrix name in _mesa_fetch_state()"); return; @@ -420,8 +425,9 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], * exp: 2^-(density/ln(2) * fogcoord) * exp2: 2^-((density/(ln(2)^2) * fogcoord)^2) */ - value[0] = -1.0F / (ctx->Fog.End - ctx->Fog.Start); - value[1] = ctx->Fog.End / (ctx->Fog.End - ctx->Fog.Start); + value[0] = (ctx->Fog.End == ctx->Fog.Start) + ? 
1.0 : -1.0F / (ctx->Fog.End - ctx->Fog.Start); + value[1] = ctx->Fog.End * -value[0]; value[2] = ctx->Fog.Density * ONE_DIV_LN2; value[3] = ctx->Fog.Density * ONE_DIV_SQRT_LN2; return; @@ -434,6 +440,24 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], value[3] = ctx->Light.Light[ln]._CosCutoff; return; } + case STATE_PT_SCALE: + value[0] = ctx->Pixel.RedScale; + value[1] = ctx->Pixel.GreenScale; + value[2] = ctx->Pixel.BlueScale; + value[3] = ctx->Pixel.AlphaScale; + break; + case STATE_PT_BIAS: + value[0] = ctx->Pixel.RedBias; + value[1] = ctx->Pixel.GreenBias; + value[2] = ctx->Pixel.BlueBias; + value[3] = ctx->Pixel.AlphaBias; + break; + case STATE_PCM_SCALE: + COPY_4V(value, ctx->Pixel.PostColorMatrixScale); + break; + case STATE_PCM_BIAS: + COPY_4V(value, ctx->Pixel.PostColorMatrixBias); + break; default: /* unknown state indexes are silently ignored * should be handled by the driver. @@ -492,6 +516,8 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH]) return _NEW_TEXTURE_MATRIX; case STATE_PROGRAM_MATRIX: return _NEW_TRACK_MATRIX; + case STATE_COLOR_MATRIX: + return _NEW_COLOR_MATRIX; case STATE_DEPTH_RANGE: return _NEW_VIEWPORT; @@ -585,6 +611,9 @@ append_token(char *dst, gl_state_index k) case STATE_PROGRAM_MATRIX: append(dst, "matrix.program"); break; + case STATE_COLOR_MATRIX: + append(dst, "matrix.color"); + break; case STATE_MATRIX_INVERSE: append(dst, ".inverse"); break; @@ -670,6 +699,18 @@ append_token(char *dst, gl_state_index k) case STATE_POSITION_NORMALIZED: append(dst, "(internal)"); break; + case STATE_PT_SCALE: + append(dst, "PTscale"); + break; + case STATE_PT_BIAS: + append(dst, "PTbias"); + break; + case STATE_PCM_SCALE: + append(dst, "PCMscale"); + break; + case STATE_PCM_BIAS: + append(dst, "PCMbias"); + break; default: ; } @@ -748,6 +789,7 @@ _mesa_program_state_string(const gl_state_index state[STATE_LENGTH]) case STATE_MVP_MATRIX: case STATE_TEXTURE_MATRIX: case STATE_PROGRAM_MATRIX: + case 
STATE_COLOR_MATRIX: { /* state[0] = modelview, projection, texture, etc. */ /* state[1] = which texture matrix or program matrix */ @@ -815,10 +857,12 @@ _mesa_load_state_parameters(GLcontext *ctx, if (!paramList) return; + /*assert(ctx->Driver.NeedFlush == 0);*/ + for (i = 0; i < paramList->NumParameters; i++) { if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { _mesa_fetch_state(ctx, - paramList->Parameters[i].StateIndexes, + (gl_state_index *) paramList->Parameters[i].StateIndexes, paramList->ParameterValues[i]); } } diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h index 22bb8e07ad..64820a5b68 100644 --- a/src/mesa/shader/prog_statevars.h +++ b/src/mesa/shader/prog_statevars.h @@ -67,6 +67,7 @@ typedef enum gl_state_index_ { STATE_MVP_MATRIX, STATE_TEXTURE_MATRIX, STATE_PROGRAM_MATRIX, + STATE_COLOR_MATRIX, STATE_MATRIX_INVERSE, STATE_MATRIX_TRANSPOSE, STATE_MATRIX_INVTRANS, @@ -108,6 +109,10 @@ typedef enum gl_state_index_ { STATE_POSITION_NORMALIZED, /* normalized light position */ STATE_FOG_PARAMS_OPTIMIZED, /* for faster fog calc */ STATE_SPOT_DIR_NORMALIZED, /* pre-normalized spot dir */ + STATE_PT_SCALE, /**< Pixel transfer RGBA scale */ + STATE_PT_BIAS, /**< Pixel transfer RGBA bias */ + STATE_PCM_SCALE, /**< Post color matrix RGBA scale */ + STATE_PCM_BIAS, /**< Post color matrix RGBA bias */ STATE_INTERNAL_DRIVER /* first available state index for drivers (must be last) */ } gl_state_index; diff --git a/src/mesa/shader/prog_uniform.c b/src/mesa/shader/prog_uniform.c new file mode 100644 index 0000000000..20e004b350 --- /dev/null +++ b/src/mesa/shader/prog_uniform.c @@ -0,0 +1,157 @@ +/* + * Mesa 3-D graphics library + * Version: 7.1 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file prog_uniform.c + * Shader uniform functions. 
+ * \author Brian Paul
+ */
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "prog_uniform.h"
+
+
+/**
+ * Allocate a new uniform list with CALLOC_STRUCT().
+ * \return the new list (whatever CALLOC_STRUCT() yields)
+ */
+struct gl_uniform_list *
+_mesa_new_uniform_list(void)
+{
+   return CALLOC_STRUCT(gl_uniform_list);
+}
+
+
+/**
+ * Free a uniform list: each uniform's Name string, the Uniforms array
+ * and the list object itself.
+ * \param list  the list to free; a NULL list is ignored
+ */
+void
+_mesa_free_uniform_list(struct gl_uniform_list *list)
+{
+   GLuint i;
+
+   if (!list)
+      return;
+
+   for (i = 0; i < list->NumUniforms; i++) {
+      /* Name is declared const; cast it away to release our copy */
+      _mesa_free((void *) list->Uniforms[i].Name);
+   }
+   _mesa_free(list->Uniforms);
+   _mesa_free(list);
+}
+
+
+/**
+ * Record that uniform 'name' is found at position 'progPos' in the vertex
+ * or fragment program.  If the name is not in the list yet a new entry is
+ * appended, otherwise the existing entry is updated.
+ * \param list  the uniform list to update
+ * \param name  the uniform's name; the list keeps its own copy
+ * \param target  GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB
+ * \param progPos  value stored in the entry's VertPos or FragPos field
+ * \return GL_TRUE for success, GL_FALSE if out of memory or if a position
+ *         was already recorded for this uniform and target
+ */
+GLboolean
+_mesa_append_uniform(struct gl_uniform_list *list,
+                     const char *name, GLenum target, GLuint progPos)
+{
+   const GLuint oldNum = list->NumUniforms;
+   GLint index;
+
+   assert(target == GL_VERTEX_PROGRAM_ARB ||
+          target == GL_FRAGMENT_PROGRAM_ARB);
+
+   index = _mesa_lookup_uniform(list, name);
+   if (index < 0) {
+      /* not found - append to list */
+      char *nameCopy;
+
+      if (oldNum + 1 > list->Size) {
+         /* Need to grow the list array (alloc some extra) */
+         list->Size += 4;
+
+         /* realloc arrays */
+         list->Uniforms = (struct gl_uniform *)
+            _mesa_realloc(list->Uniforms,
+                          oldNum * sizeof(struct gl_uniform),
+                          list->Size * sizeof(struct gl_uniform));
+      }
+
+      if (!list->Uniforms) {
+         /* out of memory */
+         /* NOTE(review): whether the old array (and the names it held)
+          * survives a failed _mesa_realloc() is not visible here - confirm.
+          */
+         list->NumUniforms = 0;
+         list->Size = 0;
+         return GL_FALSE;
+      }
+
+      /* Don't add an entry with a NULL Name: _mesa_lookup_uniform() hands
+       * every stored Name to _mesa_strcmp().
+       */
+      nameCopy = _mesa_strdup(name);
+      if (!nameCopy) {
+         /* out of memory - leave the list as it was */
+         return GL_FALSE;
+      }
+
+      list->Uniforms[oldNum].Name = nameCopy;
+      list->Uniforms[oldNum].VertPos = -1;
+      list->Uniforms[oldNum].FragPos = -1;
+      index = oldNum;
+      list->NumUniforms++;
+   }
+
+   /* update position for the vertex or fragment program */
+   if (target == GL_VERTEX_PROGRAM_ARB) {
+      if (list->Uniforms[index].VertPos != -1) {
+         /* this uniform is already in the list - that shouldn't happen */
+         return GL_FALSE;
+      }
+      list->Uniforms[index].VertPos = progPos;
+   }
+   else {
+      if (list->Uniforms[index].FragPos != -1) {
+         /* this uniform is already in the list - that shouldn't happen */
+         return GL_FALSE;
+      }
+      list->Uniforms[index].FragPos = progPos;
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Return the location/index of the named uniform in the uniform list,
+ * or -1 if not found.
+ */
+GLint
+_mesa_lookup_uniform(const struct gl_uniform_list *list, const char *name)
+{
+   GLuint i;
+   for (i = 0; i < list->NumUniforms; i++) {
+      if (!_mesa_strcmp(list->Uniforms[i].Name, name)) {
+         return i;
+      }
+   }
+   return -1;
+}
+
+
+/**
+ * Return the length of the longest uniform name in the list,
+ * or 0 if the list has no uniforms.
+ */
+GLint
+_mesa_longest_uniform_name(const struct gl_uniform_list *list)
+{
+   GLint max = 0;
+   GLuint i;
+   for (i = 0; i < list->NumUniforms; i++) {
+      /* compare as signed values; 'max' is the (signed) return value */
+      const GLint len = (GLint) _mesa_strlen(list->Uniforms[i].Name);
+      if (len > max)
+         max = len;
+   }
+   return max;
+}
+
+
+/**
+ * Debug helper: print each uniform's index, name, VertPos and FragPos
+ * to stdout.  A NULL list prints nothing.
+ */
+void
+_mesa_print_uniforms(const struct gl_uniform_list *list)
+{
+   GLuint i;
+
+   if (!list)
+      return;
+
+   printf("Uniform list %p:\n", (void *) list);
+   for (i = 0; i < list->NumUniforms; i++) {
+      /* 'i' is unsigned, so print it with %u */
+      printf("%u: %s %d %d\n",
+             i,
+             list->Uniforms[i].Name,
+             list->Uniforms[i].VertPos,
+             list->Uniforms[i].FragPos);
+   }
+}
diff --git a/src/mesa/shader/prog_uniform.h b/src/mesa/shader/prog_uniform.h
new file mode 100644
index 0000000000..735de28705
--- /dev/null
+++ b/src/mesa/shader/prog_uniform.h
@@ -0,0 +1,91 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.1
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file prog_uniform.h + * Shader uniform list types and function prototypes. + * \author Brian Paul + */ + +#ifndef PROG_UNIFORM_H +#define PROG_UNIFORM_H + +#include "main/mtypes.h" +#include "prog_statevars.h" + + +/** + * Shader program uniform variable. + * The glGetUniformLocation() and glUniform() commands will use this + * information. + * Note that a uniform such as "binormal" might be used in both the + * vertex shader and the fragment shader. When glUniform() is called to + * set the uniform's value, it must be updated in both the vertex and + * fragment shaders. The uniform may be in different locations in the + * two shaders so we keep track of that here. + */ +struct gl_uniform +{ + const char *Name; /**< Null-terminated string */ + GLint VertPos; /**< Position in the vertex program, or -1 if none recorded */ + GLint FragPos; /**< Position in the fragment program, or -1 if none recorded */ +#if 0 + GLenum DataType; /**< GL_FLOAT, GL_FLOAT_VEC2, etc */ + GLuint Size; /**< Number of components (1..4) */ +#endif +}; + + +/** + * List of gl_uniforms + */ +struct gl_uniform_list +{ + GLuint Size; /**< allocated size of Uniforms array */ + GLuint NumUniforms; /**< number of uniforms in the array */ + struct gl_uniform *Uniforms; /**< Array [Size] */ +}; + + +extern struct gl_uniform_list * +_mesa_new_uniform_list(void); + +extern void +_mesa_free_uniform_list(struct gl_uniform_list *list); + +extern GLboolean +_mesa_append_uniform(struct gl_uniform_list *list, + const char *name, GLenum target, GLuint progPos); + +extern GLint +_mesa_lookup_uniform(const struct gl_uniform_list *list, const char *name); + +extern GLint +_mesa_longest_uniform_name(const struct gl_uniform_list *list); + +extern void +_mesa_print_uniforms(const struct gl_uniform_list *list); + + +#endif /* PROG_UNIFORM_H */ diff --git a/src/mesa/shader/program.c
b/src/mesa/shader/program.c index c539b52720..7048770cf7 100644 --- a/src/mesa/shader/program.c +++ b/src/mesa/shader/program.c @@ -59,9 +59,9 @@ _mesa_init_program(GLcontext *ctx) ctx->VertexProgram.Enabled = GL_FALSE; ctx->VertexProgram.PointSizeEnabled = GL_FALSE; ctx->VertexProgram.TwoSideEnabled = GL_FALSE; - ctx->VertexProgram.Current = (struct gl_vertex_program *) ctx->Shared->DefaultVertexProgram; + _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, + ctx->Shared->DefaultVertexProgram); assert(ctx->VertexProgram.Current); - ctx->VertexProgram.Current->Base.RefCount++; for (i = 0; i < MAX_NV_VERTEX_PROGRAM_PARAMS / 4; i++) { ctx->VertexProgram.TrackMatrix[i] = GL_NONE; ctx->VertexProgram.TrackMatrixTransform[i] = GL_IDENTITY_NV; @@ -70,9 +70,9 @@ _mesa_init_program(GLcontext *ctx) #if FEATURE_NV_fragment_program || FEATURE_ARB_fragment_program ctx->FragmentProgram.Enabled = GL_FALSE; - ctx->FragmentProgram.Current = (struct gl_fragment_program *) ctx->Shared->DefaultFragmentProgram; + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, + ctx->Shared->DefaultFragmentProgram); assert(ctx->FragmentProgram.Current); - ctx->FragmentProgram.Current->Base.RefCount++; #endif /* XXX probably move this stuff */ @@ -92,18 +92,10 @@ void _mesa_free_program_data(GLcontext *ctx) { #if FEATURE_NV_vertex_program || FEATURE_ARB_vertex_program - if (ctx->VertexProgram.Current) { - ctx->VertexProgram.Current->Base.RefCount--; - if (ctx->VertexProgram.Current->Base.RefCount <= 0) - ctx->Driver.DeleteProgram(ctx, &(ctx->VertexProgram.Current->Base)); - } + _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, NULL); #endif #if FEATURE_NV_fragment_program || FEATURE_ARB_fragment_program - if (ctx->FragmentProgram.Current) { - ctx->FragmentProgram.Current->Base.RefCount--; - if (ctx->FragmentProgram.Current->Base.RefCount <= 0) - ctx->Driver.DeleteProgram(ctx, &(ctx->FragmentProgram.Current->Base)); - } + _mesa_reference_fragprog(ctx, 
&ctx->FragmentProgram.Current, NULL); #endif /* XXX probably move this stuff */ #if FEATURE_ATI_fragment_shader @@ -127,25 +119,17 @@ void _mesa_update_default_objects_program(GLcontext *ctx) { #if FEATURE_NV_vertex_program || FEATURE_ARB_vertex_program - if (ctx->VertexProgram.Current) { - ctx->VertexProgram.Current->Base.RefCount--; - if (ctx->VertexProgram.Current->Base.RefCount <= 0) - ctx->Driver.DeleteProgram(ctx, &(ctx->VertexProgram.Current->Base)); - } - ctx->VertexProgram.Current = (struct gl_vertex_program *) ctx->Shared->DefaultVertexProgram; + _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, + (struct gl_vertex_program *) + ctx->Shared->DefaultVertexProgram); assert(ctx->VertexProgram.Current); - ctx->VertexProgram.Current->Base.RefCount++; #endif #if FEATURE_NV_fragment_program || FEATURE_ARB_fragment_program - if (ctx->FragmentProgram.Current) { - ctx->FragmentProgram.Current->Base.RefCount--; - if (ctx->FragmentProgram.Current->Base.RefCount <= 0) - ctx->Driver.DeleteProgram(ctx, &(ctx->FragmentProgram.Current->Base)); - } - ctx->FragmentProgram.Current = (struct gl_fragment_program *) ctx->Shared->DefaultFragmentProgram; + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, + (struct gl_fragment_program *) + ctx->Shared->DefaultFragmentProgram); assert(ctx->FragmentProgram.Current); - ctx->FragmentProgram.Current->Base.RefCount++; #endif /* XXX probably move this stuff */ @@ -230,11 +214,17 @@ _mesa_init_program_struct( GLcontext *ctx, struct gl_program *prog, { (void) ctx; if (prog) { + GLuint i; + _mesa_bzero(prog, sizeof(*prog)); prog->Id = id; prog->Target = target; prog->Resident = GL_TRUE; prog->RefCount = 1; prog->Format = GL_PROGRAM_FORMAT_ASCII_ARB; + + /* default mapping from samplers to texture units */ + for (i = 0; i < MAX_SAMPLERS; i++) + prog->SamplerUnits[i] = i; } return prog; @@ -284,19 +274,23 @@ _mesa_init_vertex_program( GLcontext *ctx, struct gl_vertex_program *prog, struct gl_program * 
_mesa_new_program(GLcontext *ctx, GLenum target, GLuint id) { + struct gl_program *prog; switch (target) { case GL_VERTEX_PROGRAM_ARB: /* == GL_VERTEX_PROGRAM_NV */ - return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program), + prog = _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program), target, id ); + break; case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: - return _mesa_init_fragment_program(ctx, + prog =_mesa_init_fragment_program(ctx, CALLOC_STRUCT(gl_fragment_program), target, id ); + break; default: _mesa_problem(ctx, "bad target in _mesa_new_program"); - return NULL; + prog = NULL; } + return prog; } @@ -311,6 +305,7 @@ _mesa_delete_program(GLcontext *ctx, struct gl_program *prog) { (void) ctx; ASSERT(prog); + ASSERT(prog->RefCount==0); if (prog == &_mesa_DummyProgram) return; @@ -318,16 +313,7 @@ _mesa_delete_program(GLcontext *ctx, struct gl_program *prog) if (prog->String) _mesa_free(prog->String); - if (prog->Instructions) { - GLuint i; - for (i = 0; i < prog->NumInstructions; i++) { - if (prog->Instructions[i].Data) - _mesa_free(prog->Instructions[i].Data); - if (prog->Instructions[i].Comment) - _mesa_free((char *) prog->Instructions[i].Comment); - } - _mesa_free(prog->Instructions); - } + _mesa_free_instructions(prog->Instructions, prog->NumInstructions); if (prog->Parameters) { _mesa_free_parameter_list(prog->Parameters); @@ -366,6 +352,63 @@ _mesa_lookup_program(GLcontext *ctx, GLuint id) /** + * Reference counting for vertex/fragment programs + */ +void +_mesa_reference_program(GLcontext *ctx, + struct gl_program **ptr, + struct gl_program *prog) +{ + assert(ptr); + if (*ptr && prog) { + /* sanity check */ + ASSERT((*ptr)->Target == prog->Target); + } + if (*ptr == prog) { + return; /* no change */ + } + if (*ptr) { + GLboolean deleteFlag; + + /*_glthread_LOCK_MUTEX((*ptr)->Mutex);*/ +#if 0 + printf("Program %p ID=%u Target=%s Refcount-- to %d\n", + *ptr, (*ptr)->Id, + ((*ptr)->Target == GL_VERTEX_PROGRAM_ARB ? 
"VP" : "FP"), + (*ptr)->RefCount - 1); +#endif + ASSERT((*ptr)->RefCount > 0); + (*ptr)->RefCount--; + + deleteFlag = ((*ptr)->RefCount == 0); + /*_glthread_UNLOCK_MUTEX((*ptr)->Mutex);*/ + + if (deleteFlag) { + ASSERT(ctx); + ctx->Driver.DeleteProgram(ctx, *ptr); + } + + *ptr = NULL; + } + + assert(!*ptr); + if (prog) { + /*_glthread_LOCK_MUTEX(prog->Mutex);*/ + prog->RefCount++; +#if 0 + printf("Program %p ID=%u Target=%s Refcount++ to %d\n", + prog, prog->Id, + (prog->Target == GL_VERTEX_PROGRAM_ARB ? "VP" : "FP"), + prog->RefCount); +#endif + /*_glthread_UNLOCK_MUTEX(prog->Mutex);*/ + } + + *ptr = prog; +} + + +/** * Return a copy of a program. * XXX Problem here if the program object is actually OO-derivation * made by a device driver. @@ -380,18 +423,20 @@ _mesa_clone_program(GLcontext *ctx, const struct gl_program *prog) return NULL; assert(clone->Target == prog->Target); + assert(clone->RefCount == 1); + clone->String = (GLubyte *) _mesa_strdup((char *) prog->String); - clone->RefCount = 1; clone->Format = prog->Format; clone->Instructions = _mesa_alloc_instructions(prog->NumInstructions); if (!clone->Instructions) { - ctx->Driver.DeleteProgram(ctx, clone); + _mesa_reference_program(ctx, &clone, NULL); return NULL; } _mesa_copy_instructions(clone->Instructions, prog->Instructions, prog->NumInstructions); clone->InputsRead = prog->InputsRead; clone->OutputsWritten = prog->OutputsWritten; + clone->SamplersUsed = prog->SamplersUsed; memcpy(clone->TexturesUsed, prog->TexturesUsed, sizeof(prog->TexturesUsed)); if (prog->Parameters) @@ -445,6 +490,236 @@ _mesa_clone_program(GLcontext *ctx, const struct gl_program *prog) } +/** + * Insert 'count' NOP instructions at 'start' in the given program. + * Adjust branch targets accordingly. 
+ * \param prog  the program to modify
+ * \param start  position of the first new instruction (0..NumInstructions)
+ * \param count  number of instructions to insert
+ * \return GL_TRUE for success; GL_FALSE if 'start' is out of range or if
+ *         out of memory, in which case the program is left unchanged
+ */
+GLboolean
+_mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count)
+{
+   const GLuint origLen = prog->NumInstructions;
+   const GLuint newLen = origLen + count;
+   struct prog_instruction *newInst;
+   GLuint i;
+
+   /* 'origLen - start' below would wrap around for an out-of-range start */
+   if (start > origLen) {
+      return GL_FALSE;
+   }
+
+   /* Alloc storage for new instructions.  Do this before touching the
+    * existing instructions so a failure leaves the program unchanged.
+    */
+   newInst = _mesa_alloc_instructions(newLen);
+   if (!newInst) {
+      return GL_FALSE;
+   }
+
+   /* adjust branches that point at or beyond the insertion point */
+   for (i = 0; i < prog->NumInstructions; i++) {
+      struct prog_instruction *inst = prog->Instructions + i;
+      if (inst->BranchTarget > 0) {
+         if (inst->BranchTarget >= start) {
+            inst->BranchTarget += count;
+         }
+      }
+   }
+
+   /* Copy 'start' instructions into new instruction buffer */
+   _mesa_copy_instructions(newInst, prog->Instructions, start);
+
+   /* init the new instructions */
+   _mesa_init_instructions(newInst + start, count);
+
+   /* Copy the remaining/tail instructions to new inst buffer */
+   _mesa_copy_instructions(newInst + start + count,
+                           prog->Instructions + start,
+                           origLen - start);
+
+   /* free old instructions */
+   _mesa_free_instructions(prog->Instructions, origLen);
+
+   /* install new instructions */
+   prog->Instructions = newInst;
+   prog->NumInstructions = newLen;
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Search instructions for registers that match (oldFile, oldIndex),
+ * replacing them with (newFile, newIndex).
+ * \param inst  array of instructions to scan
+ * \param numInst  number of instructions in the array
+ */
+static void
+replace_registers(struct prog_instruction *inst, GLuint numInst,
+                  GLuint oldFile, GLuint oldIndex,
+                  GLuint newFile, GLuint newIndex)
+{
+   GLuint i, j;
+   for (i = 0; i < numInst; i++) {
+      /* The number of source registers depends on the opcode of the
+       * instruction being examined, so it must be looked up per instruction
+       * (inst[i]), not just once for the first one.
+       */
+      const GLuint numSrc = _mesa_num_inst_src_regs(inst[i].Opcode);
+
+      /* src regs */
+      for (j = 0; j < numSrc; j++) {
+         if (inst[i].SrcReg[j].File == oldFile &&
+             inst[i].SrcReg[j].Index == oldIndex) {
+            inst[i].SrcReg[j].File = newFile;
+            inst[i].SrcReg[j].Index = newIndex;
+         }
+      }
+      /* dst reg */
+      if (inst[i].DstReg.File == oldFile && inst[i].DstReg.Index == oldIndex) {
+         inst[i].DstReg.File = newFile;
+         inst[i].DstReg.Index = newIndex;
+      }
+   }
+}
+
+
+/**
+ * Search instructions for references to program parameters.  When found,
+ * increment the parameter index by 'offset'.
+ * Only source registers in the PROGRAM_CONSTANT, PROGRAM_UNIFORM and
+ * PROGRAM_STATE_VAR files are adjusted.
+ * Used when combining programs.
+ * \param inst  array of instructions to scan
+ * \param numInst  number of instructions in the array
+ * \param offset  amount added to each matching source register index
+ */
+static void
+adjust_param_indexes(struct prog_instruction *inst, GLuint numInst,
+                     GLuint offset)
+{
+   GLuint i, j;
+   for (i = 0; i < numInst; i++) {
+      /* per-instruction source register count (see inst[i], not inst[0]) */
+      const GLuint numSrc = _mesa_num_inst_src_regs(inst[i].Opcode);
+
+      for (j = 0; j < numSrc; j++) {
+         GLuint f = inst[i].SrcReg[j].File;
+         if (f == PROGRAM_CONSTANT ||
+             f == PROGRAM_UNIFORM ||
+             f == PROGRAM_STATE_VAR) {
+            inst[i].SrcReg[j].Index += offset;
+         }
+      }
+   }
+}
+
+
+/**
+ * Combine two programs into one.  Fix instructions so the outputs of
+ * the first program go to the inputs of the second program.
+ * The last instruction of progA (its END) is dropped and progB's
+ * instructions are appended in its place.
+ * \param progA  the first program
+ * \param progB  the second program; must have the same target as progA
+ * \return the new, combined program or NULL if out of memory
+ */
+struct gl_program *
+_mesa_combine_programs(GLcontext *ctx,
+                       const struct gl_program *progA,
+                       const struct gl_program *progB)
+{
+   struct prog_instruction *newInst;
+   struct gl_program *newProg;
+   const GLuint lenA = progA->NumInstructions - 1; /* omit END instr */
+   const GLuint lenB = progB->NumInstructions;
+   const GLuint numParamsA = _mesa_num_parameters(progA->Parameters);
+   const GLuint newLength = lenA + lenB;
+   GLbitfield inputsB;
+   GLuint i;
+
+   ASSERT(progA->Target == progB->Target);
+   ASSERT(progA->NumInstructions > 0);  /* else lenA wrapped around */
+
+   newInst = _mesa_alloc_instructions(newLength);
+   if (!newInst)
+      return NULL;
+
+   _mesa_copy_instructions(newInst, progA->Instructions, lenA);
+   _mesa_copy_instructions(newInst + lenA, progB->Instructions, lenB);
+
+   /* adjust branch / instruction addresses for B's instructions */
+   for (i = 0; i < lenB; i++) {
+      newInst[lenA + i].BranchTarget += lenA;
+   }
+
+   newProg = ctx->Driver.NewProgram(ctx, progA->Target, 0);
+   if (!newProg) {
+      /* out of memory - don't leak the instruction copy */
+      _mesa_free_instructions(newInst, newLength);
+      return NULL;
+   }
+   newProg->Instructions = newInst;
+   newProg->NumInstructions = newLength;
+
+   if (newProg->Target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct gl_fragment_program *fprogA, *fprogB, *newFprog;
+      fprogA = (struct gl_fragment_program *) progA;
+      fprogB = (struct gl_fragment_program *) progB;
+      newFprog = (struct gl_fragment_program *) newProg;
+
+      newFprog->UsesKill = fprogA->UsesKill || fprogB->UsesKill;
+
+      /* Connect color outputs of fprogA to color inputs of fprogB, via a
+       * new temporary register.
+       */
+      if ((progA->OutputsWritten & (1 << FRAG_RESULT_COLR)) &&
+          (progB->InputsRead & (1 << FRAG_ATTRIB_COL0))) {
+         GLint tempReg = _mesa_find_free_register(newProg, PROGRAM_TEMPORARY);
+         if (tempReg < 0) {
+            _mesa_problem(ctx, "No free temp regs found in "
+                          "_mesa_combine_programs(), using 31");
+            tempReg = 31;
+         }
+         /* replace writes to result.color[0] with tempReg */
+         replace_registers(newInst, lenA,
+                           PROGRAM_OUTPUT, FRAG_RESULT_COLR,
+                           PROGRAM_TEMPORARY, tempReg);
+         /* replace reads from input.color[0] with tempReg */
+         replace_registers(newInst + lenA, lenB,
+                           PROGRAM_INPUT, FRAG_ATTRIB_COL0,
+                           PROGRAM_TEMPORARY, tempReg);
+      }
+
+      /* progB's color input is satisfied by progA's color output, so it
+       * is not an input of the combined program.
+       */
+      inputsB = progB->InputsRead;
+      if (progA->OutputsWritten & (1 << FRAG_RESULT_COLR)) {
+         inputsB &= ~(1 << FRAG_ATTRIB_COL0);
+      }
+      newProg->InputsRead = progA->InputsRead | inputsB;
+      newProg->OutputsWritten = progB->OutputsWritten;
+      newProg->SamplersUsed = progA->SamplersUsed | progB->SamplersUsed;
+   }
+   else {
+      /* vertex program */
+      assert(0);      /* XXX todo */
+   }
+
+   /*
+    * Merge parameters (uniforms, constants, etc)
+    */
+   newProg->Parameters = _mesa_combine_parameter_lists(progA->Parameters,
+                                                       progB->Parameters);
+
+   /* progB's parameters now follow progA's in the merged list */
+   adjust_param_indexes(newInst + lenA, lenB, numParamsA);
+
+
+   return newProg;
+}
+
+
+
+
+/**
+ * Scan the given program to find a free register of the given type.
+ * A register counts as used if any instruction reads it (source register)
+ * or writes it (destination register).
+ * \param regFile - PROGRAM_INPUT, PROGRAM_OUTPUT or PROGRAM_TEMPORARY
+ * \return index of the first unused register, or -1 if the first
+ *         MAX_PROGRAM_TEMPS registers are all in use
+ */
+GLint
+_mesa_find_free_register(const struct gl_program *prog, GLuint regFile)
+{
+   GLboolean used[MAX_PROGRAM_TEMPS];
+   GLuint i, k;
+
+   assert(regFile == PROGRAM_INPUT ||
+          regFile == PROGRAM_OUTPUT ||
+          regFile == PROGRAM_TEMPORARY);
+
+   _mesa_memset(used, 0, sizeof(used));
+
+   for (i = 0; i < prog->NumInstructions; i++) {
+      const struct prog_instruction *inst = prog->Instructions + i;
+      const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
+
+      /* registers read by this instruction */
+      for (k = 0; k < n; k++) {
+         if (inst->SrcReg[k].File == regFile) {
+            const GLint idx = (GLint) inst->SrcReg[k].Index;
+            /* ignore indexes that don't fit in the used[] table */
+            if (idx >= 0 && idx < (GLint) MAX_PROGRAM_TEMPS)
+               used[idx] = GL_TRUE;
+         }
+      }
+
+      /* register written by this instruction: a register that is written
+       * but never read is still in use and must not be handed out.
+       */
+      if (inst->DstReg.File == regFile) {
+         const GLint idx = (GLint) inst->DstReg.Index;
+         if (idx >= 0 && idx < (GLint) MAX_PROGRAM_TEMPS)
+            used[idx] = GL_TRUE;
+      }
+   }
+
+   for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+      if (!used[i])
+         return i;
+   }
+
+   return -1;
+}
+
+
/** * Mixing ARB and NV vertex/fragment programs can be tricky. @@ -513,9 +788,9 @@ _mesa_BindProgram(GLenum target, GLuint id) /* Bind a default program */ newProg = NULL; if (target == GL_VERTEX_PROGRAM_ARB) /* == GL_VERTEX_PROGRAM_NV */ - newProg = ctx->Shared->DefaultVertexProgram; + newProg = &ctx->Shared->DefaultVertexProgram->Base; else - newProg = ctx->Shared->DefaultFragmentProgram; + newProg = &ctx->Shared->DefaultFragmentProgram->Base; } else { /* Bind a user program */ @@ -543,26 +818,16 @@ _mesa_BindProgram(GLenum target, GLuint id) return; } - /* unbind/delete oldProg */ - if (curProg->Id != 0) { - /* decrement refcount on previously bound fragment program */ - curProg->RefCount--; - /* and delete if refcount goes below one */ - if (curProg->RefCount <= 0) { - /* the program ID was already removed from the hash table */ - ctx->Driver.DeleteProgram(ctx, curProg); - } - } - /* bind newProg */ if (target == GL_VERTEX_PROGRAM_ARB) { /* == GL_VERTEX_PROGRAM_NV */ - ctx->VertexProgram.Current = (struct gl_vertex_program *) newProg; + _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, + (struct gl_vertex_program *) newProg); } else if (target == GL_FRAGMENT_PROGRAM_NV || target == GL_FRAGMENT_PROGRAM_ARB) { -
ctx->FragmentProgram.Current = (struct gl_fragment_program *) newProg; + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, + (struct gl_fragment_program *) newProg); } - newProg->RefCount++; /* Never null pointers */ ASSERT(ctx->VertexProgram.Current); @@ -620,10 +885,7 @@ _mesa_DeletePrograms(GLsizei n, const GLuint *ids) } /* The ID is immediately available for re-use now */ _mesa_HashRemove(ctx->Shared->Programs, ids[i]); - prog->RefCount--; - if (prog->RefCount <= 0) { - ctx->Driver.DeleteProgram(ctx, prog); - } + _mesa_reference_program(ctx, &prog, NULL); } } } diff --git a/src/mesa/shader/program.h b/src/mesa/shader/program.h index ea2c8c3050..f1a69a2c01 100644 --- a/src/mesa/shader/program.h +++ b/src/mesa/shader/program.h @@ -86,10 +86,43 @@ _mesa_delete_program(GLcontext *ctx, struct gl_program *prog); extern struct gl_program * _mesa_lookup_program(GLcontext *ctx, GLuint id); +extern void +_mesa_reference_program(GLcontext *ctx, + struct gl_program **ptr, + struct gl_program *prog); + +static INLINE void +_mesa_reference_vertprog(GLcontext *ctx, + struct gl_vertex_program **ptr, + struct gl_vertex_program *prog) +{ + _mesa_reference_program(ctx, (struct gl_program **) ptr, + (struct gl_program *) prog); +} + +static INLINE void +_mesa_reference_fragprog(GLcontext *ctx, + struct gl_fragment_program **ptr, + struct gl_fragment_program *prog) +{ + _mesa_reference_program(ctx, (struct gl_program **) ptr, + (struct gl_program *) prog); +} extern struct gl_program * _mesa_clone_program(GLcontext *ctx, const struct gl_program *prog); +extern GLboolean +_mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count); + +extern struct gl_program * +_mesa_combine_programs(GLcontext *ctx, + const struct gl_program *progA, + const struct gl_program *progB); + +extern GLint +_mesa_find_free_register(const struct gl_program *prog, GLuint regFile); + /* * API functions common to ARB/NV_vertex/fragment_program diff --git 
a/src/mesa/shader/programopt.c b/src/mesa/shader/programopt.c index fc5b0497fe..7d560c74a5 100644 --- a/src/mesa/shader/programopt.c +++ b/src/mesa/shader/programopt.c @@ -35,6 +35,7 @@ #include "context.h" #include "prog_parameter.h" #include "prog_statevars.h" +#include "program.h" #include "programopt.h" #include "prog_instruction.h" @@ -102,7 +103,7 @@ _mesa_insert_mvp_code(GLcontext *ctx, struct gl_vertex_program *vprog) _mesa_copy_instructions (newInst + 4, vprog->Base.Instructions, origLen); /* free old instructions */ - _mesa_free(vprog->Base.Instructions); + _mesa_free_instructions(vprog->Base.Instructions, origLen); /* install new instructions */ vprog->Base.Instructions = newInst; @@ -192,13 +193,13 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) inst->DstReg.WriteMask = WRITEMASK_X; inst->SrcReg[0].File = PROGRAM_INPUT; inst->SrcReg[0].Index = FRAG_ATTRIB_FOGC; - inst->SrcReg[0].Swizzle = SWIZZLE_X; + inst->SrcReg[0].Swizzle = SWIZZLE_XXXX; inst->SrcReg[1].File = PROGRAM_STATE_VAR; inst->SrcReg[1].Index = fogPRefOpt; - inst->SrcReg[1].Swizzle = SWIZZLE_X; + inst->SrcReg[1].Swizzle = SWIZZLE_XXXX; inst->SrcReg[2].File = PROGRAM_STATE_VAR; inst->SrcReg[2].Index = fogPRefOpt; - inst->SrcReg[2].Swizzle = SWIZZLE_Y; + inst->SrcReg[2].Swizzle = SWIZZLE_YYYY; inst->SaturateMode = SATURATE_ZERO_ONE; inst++; } @@ -214,10 +215,10 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) inst->SrcReg[0].File = PROGRAM_STATE_VAR; inst->SrcReg[0].Index = fogPRefOpt; inst->SrcReg[0].Swizzle - = (fprog->FogOption == GL_EXP) ? SWIZZLE_Z : SWIZZLE_W; + = (fprog->FogOption == GL_EXP) ? 
SWIZZLE_ZZZZ : SWIZZLE_WWWW; inst->SrcReg[1].File = PROGRAM_INPUT; inst->SrcReg[1].Index = FRAG_ATTRIB_FOGC; - inst->SrcReg[1].Swizzle = SWIZZLE_X; + inst->SrcReg[1].Swizzle = SWIZZLE_XXXX; inst++; if (fprog->FogOption == GL_EXP2) { /* MUL fogFactorTemp.x, fogFactorTemp.x, fogFactorTemp.x; */ @@ -227,10 +228,10 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) inst->DstReg.WriteMask = WRITEMASK_X; inst->SrcReg[0].File = PROGRAM_TEMPORARY; inst->SrcReg[0].Index = fogFactorTemp; - inst->SrcReg[0].Swizzle = SWIZZLE_X; + inst->SrcReg[0].Swizzle = SWIZZLE_XXXX; inst->SrcReg[1].File = PROGRAM_TEMPORARY; inst->SrcReg[1].Index = fogFactorTemp; - inst->SrcReg[1].Swizzle = SWIZZLE_X; + inst->SrcReg[1].Swizzle = SWIZZLE_XXXX; inst++; } /* EX2_SAT fogFactorTemp.x, -fogFactorTemp.x; */ @@ -240,8 +241,8 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) inst->DstReg.WriteMask = WRITEMASK_X; inst->SrcReg[0].File = PROGRAM_TEMPORARY; inst->SrcReg[0].Index = fogFactorTemp; - inst->SrcReg[0].NegateBase = GL_TRUE; - inst->SrcReg[0].Swizzle = SWIZZLE_X; + inst->SrcReg[0].NegateBase = NEGATE_XYZW; + inst->SrcReg[0].Swizzle = SWIZZLE_XXXX; inst->SaturateMode = SATURATE_ZERO_ONE; inst++; } @@ -252,8 +253,7 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) inst->DstReg.WriteMask = WRITEMASK_XYZ; inst->SrcReg[0].File = PROGRAM_TEMPORARY; inst->SrcReg[0].Index = fogFactorTemp; - inst->SrcReg[0].Swizzle - = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); + inst->SrcReg[0].Swizzle = SWIZZLE_XXXX; inst->SrcReg[1].File = PROGRAM_TEMPORARY; inst->SrcReg[1].Index = colorTemp; inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; @@ -275,7 +275,7 @@ _mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) inst++; /* free old instructions */ - _mesa_free(fprog->Base.Instructions); + _mesa_free_instructions(fprog->Base.Instructions, origLen); /* install new instructions */ fprog->Base.Instructions = newInst; @@ 
-365,3 +365,94 @@ _mesa_count_texture_instructions(struct gl_program *prog) } } + +/** + * Scan/rewrite program to remove reads of varying (output) registers. + * In GLSL vertex shaders, varying vars can be read and written. + * Normally, vertex varying vars are implemented as output registers. + * On some hardware, trying to read an output register causes trouble. + * So, rewrite the program to use a temporary register in this case. + */ +void +_mesa_remove_varying_reads(struct gl_program *prog) +{ + GLuint i; + GLint outputMap[VERT_RESULT_MAX]; + GLuint numVaryingReads = 0; + + assert(prog->Target == GL_VERTEX_PROGRAM_ARB); + + for (i = 0; i < VERT_RESULT_MAX; i++) + outputMap[i] = -1; + + /* look for instructions which read from varying vars */ + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = prog->Instructions + i; + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_VARYING) { + /* replace the read with a temp reg */ + const GLuint var = inst->SrcReg[j].Index; + if (outputMap[var] == -1) { + numVaryingReads++; + outputMap[var] = _mesa_find_free_register(prog, + PROGRAM_TEMPORARY); + } + inst->SrcReg[j].File = PROGRAM_TEMPORARY; + inst->SrcReg[j].Index = outputMap[var]; + } + } + } + + if (numVaryingReads == 0) + return; /* nothing to be done */ + + /* look for instructions which write to the varying vars identified above */ + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = prog->Instructions + i; + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->DstReg.File == PROGRAM_VARYING && + outputMap[inst->DstReg.Index] >= 0) { + /* change inst to write to the temp reg, instead of the varying */ + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = outputMap[inst->DstReg.Index]; + } + } + } + + /* insert new instructions to copy the 
temp vars to the varying vars */ + { + struct prog_instruction *inst; + GLint endPos, var; + + /* Look for END instruction and insert the new varying writes */ + endPos = -1; + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = prog->Instructions + i; + if (inst->Opcode == OPCODE_END) { + endPos = i; + _mesa_insert_instructions(prog, i, numVaryingReads); + break; + } + } + + assert(endPos >= 0); + + /* insert new MOV instructions here */ + inst = prog->Instructions + endPos; + for (var = 0; var < VERT_RESULT_MAX; var++) { + if (outputMap[var] >= 0) { + /* MOV VAR[var], TEMP[tmp]; */ + inst->Opcode = OPCODE_MOV; + inst->DstReg.File = PROGRAM_VARYING; + inst->DstReg.Index = var; + inst->SrcReg[0].File = PROGRAM_TEMPORARY; + inst->SrcReg[0].Index = outputMap[var]; + inst++; + } + } + } +} diff --git a/src/mesa/shader/programopt.h b/src/mesa/shader/programopt.h index ce63644bbf..47ff2f0c7b 100644 --- a/src/mesa/shader/programopt.h +++ b/src/mesa/shader/programopt.h @@ -39,5 +39,7 @@ _mesa_count_texture_indirections(struct gl_program *prog); extern void _mesa_count_texture_instructions(struct gl_program *prog); +extern void +_mesa_remove_varying_reads(struct gl_program *prog); #endif /* PROGRAMOPT_H */ diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c index b0f79c29c1..0bb96a0d6c 100644 --- a/src/mesa/shader/shader_api.c +++ b/src/mesa/shader/shader_api.c @@ -43,6 +43,7 @@ #include "prog_parameter.h" #include "prog_print.h" #include "prog_statevars.h" +#include "prog_uniform.h" #include "shader/shader_api.h" #include "shader/slang/slang_compile.h" #include "shader/slang/slang_link.h" @@ -52,7 +53,7 @@ /** * Allocate a new gl_shader_program object, initialize it. 
*/ -struct gl_shader_program * +static struct gl_shader_program * _mesa_new_shader_program(GLcontext *ctx, GLuint name) { struct gl_shader_program *shProg; @@ -74,26 +75,11 @@ void _mesa_clear_shader_program_data(GLcontext *ctx, struct gl_shader_program *shProg) { - if (shProg->VertexProgram) { - if (shProg->VertexProgram->Base.Parameters == shProg->Uniforms) { - /* to prevent a double-free in the next call */ - shProg->VertexProgram->Base.Parameters = NULL; - } - ctx->Driver.DeleteProgram(ctx, &shProg->VertexProgram->Base); - shProg->VertexProgram = NULL; - } - - if (shProg->FragmentProgram) { - if (shProg->FragmentProgram->Base.Parameters == shProg->Uniforms) { - /* to prevent a double-free in the next call */ - shProg->FragmentProgram->Base.Parameters = NULL; - } - ctx->Driver.DeleteProgram(ctx, &shProg->FragmentProgram->Base); - shProg->FragmentProgram = NULL; - } + _mesa_reference_vertprog(ctx, &shProg->VertexProgram, NULL); + _mesa_reference_fragprog(ctx, &shProg->FragmentProgram, NULL); if (shProg->Uniforms) { - _mesa_free_parameter_list(shProg->Uniforms); + _mesa_free_uniform_list(shProg->Uniforms); shProg->Uniforms = NULL; } @@ -177,8 +163,10 @@ _mesa_reference_shader_program(GLcontext *ctx, ASSERT(old->RefCount > 0); old->RefCount--; - /*printf("SHPROG DECR %p (%d) to %d\n", - (void*) old, old->Name, old->RefCount);*/ +#if 0 + printf("ShaderProgram %p ID=%u RefCount-- to %d\n", + (void *) old, old->Name, old->RefCount); +#endif deleteFlag = (old->RefCount == 0); if (deleteFlag) { @@ -192,8 +180,10 @@ _mesa_reference_shader_program(GLcontext *ctx, if (shProg) { shProg->RefCount++; - /*printf("SHPROG INCR %p (%d) to %d\n", - (void*) shProg, shProg->Name, shProg->RefCount);*/ +#if 0 + printf("ShaderProgram %p ID=%u RefCount++ to %d\n", + (void *) shProg, shProg->Name, shProg->RefCount); +#endif *ptr = shProg; } } @@ -248,10 +238,8 @@ _mesa_free_shader(GLcontext *ctx, struct gl_shader *sh) _mesa_free((void *) sh->Source); if (sh->InfoLog) 
_mesa_free(sh->InfoLog); - for (i = 0; i < sh->NumPrograms; i++) { - assert(sh->Programs[i]); - ctx->Driver.DeleteProgram(ctx, sh->Programs[i]); - } + for (i = 0; i < sh->NumPrograms; i++) + _mesa_reference_program(ctx, &sh->Programs[i], NULL); if (sh->Programs) _mesa_free(sh->Programs); _mesa_free(sh); @@ -373,57 +361,9 @@ copy_string(GLchar *dst, GLsizei maxLength, GLsizei *length, const GLchar *src) /** - * Return size (in floats) of the given GLSL type. - * See also _slang_sizeof_type_specifier(). - */ -static GLint -sizeof_glsl_type(GLenum type) -{ - switch (type) { - case GL_BOOL: - case GL_FLOAT: - case GL_INT: - return 1; - case GL_BOOL_VEC2: - case GL_FLOAT_VEC2: - case GL_INT_VEC2: - return 2; - case GL_BOOL_VEC3: - case GL_FLOAT_VEC3: - case GL_INT_VEC3: - return 3; - case GL_BOOL_VEC4: - case GL_FLOAT_VEC4: - case GL_INT_VEC4: - return 4; - case GL_FLOAT_MAT2: - return 8; /* 2 rows of 4, actually */ - case GL_FLOAT_MAT3: - return 12; /* 3 rows of 4, actually */ - case GL_FLOAT_MAT4: - return 16; - case GL_FLOAT_MAT2x3: - return 8; /* 2 rows of 4, actually */ - case GL_FLOAT_MAT2x4: - return 8; - case GL_FLOAT_MAT3x2: - return 12; /* 3 rows of 4, actually */ - case GL_FLOAT_MAT3x4: - return 12; - case GL_FLOAT_MAT4x2: - return 16; /* 4 rows of 4, actually */ - case GL_FLOAT_MAT4x3: - return 16; /* 4 rows of 4, actually */ - default: - return 0; /* error */ - } -} - - -/** * Called via ctx->Driver.AttachShader() */ -void +static void _mesa_attach_shader(GLcontext *ctx, GLuint program, GLuint shader) { struct gl_shader_program *shProg @@ -464,7 +404,38 @@ _mesa_attach_shader(GLcontext *ctx, GLuint program, GLuint shader) } -void +static GLint +_mesa_get_attrib_location(GLcontext *ctx, GLuint program, + const GLchar *name) +{ + struct gl_shader_program *shProg + = _mesa_lookup_shader_program(ctx, program); + + if (!shProg) { + _mesa_error(ctx, GL_INVALID_VALUE, "glGetAttribLocation"); + return -1; + } + + if (!shProg->LinkStatus) { + _mesa_error(ctx, 
GL_INVALID_OPERATION, + "glGetAttribLocation(program not linked)"); + return -1; + } + + if (!name) + return -1; + + if (shProg->Attributes) { + GLint i = _mesa_lookup_parameter_index(shProg->Attributes, -1, name); + if (i >= 0) { + return shProg->Attributes->Parameters[i].StateIndexes[0]; + } + } + return -1; +} + + +static void _mesa_bind_attrib_location(GLcontext *ctx, GLuint program, GLuint index, const GLchar *name) { @@ -510,7 +481,7 @@ _mesa_bind_attrib_location(GLcontext *ctx, GLuint program, GLuint index, } -GLuint +static GLuint _mesa_create_shader(GLcontext *ctx, GLenum type) { struct gl_shader *sh; @@ -534,7 +505,7 @@ _mesa_create_shader(GLcontext *ctx, GLenum type) } -GLuint +static GLuint _mesa_create_program(GLcontext *ctx) { GLuint name; @@ -555,7 +526,7 @@ _mesa_create_program(GLcontext *ctx) * Named w/ "2" to indicate OpenGL 2.x vs GL_ARB_fragment_programs's * DeleteProgramARB. */ -void +static void _mesa_delete_program2(GLcontext *ctx, GLuint name) { /* @@ -581,7 +552,7 @@ _mesa_delete_program2(GLcontext *ctx, GLuint name) } -void +static void _mesa_delete_shader(GLcontext *ctx, GLuint shader) { struct gl_shader *sh = _mesa_lookup_shader(ctx, shader); @@ -596,7 +567,7 @@ _mesa_delete_shader(GLcontext *ctx, GLuint shader) } -void +static void _mesa_detach_shader(GLcontext *ctx, GLuint program, GLuint shader) { struct gl_shader_program *shProg @@ -658,7 +629,7 @@ _mesa_detach_shader(GLcontext *ctx, GLuint program, GLuint shader) } -void +static void _mesa_get_active_attrib(GLcontext *ctx, GLuint program, GLuint index, GLsizei maxLength, GLsizei *length, GLint *size, GLenum *type, GLchar *nameOut) @@ -684,64 +655,64 @@ _mesa_get_active_attrib(GLcontext *ctx, GLuint program, GLuint index, shProg->Attributes->Parameters[index].Name); sz = shProg->Attributes->Parameters[index].Size; if (size) - *size = 1; /* attributes may not be arrays */ - if (type && sz > 0 && sz <= 4) /* XXX this is a temporary hack */ - *type = vec_types[sz - 1]; + *size = sz; + 
if (type) + *type = vec_types[sz]; /* XXX this is a temporary hack */ } /** * Called via ctx->Driver.GetActiveUniform(). */ -void +static void _mesa_get_active_uniform(GLcontext *ctx, GLuint program, GLuint index, GLsizei maxLength, GLsizei *length, GLint *size, GLenum *type, GLchar *nameOut) { - struct gl_shader_program *shProg + const struct gl_shader_program *shProg = _mesa_lookup_shader_program(ctx, program); - GLuint ind, j; + const struct gl_program *prog; + GLint progPos; if (!shProg) { _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveUniform"); return; } - if (!shProg->Uniforms || index >= shProg->Uniforms->NumParameters) { + if (!shProg->Uniforms || index >= shProg->Uniforms->NumUniforms) { _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveUniform(index)"); return; } - ind = 0; - for (j = 0; j < shProg->Uniforms->NumParameters; j++) { - if (shProg->Uniforms->Parameters[j].Type == PROGRAM_UNIFORM || - shProg->Uniforms->Parameters[j].Type == PROGRAM_SAMPLER) { - if (ind == index) { - GLuint uSize = shProg->Uniforms->Parameters[j].Size; - GLenum uType = shProg->Uniforms->Parameters[j].DataType; - /* found it */ - copy_string(nameOut, maxLength, length, - shProg->Uniforms->Parameters[j].Name); - if (size) { - /* convert from floats to 'type' (eg: sizeof(mat4x4)=1) */ - *size = uSize / sizeof_glsl_type(uType); - } - if (type) - *type = uType; - return; - } - ind++; + progPos = shProg->Uniforms->Uniforms[index].VertPos; + if (progPos >= 0) { + prog = &shProg->VertexProgram->Base; + } + else { + progPos = shProg->Uniforms->Uniforms[index].FragPos; + if (progPos >= 0) { + prog = &shProg->FragmentProgram->Base; } } - _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveUniform(index)"); + if (!prog || progPos < 0) + return; /* should never happen */ + + if (nameOut) + copy_string(nameOut, maxLength, length, + prog->Parameters->Parameters[progPos].Name); + if (size) + *size = prog->Parameters->Parameters[progPos].Size; + + if (type) + *type = 
prog->Parameters->Parameters[progPos].DataType; } /** * Called via ctx->Driver.GetAttachedShaders(). */ -void +static void _mesa_get_attached_shaders(GLcontext *ctx, GLuint program, GLsizei maxCount, GLsizei *count, GLuint *obj) { @@ -761,38 +732,7 @@ _mesa_get_attached_shaders(GLcontext *ctx, GLuint program, GLsizei maxCount, } -GLint -_mesa_get_attrib_location(GLcontext *ctx, GLuint program, - const GLchar *name) -{ - struct gl_shader_program *shProg - = _mesa_lookup_shader_program(ctx, program); - - if (!shProg) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGetAttribLocation"); - return -1; - } - - if (!shProg->LinkStatus) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetAttribLocation(program not linked)"); - return -1; - } - - if (!name) - return -1; - - if (shProg->Attributes) { - GLint i = _mesa_lookup_parameter_index(shProg->Attributes, -1, name); - if (i >= 0) { - return shProg->Attributes->Parameters[i].StateIndexes[0]; - } - } - return -1; -} - - -GLuint +static GLuint _mesa_get_handle(GLcontext *ctx, GLenum pname) { #if 0 @@ -816,7 +756,7 @@ _mesa_get_handle(GLcontext *ctx, GLenum pname) } -void +static void _mesa_get_programiv(GLcontext *ctx, GLuint program, GLenum pname, GLint *params) { @@ -852,14 +792,10 @@ _mesa_get_programiv(GLcontext *ctx, GLuint program, PROGRAM_INPUT) + 1; break; case GL_ACTIVE_UNIFORMS: - *params - = _mesa_num_parameters_of_type(shProg->Uniforms, PROGRAM_UNIFORM) - + _mesa_num_parameters_of_type(shProg->Uniforms, PROGRAM_SAMPLER); + *params = shProg->Uniforms ? 
shProg->Uniforms->NumUniforms : 0; break; case GL_ACTIVE_UNIFORM_MAX_LENGTH: - *params = MAX2( - _mesa_longest_parameter_name(shProg->Uniforms, PROGRAM_UNIFORM), - _mesa_longest_parameter_name(shProg->Uniforms, PROGRAM_SAMPLER)); + *params = _mesa_longest_uniform_name(shProg->Uniforms); if (*params > 0) (*params)++; /* add one for terminating zero */ break; @@ -870,7 +806,7 @@ _mesa_get_programiv(GLcontext *ctx, GLuint program, } -void +static void _mesa_get_shaderiv(GLcontext *ctx, GLuint name, GLenum pname, GLint *params) { struct gl_shader *shader = _mesa_lookup_shader(ctx, name); @@ -903,7 +839,7 @@ _mesa_get_shaderiv(GLcontext *ctx, GLuint name, GLenum pname, GLint *params) } -void +static void _mesa_get_program_info_log(GLcontext *ctx, GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog) { @@ -917,7 +853,7 @@ _mesa_get_program_info_log(GLcontext *ctx, GLuint program, GLsizei bufSize, } -void +static void _mesa_get_shader_info_log(GLcontext *ctx, GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *infoLog) { @@ -933,7 +869,7 @@ _mesa_get_shader_info_log(GLcontext *ctx, GLuint shader, GLsizei bufSize, /** * Called via ctx->Driver.GetShaderSource(). */ -void +static void _mesa_get_shader_source(GLcontext *ctx, GLuint shader, GLsizei maxLength, GLsizei *length, GLchar *sourceOut) { @@ -949,49 +885,34 @@ _mesa_get_shader_source(GLcontext *ctx, GLuint shader, GLsizei maxLength, /** * Called via ctx->Driver.GetUniformfv(). 
*/ -void +static void _mesa_get_uniformfv(GLcontext *ctx, GLuint program, GLint location, GLfloat *params) { struct gl_shader_program *shProg = _mesa_lookup_shader_program(ctx, program); if (shProg) { - GLint i; - if (location >= 0 && location < shProg->Uniforms->NumParameters) { - GLuint uSize; - GLenum uType; - GLint rows = 0; - uType = shProg->Uniforms->Parameters[location].DataType; - uSize = sizeof_glsl_type(uType); - /* Matrix types need special handling, because they span several - * parameters, and may also not be fully packed. - */ - switch (shProg->Uniforms->Parameters[location].DataType) { - case GL_FLOAT_MAT2: - case GL_FLOAT_MAT3x2: - case GL_FLOAT_MAT4x2: - rows = 2; - break; - case GL_FLOAT_MAT2x3: - case GL_FLOAT_MAT3: - case GL_FLOAT_MAT4x3: - rows = 3; - break; - case GL_FLOAT_MAT2x4: - case GL_FLOAT_MAT3x4: - case GL_FLOAT_MAT4: - rows = 4; + if (location < shProg->Uniforms->NumUniforms) { + GLint progPos, i; + const struct gl_program *prog = NULL; + + progPos = shProg->Uniforms->Uniforms[location].VertPos; + if (progPos >= 0) { + prog = &shProg->VertexProgram->Base; } - if (rows != 0) { - GLint r, c; - for (c = 0, i = 0; c * 4 < uSize; c++) - for (r = 0; r < rows; r++, i++) - params[i] = shProg->Uniforms->ParameterValues[location + c][r]; + else { + progPos = shProg->Uniforms->Uniforms[location].FragPos; + if (progPos >= 0) { + prog = &shProg->FragmentProgram->Base; + } } - else - for (i = 0; i < uSize; i++) { - params[i] = shProg->Uniforms->ParameterValues[location][i]; + + ASSERT(prog); + if (prog) { + for (i = 0; i < prog->Parameters->Parameters[progPos].Size; i++) { + params[i] = prog->Parameters->ParameterValues[progPos][i]; } + } } else { _mesa_error(ctx, GL_INVALID_VALUE, "glGetUniformfv(location)"); @@ -1006,32 +927,19 @@ _mesa_get_uniformfv(GLcontext *ctx, GLuint program, GLint location, /** * Called via ctx->Driver.GetUniformLocation(). 
*/ -GLint +static GLint _mesa_get_uniform_location(GLcontext *ctx, GLuint program, const GLchar *name) { struct gl_shader_program *shProg = _mesa_lookup_shader_program(ctx, program); - if (shProg) { - GLuint loc; - for (loc = 0; loc < shProg->Uniforms->NumParameters; loc++) { - const struct gl_program_parameter *u - = shProg->Uniforms->Parameters + loc; - /* XXX this is a temporary simplification / short-cut. - * We need to handle things like "e.c[0].b" as seen in the - * GLSL orange book, page 189. - */ - if ((u->Type == PROGRAM_UNIFORM || - u->Type == PROGRAM_SAMPLER) && !strcmp(u->Name, name)) { - return loc; - } - } - } - return -1; + if (!shProg) + return -1; + return _mesa_lookup_uniform(shProg->Uniforms, name); } -GLboolean +static GLboolean _mesa_is_program(GLcontext *ctx, GLuint name) { struct gl_shader_program *shProg = _mesa_lookup_shader_program(ctx, name); @@ -1039,7 +947,7 @@ _mesa_is_program(GLcontext *ctx, GLuint name) } -GLboolean +static GLboolean _mesa_is_shader(GLcontext *ctx, GLuint name) { struct gl_shader *shader = _mesa_lookup_shader(ctx, name); @@ -1051,7 +959,7 @@ _mesa_is_shader(GLcontext *ctx, GLuint name) /** * Called via ctx->Driver.ShaderSource() */ -void +static void _mesa_shader_source(GLcontext *ctx, GLuint shader, const GLchar *source) { struct gl_shader *sh = _mesa_lookup_shader(ctx, shader); @@ -1072,7 +980,7 @@ _mesa_shader_source(GLcontext *ctx, GLuint shader, const GLchar *source) /** * Called via ctx->Driver.CompileShader() */ -void +static void _mesa_compile_shader(GLcontext *ctx, GLuint shaderObj) { struct gl_shader *sh = _mesa_lookup_shader(ctx, shaderObj); @@ -1089,7 +997,7 @@ _mesa_compile_shader(GLcontext *ctx, GLuint shaderObj) /** * Called via ctx->Driver.LinkProgram() */ -void +static void _mesa_link_program(GLcontext *ctx, GLuint program) { struct gl_shader_program *shProg; @@ -1100,6 +1008,8 @@ _mesa_link_program(GLcontext *ctx, GLuint program) return; } + FLUSH_VERTICES(ctx, _NEW_PROGRAM); + _slang_link(ctx, 
program, shProg); } @@ -1136,55 +1046,121 @@ _mesa_use_program(GLcontext *ctx, GLuint program) } + /** - * Called via ctx->Driver.Uniform(). + * Update the vertex and fragment program's TexturesUsed arrays. */ -void -_mesa_uniform(GLcontext *ctx, GLint location, GLsizei count, - const GLvoid *values, GLenum type) +static void +update_textures_used(struct gl_program *prog) { - struct gl_shader_program *shProg = ctx->Shader.CurrentProgram; - GLint elems, i, k; - GLenum uType; - GLsizei maxCount; + GLuint s; - if (!shProg || !shProg->LinkStatus) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(program not linked)"); - return; - } + memset(prog->TexturesUsed, 0, sizeof(prog->TexturesUsed)); - if (location == -1) - return; /* The standard specifies this as a no-op */ - - /* The spec says this is GL_INVALID_OPERATION, although it seems like it - * ought to be GL_INVALID_VALUE - */ - if (location < 0 || location >= (GLint) shProg->Uniforms->NumParameters) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(location)"); - return; + for (s = 0; s < MAX_SAMPLERS; s++) { + if (prog->SamplersUsed & (1 << s)) { + GLuint u = prog->SamplerUnits[s]; + GLuint t = prog->SamplerTargets[s]; + assert(u < MAX_TEXTURE_IMAGE_UNITS); + prog->TexturesUsed[u] |= (1 << t); + } } +} - FLUSH_VERTICES(ctx, _NEW_PROGRAM); - uType = shProg->Uniforms->Parameters[location].DataType; - /* - * If we're setting a sampler, we must use glUniformi1()! - */ - if (shProg->Uniforms->Parameters[location].Type == PROGRAM_SAMPLER) { - GLint unit; +/** + * Set the value of a program's uniform variable. 
+ * \param program the program whose uniform to update + * \param location the location/index of the uniform + * \param type the datatype of the uniform + * \param count the number of uniforms to set + * \param elems number of elements per uniform + * \param values the new values + */ +static void +set_program_uniform(GLcontext *ctx, struct gl_program *program, GLint location, + GLenum type, GLint count, GLint elems, const void *values) +{ + if (program->Parameters->Parameters[location].Type == PROGRAM_SAMPLER) { + /* This controls which texture unit which is used by a sampler */ + GLuint texUnit, sampler; + + /* data type for setting samplers must be int */ if (type != GL_INT || count != 1) { _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(only glUniform1i can be used " "to set sampler uniforms)"); return; } + + sampler = (GLuint) program->Parameters->ParameterValues[location][0]; + texUnit = ((GLuint *) values)[0]; + /* check that the sampler (tex unit index) is legal */ - unit = ((GLint *) values)[0]; - if (unit >= ctx->Const.MaxTextureImageUnits) { + if (texUnit >= ctx->Const.MaxTextureImageUnits) { _mesa_error(ctx, GL_INVALID_VALUE, "glUniform1(invalid sampler/tex unit index)"); return; } + + /* This maps a sampler to a texture unit: */ + program->SamplerUnits[sampler] = texUnit; + update_textures_used(program); + + FLUSH_VERTICES(ctx, _NEW_TEXTURE); + } + else { + /* ordinary uniform variable */ + GLint k, i; + + if (count * elems > program->Parameters->Parameters[location].Size) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(count too large)"); + return; + } + + for (k = 0; k < count; k++) { + GLfloat *uniformVal = program->Parameters->ParameterValues[location + k]; + if (type == GL_INT || + type == GL_INT_VEC2 || + type == GL_INT_VEC3 || + type == GL_INT_VEC4) { + const GLint *iValues = ((const GLint *) values) + k * elems; + for (i = 0; i < elems; i++) { + uniformVal[i] = (GLfloat) iValues[i]; + } + } + else { + const GLfloat *fValues = ((const 
GLfloat *) values) + k * elems; + for (i = 0; i < elems; i++) { + uniformVal[i] = fValues[i]; + } + } + } + } +} + + +/** + * Called via ctx->Driver.Uniform(). + */ +static void +_mesa_uniform(GLcontext *ctx, GLint location, GLsizei count, + const GLvoid *values, GLenum type) +{ + struct gl_shader_program *shProg = ctx->Shader.CurrentProgram; + GLint elems; + + if (!shProg || !shProg->LinkStatus) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(program not linked)"); + return; + } + + if (location == -1) + return; /* The standard specifies this as a no-op */ + + if (location < 0 || location >= (GLint) shProg->Uniforms->NumUniforms) { + _mesa_error(ctx, GL_INVALID_VALUE, "glUniform(location)"); + return; } if (count < 0) { @@ -1214,69 +1190,56 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count, return; } - /* OpenGL requires types to match exactly, except that one can convert - * float or int array to boolean array. + FLUSH_VERTICES(ctx, _NEW_PROGRAM); + + /* A uniform var may be used by both a vertex shader and a fragment + * shader. 
We may need to update one or both shader's uniform here: */ - switch (uType) - { - case GL_BOOL: - case GL_BOOL_VEC2: - case GL_BOOL_VEC3: - case GL_BOOL_VEC4: - if (elems != sizeof_glsl_type(uType)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(count mismatch)"); - } - break; - case PROGRAM_SAMPLER: - break; - default: - if (shProg->Uniforms->Parameters[location].Type != PROGRAM_SAMPLER - && uType != type) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(type mismatch)"); - } - break; + if (shProg->VertexProgram) { + GLint loc = shProg->Uniforms->Uniforms[location].VertPos; + if (loc >= 0) { + set_program_uniform(ctx, &shProg->VertexProgram->Base, + loc, type, count, elems, values); + } + } + + if (shProg->FragmentProgram) { + GLint loc = shProg->Uniforms->Uniforms[location].FragPos; + if (loc >= 0) { + set_program_uniform(ctx, &shProg->FragmentProgram->Base, + loc, type, count, elems, values); + } } +} + - /* XXX if this is a base type, then count must equal 1. However, we - * don't have enough information from the compiler to distinguish a - * base type from a 1-element array of that type. The standard allows - * count to overrun an array, in which case the overflow is ignored. +static void +set_program_uniform_matrix(GLcontext *ctx, struct gl_program *program, + GLuint location, GLuint rows, GLuint cols, + GLboolean transpose, const GLfloat *values) +{ + /* + * Note: the _columns_ of a matrix are stored in program registers, not + * the rows. */ - maxCount = shProg->Uniforms->Parameters[location].Size / elems; - if (count > maxCount) count = maxCount; - - for (k = 0; k < count; k++) { - GLfloat *uniformVal = shProg->Uniforms->ParameterValues[location + k]; - if (type == GL_INT || - type == GL_INT_VEC2 || - type == GL_INT_VEC3 || - type == GL_INT_VEC4) { - const GLint *iValues = ((const GLint *) values) + k * elems; - for (i = 0; i < elems; i++) { - uniformVal[i] = (GLfloat) iValues[i]; + /* XXXX need to test 3x3 and 2x2 matrices... 
*/ + if (transpose) { + GLuint row, col; + for (col = 0; col < cols; col++) { + GLfloat *v = program->Parameters->ParameterValues[location + col]; + for (row = 0; row < rows; row++) { + v[row] = values[row * cols + col]; } } - else { - const GLfloat *fValues = ((const GLfloat *) values) + k * elems; - for (i = 0; i < elems; i++) { - uniformVal[i] = fValues[i]; + } + else { + GLuint row, col; + for (col = 0; col < cols; col++) { + GLfloat *v = program->Parameters->ParameterValues[location + col]; + for (row = 0; row < rows; row++) { + v[row] = values[col * rows + row]; } } - if (uType == GL_BOOL || - uType == GL_BOOL_VEC2 || - uType == GL_BOOL_VEC3 || - uType == GL_BOOL_VEC4) { - for (i = 0; i < elems; i++) - uniformVal[i] = uniformVal[i] ? 1.0f : 0.0f; - } - } - - if (shProg->Uniforms->Parameters[location].Type == PROGRAM_SAMPLER) { - if (shProg->VertexProgram) - _slang_resolve_samplers(shProg, &shProg->VertexProgram->Base); - if (shProg->FragmentProgram) - _slang_resolve_samplers(shProg, &shProg->FragmentProgram->Base); - FLUSH_VERTICES(ctx, _NEW_TEXTURE); } } @@ -1284,72 +1247,52 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count, /** * Called by ctx->Driver.UniformMatrix(). 
*/ -void +static void _mesa_uniform_matrix(GLcontext *ctx, GLint cols, GLint rows, GLenum matrixType, GLint location, GLsizei count, GLboolean transpose, const GLfloat *values) { - GLsizei maxCount, i; struct gl_shader_program *shProg = ctx->Shader.CurrentProgram; + if (!shProg || !shProg->LinkStatus) { _mesa_error(ctx, GL_INVALID_OPERATION, "glUniformMatrix(program not linked)"); return; } + if (location == -1) return; /* The standard specifies this as a no-op */ - /* The spec says this is GL_INVALID_OPERATION, although it seems like it - * ought to be GL_INVALID_VALUE - */ - if (location < 0 || location >= (GLint) shProg->Uniforms->NumParameters) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glUniformMatrix(location)"); + + if (location < 0 || location >= shProg->Uniforms->NumUniforms) { + _mesa_error(ctx, GL_INVALID_VALUE, "glUniformMatrix(location)"); return; } if (values == NULL) { _mesa_error(ctx, GL_INVALID_VALUE, "glUniformMatrix"); return; } - if (count < 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glUniformMatrix(count < 0)"); - return; - } FLUSH_VERTICES(ctx, _NEW_PROGRAM); - /* - * Note: the _columns_ of a matrix are stored in program registers, not - * the rows. - */ - /* XXXX need to test 3x3 and 2x2 matrices... 
*/ - maxCount = shProg->Uniforms->Parameters[location].Size / (4 * cols); - if (count > maxCount) - count = maxCount; - for (i = 0; i < count; i++) { - if (transpose) { - GLuint row, col; - for (col = 0; col < cols; col++) { - GLfloat *v = shProg->Uniforms->ParameterValues[location + col]; - for (row = 0; row < rows; row++) { - v[row] = values[row * cols + col]; - } - } + if (shProg->VertexProgram) { + GLint loc = shProg->Uniforms->Uniforms[location].VertPos; + if (loc >= 0) { + set_program_uniform_matrix(ctx, &shProg->VertexProgram->Base, + loc, rows, cols, transpose, values); } - else { - GLuint row, col; - for (col = 0; col < cols; col++) { - GLfloat *v = shProg->Uniforms->ParameterValues[location + col]; - for (row = 0; row < rows; row++) { - v[row] = values[col * rows + row]; - } - } + } + + if (shProg->FragmentProgram) { + GLint loc = shProg->Uniforms->Uniforms[location].FragPos; + if (loc >= 0) { + set_program_uniform_matrix(ctx, &shProg->FragmentProgram->Base, + loc, rows, cols, transpose, values); } - location += cols; - values += rows * cols; } } -void +static void _mesa_validate_program(GLcontext *ctx, GLuint program) { struct gl_shader_program *shProg; @@ -1375,3 +1318,40 @@ _mesa_validate_program(GLcontext *ctx, GLuint program) image units allowed. */ } + + +/** + * Plug in Mesa's GLSL functions into the device driver function table. 
+ */ +void +_mesa_init_glsl_driver_functions(struct dd_function_table *driver) +{ + driver->AttachShader = _mesa_attach_shader; + driver->BindAttribLocation = _mesa_bind_attrib_location; + driver->CompileShader = _mesa_compile_shader; + driver->CreateProgram = _mesa_create_program; + driver->CreateShader = _mesa_create_shader; + driver->DeleteProgram2 = _mesa_delete_program2; + driver->DeleteShader = _mesa_delete_shader; + driver->DetachShader = _mesa_detach_shader; + driver->GetActiveAttrib = _mesa_get_active_attrib; + driver->GetActiveUniform = _mesa_get_active_uniform; + driver->GetAttachedShaders = _mesa_get_attached_shaders; + driver->GetAttribLocation = _mesa_get_attrib_location; + driver->GetHandle = _mesa_get_handle; + driver->GetProgramiv = _mesa_get_programiv; + driver->GetProgramInfoLog = _mesa_get_program_info_log; + driver->GetShaderiv = _mesa_get_shaderiv; + driver->GetShaderInfoLog = _mesa_get_shader_info_log; + driver->GetShaderSource = _mesa_get_shader_source; + driver->GetUniformfv = _mesa_get_uniformfv; + driver->GetUniformLocation = _mesa_get_uniform_location; + driver->IsProgram = _mesa_is_program; + driver->IsShader = _mesa_is_shader; + driver->LinkProgram = _mesa_link_program; + driver->ShaderSource = _mesa_shader_source; + driver->Uniform = _mesa_uniform; + driver->UniformMatrix = _mesa_uniform_matrix; + driver->UseProgram = _mesa_use_program; + driver->ValidateProgram = _mesa_validate_program; +} diff --git a/src/mesa/shader/shader_api.h b/src/mesa/shader/shader_api.h index 27e5870d70..5521c585b5 100644 --- a/src/mesa/shader/shader_api.h +++ b/src/mesa/shader/shader_api.h @@ -41,9 +41,10 @@ _mesa_init_shader_state(GLcontext * ctx); extern void _mesa_free_shader_state(GLcontext *ctx); +/* extern struct gl_shader_program * _mesa_new_shader_program(GLcontext *ctx, GLuint name); - +*/ extern void _mesa_clear_shader_program_data(GLcontext *ctx, struct gl_shader_program *shProg); @@ -78,108 +79,12 @@ extern struct gl_shader * 
_mesa_lookup_shader(GLcontext *ctx, GLuint name); -/** - * API/Driver functions - */ - -extern void -_mesa_attach_shader(GLcontext *ctx, GLuint program, GLuint shader); - -extern void -_mesa_bind_attrib_location(GLcontext *ctx, GLuint program, GLuint index, - const GLchar *name); - -extern void -_mesa_compile_shader(GLcontext *ctx, GLuint shaderObj); - -extern GLuint -_mesa_create_shader(GLcontext *ctx, GLenum type); - -extern GLuint -_mesa_create_program(GLcontext *ctx); - -extern void -_mesa_delete_program2(GLcontext *ctx, GLuint name); - -extern void -_mesa_delete_shader(GLcontext *ctx, GLuint shader); - -extern void -_mesa_detach_shader(GLcontext *ctx, GLuint program, GLuint shader); - -extern void -_mesa_get_active_attrib(GLcontext *ctx, GLuint program, GLuint index, - GLsizei maxLength, GLsizei *length, GLint *size, - GLenum *type, GLchar *name); - -extern void -_mesa_get_active_uniform(GLcontext *ctx, GLuint program, GLuint index, - GLsizei maxLength, GLsizei *length, GLint *size, - GLenum *type, GLchar *name); - -extern void -_mesa_get_attached_shaders(GLcontext *ctx, GLuint program, GLsizei maxCount, - GLsizei *count, GLuint *obj); - -extern GLint -_mesa_get_attrib_location(GLcontext *ctx, GLuint program, - const GLchar *name); - -extern GLuint -_mesa_get_handle(GLcontext *ctx, GLenum pname); - -extern void -_mesa_get_programiv(GLcontext *ctx, GLuint program, - GLenum pname, GLint *params); - -extern void -_mesa_get_program_info_log(GLcontext *ctx, GLuint program, GLsizei bufSize, - GLsizei *length, GLchar *infoLog); - -extern void -_mesa_get_shaderiv(GLcontext *ctx, GLuint shader, GLenum pname, GLint *params); - -extern void -_mesa_get_shader_info_log(GLcontext *ctx, GLuint shader, GLsizei bufSize, - GLsizei *length, GLchar *infoLog); - -extern void -_mesa_get_shader_source(GLcontext *ctx, GLuint shader, GLsizei maxLength, - GLsizei *length, GLchar *sourceOut); - -extern void -_mesa_get_uniformfv(GLcontext *ctx, GLuint program, GLint location, - GLfloat 
*params); - -extern GLint -_mesa_get_uniform_location(GLcontext *ctx, GLuint program, const GLchar *name); - -extern GLboolean -_mesa_is_program(GLcontext *ctx, GLuint name); - -extern GLboolean -_mesa_is_shader(GLcontext *ctx, GLuint name); - -extern void -_mesa_link_program(GLcontext *ctx, GLuint program); - -extern void -_mesa_shader_source(GLcontext *ctx, GLuint shader, const GLchar *source); - -extern void -_mesa_uniform(GLcontext *ctx, GLint location, GLsizei count, - const GLvoid *values, GLenum type); - -void -_mesa_uniform_matrix(GLcontext *ctx, GLint cols, GLint rows, - GLenum matrixType, GLint location, GLsizei count, - GLboolean transpose, const GLfloat *values); - extern void _mesa_use_program(GLcontext *ctx, GLuint program); + extern void -_mesa_validate_program(GLcontext *ctx, GLuint program); +_mesa_init_glsl_driver_functions(struct dd_function_table *driver); #endif /* SHADER_API_H */ diff --git a/src/mesa/shader/slang/library/Makefile b/src/mesa/shader/slang/library/Makefile index dc67b59088..0e03fac2ee 100644 --- a/src/mesa/shader/slang/library/Makefile +++ b/src/mesa/shader/slang/library/Makefile @@ -17,7 +17,7 @@ LIB_DEP = $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) default: syntax builtin clean: - rm -f syn_to_c gc_to_bin *_syn.h *_gc.h + -rm -f syn_to_c gc_to_bin *_syn.h *_gc.h syntax: slang_pp_directives_syn.h slang_pp_expression_syn.h slang_shader_syn.h slang_pp_version_syn.h diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index ebead3274e..d19d5a0abb 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -1090,7 +1090,7 @@ slang_inline_function_call(slang_assemble_ctx * A, slang_function *fun, slang_operation_copy(inlined, fun->body); /*** XXX review this */ - assert(inlined->type = SLANG_OPER_BLOCK_NO_NEW_SCOPE); + assert(inlined->type == SLANG_OPER_BLOCK_NO_NEW_SCOPE); inlined->type = SLANG_OPER_BLOCK_NEW_SCOPE; #if 0 @@ -2836,14 +2836,13 @@ 
_slang_codegen_global_variable(slang_assemble_ctx *A, slang_variable *var, const GLint texIndex = sampler_to_texture_index(var->type.specifier.type); if (texIndex != -1) { - /* Texture sampler: + /* This is a texture sampler variable... * store->File = PROGRAM_SAMPLER - * store->Index = sampler uniform location + * store->Index = sampler number (0..7, typically) * store->Size = texture type index (1D, 2D, 3D, cube, etc) */ - GLint samplerUniform - = _mesa_add_sampler(prog->Parameters, varName, datatype); - store = _slang_new_ir_storage(PROGRAM_SAMPLER, samplerUniform, texIndex); + GLint sampNum = _mesa_add_sampler(prog->Parameters, varName, datatype); + store = _slang_new_ir_storage(PROGRAM_SAMPLER, sampNum, texIndex); if (dbg) printf("SAMPLER "); } else if (var->type.qualifier == SLANG_QUAL_UNIFORM) { diff --git a/src/mesa/shader/slang/slang_compile.c b/src/mesa/shader/slang/slang_compile.c index 2be89a5ce0..46b5c54bbe 100644 --- a/src/mesa/shader/slang/slang_compile.c +++ b/src/mesa/shader/slang/slang_compile.c @@ -31,6 +31,8 @@ #include "main/imports.h" #include "main/context.h" #include "shader/program.h" +#include "shader/programopt.h" +#include "shader/prog_print.h" #include "shader/prog_parameter.h" #include "shader/grammar/grammar_mesa.h" #include "slang_codegen.h" @@ -2180,6 +2182,19 @@ _slang_compile(GLcontext *ctx, struct gl_shader *shader) _slang_delete_mempool((slang_mempool *) ctx->Shader.MemPool); ctx->Shader.MemPool = NULL; + if (shader->Type == GL_VERTEX_SHADER) { + /* remove any reads of varying (output) registers */ +#if 0 + printf("Pre-remove output reads:\n"); + _mesa_print_program(shader->Programs[0]); +#endif + _mesa_remove_varying_reads(shader->Programs[0]); +#if 0 + printf("Post-remove output reads:\n"); + _mesa_print_program(shader->Programs[0]); +#endif + } + return success; } diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c index 9c307c6275..ff63e05dd2 100644 --- a/src/mesa/shader/slang/slang_emit.c 
+++ b/src/mesa/shader/slang/slang_emit.c @@ -79,7 +79,7 @@ new_subroutine(slang_emit_info *emitInfo, GLuint *id) _mesa_realloc(emitInfo->Subroutines, n * sizeof(struct gl_program), (n + 1) * sizeof(struct gl_program)); - emitInfo->Subroutines[n] = _mesa_new_program(ctx, emitInfo->prog->Target, 0); + emitInfo->Subroutines[n] = ctx->Driver.NewProgram(ctx, emitInfo->prog->Target, 0); emitInfo->Subroutines[n]->Parameters = emitInfo->prog->Parameters; emitInfo->NumSubroutines++; *id = n; @@ -922,11 +922,15 @@ emit_tex(slang_emit_info *emitInfo, slang_ir_node *n) assert(n->Children[0]->Store->Size >= TEXTURE_1D_INDEX); assert(n->Children[0]->Store->Size <= TEXTURE_RECT_INDEX); - inst->Sampler = n->Children[0]->Store->Index; /* i.e. uniform's index */ inst->TexSrcTarget = n->Children[0]->Store->Size; +#if 0 inst->TexSrcUnit = 27; /* Dummy value; the TexSrcUnit will be computed at * link time, using the sampler uniform's value. */ + inst->Sampler = n->Children[0]->Store->Index; /* i.e. uniform's index */ +#else + inst->TexSrcUnit = n->Children[0]->Store->Index; /* i.e. 
uniform's index */ +#endif return inst; } @@ -1793,7 +1797,7 @@ _slang_resolve_subroutines(slang_emit_info *emitInfo) sub->NumInstructions); /* delete subroutine code */ sub->Parameters = NULL; /* prevent double-free */ - _mesa_delete_program(ctx, sub); + _mesa_reference_program(ctx, &emitInfo->Subroutines[i], NULL); } /* free subroutine list */ diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c index c8457fc483..80cd4b6df6 100644 --- a/src/mesa/shader/slang/slang_link.c +++ b/src/mesa/shader/slang/slang_link.c @@ -37,12 +37,17 @@ #include "shader/prog_parameter.h" #include "shader/prog_print.h" #include "shader/prog_statevars.h" +#include "shader/prog_uniform.h" #include "shader/shader_api.h" #include "slang_link.h" - +/** + * Linking varying vars involves rearranging varying vars so that the + * vertex program's output varyings matches the order of the fragment + * program's input varyings. + */ static GLboolean link_varying_vars(struct gl_shader_program *shProg, struct gl_program *prog) { @@ -132,145 +137,65 @@ link_varying_vars(struct gl_shader_program *shProg, struct gl_program *prog) } -static GLboolean -is_uniform(GLuint file) -{ - return (file == PROGRAM_ENV_PARAM || - file == PROGRAM_STATE_VAR || - file == PROGRAM_NAMED_PARAM || - file == PROGRAM_CONSTANT || - file == PROGRAM_SAMPLER || - file == PROGRAM_UNIFORM); -} - - -static GLboolean -link_uniform_vars(struct gl_shader_program *shProg, struct gl_program *prog) +/** + * Build the shProg->Uniforms list. + * This is basically a list/index of all uniforms found in either/both of + * the vertex and fragment shaders. 
+ */ +static void +link_uniform_vars(struct gl_shader_program *shProg, + struct gl_program *prog, + GLuint *numSamplers) { - GLuint *map, i; - -#if 0 - printf("================ pre link uniforms ===============\n"); - _mesa_print_parameter_list(shProg->Uniforms); -#endif - - map = (GLuint *) malloc(prog->Parameters->NumParameters * sizeof(GLuint)); - if (!map) - return GL_FALSE; + GLuint samplerMap[MAX_SAMPLERS]; + GLuint i; - for (i = 0; i < prog->Parameters->NumParameters; /* incr below*/) { - /* see if this uniform is in the linked uniform list */ + for (i = 0; i < prog->Parameters->NumParameters; i++) { const struct gl_program_parameter *p = prog->Parameters->Parameters + i; - const GLfloat *pVals = prog->Parameters->ParameterValues[i]; - GLint j; - GLint size; - - /* sanity check */ - assert(is_uniform(p->Type)); - - if (p->Name) { - j = _mesa_lookup_parameter_index(shProg->Uniforms, -1, p->Name); - } - else { - /*GLuint swizzle;*/ - ASSERT(p->Type == PROGRAM_CONSTANT); - if (_mesa_lookup_parameter_constant(shProg->Uniforms, pVals, - p->Size, &j, NULL)) { - assert(j >= 0); - } - else { - j = -1; - } - } - if (j >= 0) { - /* already in list, check size XXX check this */ -#if 0 - assert(p->Size == shProg->Uniforms->Parameters[j].Size); -#endif - } - else { - /* not already in linked list */ - switch (p->Type) { - case PROGRAM_ENV_PARAM: - j = _mesa_add_named_parameter(shProg->Uniforms, p->Name, pVals); - break; - case PROGRAM_CONSTANT: - j = _mesa_add_named_constant(shProg->Uniforms, p->Name, pVals, p->Size); - break; - case PROGRAM_STATE_VAR: - j = _mesa_add_state_reference(shProg->Uniforms, p->StateIndexes); - break; - case PROGRAM_UNIFORM: - j = _mesa_add_uniform(shProg->Uniforms, p->Name, p->Size, p->DataType); - break; - case PROGRAM_SAMPLER: - j = _mesa_add_sampler(shProg->Uniforms, p->Name, p->DataType); - break; - default: - _mesa_problem(NULL, "bad parameter type in link_uniform_vars()"); - return GL_FALSE; - } + /* + * XXX FIX NEEDED HERE + * We should 
also be adding a uniform if p->Type == PROGRAM_STATE_VAR. + * For example, modelview matrix, light pos, etc. + * Also, we need to update the state-var name-generator code to + * generate GLSL-style names, like "gl_LightSource[0].position". + * Furthermore, we'll need to fix the state-var's size/datatype info. + */ + + if (p->Type == PROGRAM_UNIFORM || + p->Type == PROGRAM_SAMPLER) { + _mesa_append_uniform(shProg->Uniforms, p->Name, prog->Target, i); } - ASSERT(j >= 0); - - size = p->Size; - while (size > 0) { - map[i] = j; - i++; - j++; - size -= 4; + if (p->Type == PROGRAM_SAMPLER) { + /* Allocate a new sampler index */ + GLuint sampNum = *numSamplers; + GLuint oldSampNum = (GLuint) prog->Parameters->ParameterValues[i][0]; + assert(oldSampNum < MAX_SAMPLERS); + samplerMap[oldSampNum] = sampNum; + (*numSamplers)++; } - } -#if 0 - printf("================ post link uniforms ===============\n"); - _mesa_print_parameter_list(shProg->Uniforms); -#endif -#if 0 - { - GLuint i; - for (i = 0; i < prog->Parameters->NumParameters; i++) { - printf("map[%d] = %d\n", i, map[i]); - } - _mesa_print_parameter_list(shProg->Uniforms); - } -#endif - - /* OK, now scan the program/shader instructions looking for uniform vars, + /* OK, now scan the program/shader instructions looking for sampler vars, * replacing the old index with the new index. 
*/ + prog->SamplersUsed = 0x0; for (i = 0; i < prog->NumInstructions; i++) { struct prog_instruction *inst = prog->Instructions + i; - GLuint j; - - if (is_uniform(inst->DstReg.File)) { - inst->DstReg.Index = map[ inst->DstReg.Index ]; - } - - for (j = 0; j < 3; j++) { - if (is_uniform(inst->SrcReg[j].File)) { - inst->SrcReg[j].Index = map[ inst->SrcReg[j].Index ]; - } - } - - if (inst->Opcode == OPCODE_TEX || - inst->Opcode == OPCODE_TXB || - inst->Opcode == OPCODE_TXP) { + if (_mesa_is_tex_instruction(inst->Opcode)) { /* printf("====== remap sampler from %d to %d\n", inst->Sampler, map[ inst->Sampler ]); */ - inst->Sampler = map[ inst->Sampler ]; + /* here, texUnit is really samplerUnit */ + inst->TexSrcUnit = samplerMap[inst->TexSrcUnit]; + prog->SamplerTargets[inst->TexSrcUnit] = inst->TexSrcTarget; + prog->SamplersUsed |= (1 << inst->TexSrcUnit); } } - free(map); - - return GL_TRUE; } @@ -329,10 +254,8 @@ _slang_resolve_attributes(struct gl_shader_program *shProg, * glVertex/position. */ for (attr = 1; attr < MAX_VERTEX_ATTRIBS; attr++) { - if (((1 << attr) & usedAttributes) == 0) { - usedAttributes |= (1 << attr); + if (((1 << attr) & usedAttributes) == 0) break; - } } if (attr == MAX_VERTEX_ATTRIBS) { /* too many! XXX record error log */ @@ -406,36 +329,6 @@ _slang_remap_attribute(struct gl_program *prog, GLuint oldAttrib, GLuint newAttr -/** - * Scan program for texture instructions, lookup sampler/uniform's value - * to determine which texture unit to use. - * Also, update the program's TexturesUsed[] array. 
- */ -void -_slang_resolve_samplers(struct gl_shader_program *shProg, - struct gl_program *prog) -{ - GLuint i; - - for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) - prog->TexturesUsed[i] = 0; - - for (i = 0; i < prog->NumInstructions; i++) { - struct prog_instruction *inst = prog->Instructions + i; - if (inst->Opcode == OPCODE_TEX || - inst->Opcode == OPCODE_TXB || - inst->Opcode == OPCODE_TXP) { - GLint sampleUnit = (GLint) shProg->Uniforms->ParameterValues[inst->Sampler][0]; - assert(sampleUnit < MAX_TEXTURE_IMAGE_UNITS); - inst->TexSrcUnit = sampleUnit; - - prog->TexturesUsed[inst->TexSrcUnit] |= (1 << inst->TexSrcTarget); - } - } -} - - - /** cast wrapper */ static struct gl_vertex_program * vertex_program(struct gl_program *prog) @@ -476,12 +369,9 @@ link_error(struct gl_shader_program *shProg, const char *msg) * 2. Varying vars in the two shaders are combined so their locations * agree between the vertex and fragment stages. They're treated as * vertex program output attribs and as fragment program input attribs. - * 3. Uniform vars (including state references, constants, etc) from the - * vertex and fragment shaders are merged into one group. Recall that - * GLSL uniforms are shared by all linked shaders. - * 4. The vertex and fragment programs are cloned and modified to update - * src/dst register references so they use the new, linked uniform/ - * varying storage locations. + * 3. The vertex and fragment programs are cloned and modified to update + * src/dst register references so they use the new, linked varying + * storage locations. 
*/ void _slang_link(GLcontext *ctx, @@ -490,11 +380,12 @@ _slang_link(GLcontext *ctx, { const struct gl_vertex_program *vertProg; const struct gl_fragment_program *fragProg; + GLuint numSamplers = 0; GLuint i; _mesa_clear_shader_program_data(ctx, shProg); - shProg->Uniforms = _mesa_new_parameter_list(); + shProg->Uniforms = _mesa_new_uniform_list(); shProg->Varying = _mesa_new_parameter_list(); /** @@ -515,48 +406,35 @@ _slang_link(GLcontext *ctx, * Make copies of the vertex/fragment programs now since we'll be * changing src/dst registers after merging the uniforms and varying vars. */ + _mesa_reference_vertprog(ctx, &shProg->VertexProgram, NULL); if (vertProg) { - shProg->VertexProgram - = vertex_program(_mesa_clone_program(ctx, &vertProg->Base)); - } - else { - shProg->VertexProgram = NULL; + struct gl_vertex_program *linked_vprog = + vertex_program(_mesa_clone_program(ctx, &vertProg->Base)); + shProg->VertexProgram = linked_vprog; /* refcount OK */ + ASSERT(shProg->VertexProgram->Base.RefCount == 1); } + _mesa_reference_fragprog(ctx, &shProg->FragmentProgram, NULL); if (fragProg) { - shProg->FragmentProgram - = fragment_program(_mesa_clone_program(ctx, &fragProg->Base)); - } - else { - shProg->FragmentProgram = NULL; + struct gl_fragment_program *linked_fprog = + fragment_program(_mesa_clone_program(ctx, &fragProg->Base)); + shProg->FragmentProgram = linked_fprog; /* refcount OK */ + ASSERT(shProg->FragmentProgram->Base.RefCount == 1); } + /* link varying vars */ if (shProg->VertexProgram) link_varying_vars(shProg, &shProg->VertexProgram->Base); if (shProg->FragmentProgram) link_varying_vars(shProg, &shProg->FragmentProgram->Base); + /* link uniform vars */ if (shProg->VertexProgram) - link_uniform_vars(shProg, &shProg->VertexProgram->Base); + link_uniform_vars(shProg, &shProg->VertexProgram->Base, &numSamplers); if (shProg->FragmentProgram) - link_uniform_vars(shProg, &shProg->FragmentProgram->Base); - - /* The vertex and fragment programs share a common set 
of uniforms now */ - if (shProg->VertexProgram) { - _mesa_free_parameter_list(shProg->VertexProgram->Base.Parameters); - shProg->VertexProgram->Base.Parameters = shProg->Uniforms; - } - if (shProg->FragmentProgram) { - _mesa_free_parameter_list(shProg->FragmentProgram->Base.Parameters); - shProg->FragmentProgram->Base.Parameters = shProg->Uniforms; - } + link_uniform_vars(shProg, &shProg->FragmentProgram->Base, &numSamplers); - if (shProg->VertexProgram) { - _slang_resolve_samplers(shProg, &shProg->VertexProgram->Base); - } - if (shProg->FragmentProgram) { - _slang_resolve_samplers(shProg, &shProg->FragmentProgram->Base); - } + /*_mesa_print_uniforms(shProg->Uniforms);*/ if (shProg->VertexProgram) { if (!_slang_resolve_attributes(shProg, &shProg->VertexProgram->Base)) { diff --git a/src/mesa/shader/slang/slang_link.h b/src/mesa/shader/slang/slang_link.h index 606b9e46b1..8ef8a6b4b3 100644 --- a/src/mesa/shader/slang/slang_link.h +++ b/src/mesa/shader/slang/slang_link.h @@ -33,10 +33,6 @@ _slang_link(GLcontext *ctx, GLhandleARB h, struct gl_shader_program *shProg); extern void -_slang_resolve_samplers(struct gl_shader_program *shProg, - struct gl_program *prog); - -extern void _slang_remap_attribute(struct gl_program *prog, GLuint oldAttrib, GLuint newAttrib); diff --git a/src/mesa/sources b/src/mesa/sources index dbfc01d0ed..054f667a25 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -165,6 +165,7 @@ SHADER_SOURCES = \ shader/prog_parameter.c \ shader/prog_print.c \ shader/prog_statevars.c \ + shader/prog_uniform.c \ shader/programopt.c \ shader/shader_api.c \ diff --git a/src/mesa/swrast/s_drawpix.c b/src/mesa/swrast/s_drawpix.c index 81f5caa270..730798c908 100644 --- a/src/mesa/swrast/s_drawpix.c +++ b/src/mesa/swrast/s_drawpix.c @@ -840,8 +840,10 @@ _swrast_DrawPixels( GLcontext *ctx, _swrast_validate_derived( ctx ); pixels = _mesa_map_drawpix_pbo(ctx, unpack, pixels); - if (!pixels) + if (!pixels) { + RENDER_FINISH(swrast,ctx); return; + } switch 
(format) { case GL_STENCIL_INDEX: diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c index 2dfc033d50..ecace9c502 100644 --- a/src/mesa/swrast/s_fragprog.c +++ b/src/mesa/swrast/s_fragprog.c @@ -33,18 +33,19 @@ /** - * Fetch a texel. + * Fetch a texel with given lod. + * Called via machine->FetchTexelLod() */ static void -fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda, - GLuint unit, GLfloat color[4] ) +fetch_texel_lod( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda, + GLuint unit, GLfloat color[4] ) { GLchan rgba[4]; SWcontext *swrast = SWRAST_CONTEXT(ctx); const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current; lambda = CLAMP(lambda, texObj->MinLod, texObj->MaxLod); - + /* XXX use a float-valued TextureSample routine here!!! */ swrast->TextureSample[unit](ctx, texObj, 1, (const GLfloat (*)[4]) texcoord, &lambda, &rgba); @@ -58,6 +59,7 @@ fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda, /** * Fetch a texel with the given partial derivatives to compute a level * of detail in the mipmap. 
+ * Called via machine->FetchTexelDeriv() */ static void fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4], @@ -117,6 +119,8 @@ init_machine(GLcontext *ctx, struct gl_program_machine *machine, machine->DerivY = (GLfloat (*)[4]) span->attrStepY; machine->NumDeriv = FRAG_ATTRIB_MAX; + machine->Samplers = program->Base.SamplerUnits; + /* if running a GLSL program (not ARB_fragment_program) */ if (ctx->Shader.CurrentProgram) { /* Store front/back facing value in register FOGC.Y */ @@ -134,7 +138,7 @@ init_machine(GLcontext *ctx, struct gl_program_machine *machine, /* init call stack */ machine->StackDepth = 0; - machine->FetchTexelLod = fetch_texel; + machine->FetchTexelLod = fetch_texel_lod; machine->FetchTexelDeriv = fetch_texel_deriv; } diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c index 656a90a99a..4c58f8de87 100644 --- a/src/mesa/swrast/s_span.c +++ b/src/mesa/swrast/s_span.c @@ -1401,7 +1401,6 @@ _swrast_write_rgba_span( GLcontext *ctx, SWspan *span) * Write to renderbuffers */ { - const struct gl_fragment_program *fp = ctx->FragmentProgram._Current; const GLuint numBuffers = fb->_NumColorDrawBuffers; const GLboolean multiFragOutputs = numBuffers > 1; GLuint buf; diff --git a/src/mesa/swrast/s_texstore.c b/src/mesa/swrast/s_texstore.c index 3f49b40d9c..547d5b9ea0 100644 --- a/src/mesa/swrast/s_texstore.c +++ b/src/mesa/swrast/s_texstore.c @@ -305,7 +305,7 @@ _swrast_copy_teximage1d( GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, texUnit, texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } } @@ -381,7 +381,7 @@ _swrast_copy_teximage2d( GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, texUnit, texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } } @@ -450,7 +450,7 @@ 
_swrast_copy_texsubimage1d( GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, texUnit, texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } } @@ -526,7 +526,7 @@ _swrast_copy_texsubimage2d( GLcontext *ctx, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, texUnit, texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } } @@ -599,6 +599,6 @@ _swrast_copy_texsubimage3d( GLcontext *ctx, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, texUnit, texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } } diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h index baf283ef0f..1ac508f033 100644 --- a/src/mesa/tnl/t_context.h +++ b/src/mesa/tnl/t_context.h @@ -388,7 +388,7 @@ struct tnl_clipspace struct tnl_cache_item { GLuint hash; void *key; - void *data; + struct gl_vertex_program *prog; struct tnl_cache_item *next; }; diff --git a/src/mesa/tnl/t_vp_build.c b/src/mesa/tnl/t_vp_build.c index a7fd815a26..d79f84f1eb 100644 --- a/src/mesa/tnl/t_vp_build.c +++ b/src/mesa/tnl/t_vp_build.c @@ -1464,21 +1464,22 @@ create_new_program( const struct state_key *key, build_tnl_program( &p ); } -static void *search_cache( struct tnl_cache *cache, - GLuint hash, - const void *key, - GLuint keysize) + +static struct gl_vertex_program * +search_cache(struct tnl_cache *cache, GLuint hash, + const void *key, GLuint keysize) { struct tnl_cache_item *c; for (c = cache->items[hash % cache->size]; c; c = c->next) { if (c->hash == hash && _mesa_memcmp(c->key, key, keysize) == 0) - return c->data; + return c->prog; } return NULL; } + static void rehash( struct tnl_cache *cache ) { struct tnl_cache_item **items; @@ -1501,15 +1502,17 @@ static void rehash( struct tnl_cache *cache ) cache->size = 
size; } -static void cache_item( struct tnl_cache *cache, +static void cache_item( GLcontext *ctx, + struct tnl_cache *cache, GLuint hash, void *key, - void *data ) + struct gl_vertex_program *prog ) { - struct tnl_cache_item *c = (struct tnl_cache_item*) _mesa_malloc(sizeof(*c)); + struct tnl_cache_item *c = CALLOC_STRUCT(tnl_cache_item); c->hash = hash; c->key = key; - c->data = data; + + c->prog = prog; if (++cache->n_items > cache->size * 1.5) rehash(cache); @@ -1540,6 +1543,8 @@ void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx ) if (!ctx->VertexProgram._Current || ctx->VertexProgram._Current == ctx->VertexProgram._TnlProgram) { + struct gl_vertex_program *newProg; + /* Grab all the relevent state and put it in a single structure: */ key = make_state_key(ctx); @@ -1547,33 +1552,33 @@ void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx ) /* Look for an already-prepared program for this state: */ - ctx->VertexProgram._TnlProgram = (struct gl_vertex_program *) - search_cache( tnl->vp_cache, hash, key, sizeof(*key) ); + newProg = search_cache( tnl->vp_cache, hash, key, sizeof(*key)); /* OK, we'll have to build a new one: */ - if (!ctx->VertexProgram._TnlProgram) { + if (!newProg) { + if (0) _mesa_printf("Build new TNL program\n"); - ctx->VertexProgram._TnlProgram = (struct gl_vertex_program *) + newProg = (struct gl_vertex_program *) ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); - create_new_program( key, ctx->VertexProgram._TnlProgram, - ctx->Const.VertexProgram.MaxTemps ); + create_new_program( key, newProg, ctx->Const.VertexProgram.MaxTemps ); if (ctx->Driver.ProgramStringNotify) ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, - &ctx->VertexProgram._TnlProgram->Base ); + &newProg->Base ); - cache_item(tnl->vp_cache, hash, key, ctx->VertexProgram._TnlProgram ); + /* Our ownership of newProg is transferred to the cache */ + cache_item(ctx, tnl->vp_cache, hash, key, newProg); } else { FREE(key); - if (0) - _mesa_printf("Found existing TNL 
program for key %x\n", hash); } - ctx->VertexProgram._Current = ctx->VertexProgram._TnlProgram; + + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, newProg); + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, newProg); } /* Tell the driver about the change. Could define a new target for @@ -1606,7 +1611,7 @@ void _tnl_ProgramCacheDestroy( GLcontext *ctx ) for (c = tnl->vp_cache->items[i]; c; c = next) { next = c->next; FREE(c->key); - FREE(c->data); + _mesa_reference_vertprog(ctx, &c->prog, NULL); FREE(c); } diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 0ef26cdfe3..68ce7ba837 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -187,7 +187,7 @@ static void vbo_exec_bind_arrays( GLcontext *ctx ) arrays[attr].BufferObj = exec->vtx.bufferobj; /* NullBufferObj */ arrays[attr]._MaxElement = count; /* ??? */ - data += exec->vtx.attrsz[attr] * sizeof(GLfloat); + data += exec->vtx.attrsz[src] * sizeof(GLfloat); } } } diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c index ee6df22605..bf5c6d4eef 100644 --- a/src/mesa/vbo/vbo_save_draw.c +++ b/src/mesa/vbo/vbo_save_draw.c @@ -146,7 +146,7 @@ static void vbo_bind_vertex_list( GLcontext *ctx, assert(arrays[attr].BufferObj->Name); - data += node->attrsz[attr] * sizeof(GLfloat); + data += node->attrsz[src] * sizeof(GLfloat); } } } diff --git a/src/mesa/x86-64/Makefile b/src/mesa/x86-64/Makefile index 252218ca86..c6b69bafe8 100644 --- a/src/mesa/x86-64/Makefile +++ b/src/mesa/x86-64/Makefile @@ -19,7 +19,7 @@ INCLUDE_DIRS = \ default: matypes.h clean: - rm -f matypes.h + -rm -f matypes.h # need some special rules here, unfortunately diff --git a/src/mesa/x86/Makefile b/src/mesa/x86/Makefile index 3c6a6b11c0..dc8c7f355e 100644 --- a/src/mesa/x86/Makefile +++ b/src/mesa/x86/Makefile @@ -17,7 +17,7 @@ INCLUDE_DIRS = \ default: gen_matypes matypes.h clean: - rm -f matypes.h gen_matypes + -rm -f matypes.h gen_matypes 
gen_matypes: gen_matypes.c diff --git a/src/mesa/x86/read_rgba_span_x86.S b/src/mesa/x86/read_rgba_span_x86.S index 2e5c3be83f..80144b889c 100644 --- a/src/mesa/x86/read_rgba_span_x86.S +++ b/src/mesa/x86/read_rgba_span_x86.S @@ -434,7 +434,8 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2: je .L47 movq (%ebx), %xmm0 - + addl $8, %ebx + movdqa %xmm0, %xmm3 movdqa %xmm0, %xmm4 andps %xmm1, %xmm0 @@ -448,6 +449,7 @@ _generic_read_RGBA_span_BGRA8888_REV_SSE2: orps %xmm3, %xmm0 movq %xmm0, (%ecx) + addl $8, %ecx .L47: testl $1, %edx |