From 5df82c82bd53db90eb72c5aad4dd20cf6f1116b1 Mon Sep 17 00:00:00 2001
From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Fri, 22 Aug 2003 20:11:43 +0000
Subject: patch to import Jon Smirl's work from Bitkeeper

---
 src/mesa/drivers/dri/radeon/Makefile.X11           |  154 ++
 src/mesa/drivers/dri/radeon/radeon_compat.c        |  304 +++
 src/mesa/drivers/dri/radeon/radeon_context.c       |  598 ++++++
 src/mesa/drivers/dri/radeon/radeon_context.h       |  839 ++++++++
 src/mesa/drivers/dri/radeon/radeon_ioctl.c         | 1191 +++++++++++
 src/mesa/drivers/dri/radeon/radeon_ioctl.h         |  188 ++
 src/mesa/drivers/dri/radeon/radeon_lighting.c      |  682 ++++++
 src/mesa/drivers/dri/radeon/radeon_lock.c          |  128 ++
 src/mesa/drivers/dri/radeon/radeon_lock.h          |  113 +
 src/mesa/drivers/dri/radeon/radeon_maos.c          |   12 +
 src/mesa/drivers/dri/radeon/radeon_maos.h          |   47 +
 src/mesa/drivers/dri/radeon/radeon_maos_arrays.c   |  591 ++++++
 src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h    |  368 ++++
 src/mesa/drivers/dri/radeon/radeon_maos_verts.c    |  335 +++
 src/mesa/drivers/dri/radeon/radeon_sanity.c        | 1043 +++++++++
 src/mesa/drivers/dri/radeon/radeon_sanity.h        |    8 +
 src/mesa/drivers/dri/radeon/radeon_screen.c        |  421 ++++
 src/mesa/drivers/dri/radeon/radeon_screen.h        |  101 +
 src/mesa/drivers/dri/radeon/radeon_span.c          |  415 ++++
 src/mesa/drivers/dri/radeon/radeon_span.h          |   45 +
 src/mesa/drivers/dri/radeon/radeon_state.c         | 2211 ++++++++++++++++++++
 src/mesa/drivers/dri/radeon/radeon_state.h         |   77 +
 src/mesa/drivers/dri/radeon/radeon_state_init.c    |  531 +++++
 src/mesa/drivers/dri/radeon/radeon_subset.h        |   75 +
 src/mesa/drivers/dri/radeon/radeon_subset_bitmap.c |  197 ++
 .../drivers/dri/radeon/radeon_subset_readpix.c     |  246 +++
 src/mesa/drivers/dri/radeon/radeon_subset_select.c |  998 +++++++++
 src/mesa/drivers/dri/radeon/radeon_subset_tex.c    | 1018 +++++++++
 src/mesa/drivers/dri/radeon/radeon_subset_vtx.c    |  989 +++++++++
 src/mesa/drivers/dri/radeon/radeon_swtcl.c         | 1332 ++++++++++++
 src/mesa/drivers/dri/radeon/radeon_swtcl.h         |   77 +
 src/mesa/drivers/dri/radeon/radeon_tcl.c           |  527 +++++
 src/mesa/drivers/dri/radeon/radeon_tcl.h           |   70 +
 src/mesa/drivers/dri/radeon/radeon_tex.c           |  733 +++++++
 src/mesa/drivers/dri/radeon/radeon_tex.h           |   53 +
 src/mesa/drivers/dri/radeon/radeon_texmem.c        |  378 ++++
 src/mesa/drivers/dri/radeon/radeon_texstate.c      | 1628 ++++++++++++++
 src/mesa/drivers/dri/radeon/radeon_vtxfmt.c        | 1089 ++++++++++
 src/mesa/drivers/dri/radeon/radeon_vtxfmt.h        |  124 ++
 src/mesa/drivers/dri/radeon/radeon_vtxfmt_c.c      |  905 ++++++++
 src/mesa/drivers/dri/radeon/radeon_vtxfmt_sse.c    |  232 ++
 src/mesa/drivers/dri/radeon/radeon_vtxfmt_x86.c    |  437 ++++
 src/mesa/drivers/dri/radeon/server/radeon_common.h |  397 ++--
 src/mesa/drivers/dri/radeon/server/radeon_dri.c    |    6 +-
 src/mesa/drivers/dri/radeon/server/radeon_reg.h    |  132 +-
 src/mesa/drivers/dri/radeon/server/radeon_sarea.h  |    4 +-
 46 files changed, 21784 insertions(+), 265 deletions(-)
 create mode 100644 src/mesa/drivers/dri/radeon/Makefile.X11
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_compat.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_context.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_context.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_ioctl.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_ioctl.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_lighting.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_lock.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_lock.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_maos.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_maos.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_maos_verts.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_sanity.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_sanity.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_screen.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_screen.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_span.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_span.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_state.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_state.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_state_init.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_subset.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_subset_bitmap.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_subset_readpix.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_subset_select.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_subset_tex.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_subset_vtx.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_swtcl.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_swtcl.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_tcl.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_tcl.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_tex.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_tex.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_texmem.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_texstate.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_vtxfmt.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_vtxfmt.h
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_vtxfmt_c.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_vtxfmt_sse.c
 create mode 100644 src/mesa/drivers/dri/radeon/radeon_vtxfmt_x86.c

(limited to 'src/mesa/drivers/dri/radeon')

diff --git a/src/mesa/drivers/dri/radeon/Makefile.X11 b/src/mesa/drivers/dri/radeon/Makefile.X11
new file mode 100644
index 0000000000..b73abe8f51
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/Makefile.X11
@@ -0,0 +1,154 @@
+# $Id: Makefile.X11,v 1.1 2003/08/22 20:11:45 brianp Exp $
+
+# Mesa 3-D graphics library
+# Version:  5.0
+# Copyright (C) 1995-2002  Brian Paul
+
+TOP = ../../../../..
+
+default: linux-solo
+
+SHARED_INCLUDES = $(INCLUDE_DIRS) -I. -I../common -Iserver
+MINIGLX_INCLUDES = -I$(TOP)/src/glx/mini
+
+ifeq ($(EMBEDDED),true)
+TARGET = radeon_es_dri.so
+DEFINES += \
+              -D_EMBEDDED \
+	-D_HAVE_SWRAST=0 \
+	-D_HAVE_SWTNL=0 \
+	-D_HAVE_SANITY=0 \
+	-D_HAVE_CODEGEN=0 \
+	-D_HAVE_LIGHTING=0 \
+	-D_HAVE_TEXGEN=0 \
+	-D_HAVE_USERCLIP=0 \
+	-DGLX_DIRECT_RENDERING
+else
+TARGET = radeon_dri.so
+DEFINES += \
+	-D_HAVE_SWRAST=1 \
+	-D_HAVE_SWTNL=1 \
+	-D_HAVE_SANITY=1 \
+	-D_HAVE_CODEGEN=1 \
+	-D_HAVE_LIGHTING=1 \
+	-D_HAVE_TEXGEN=1 \
+	-D_HAVE_USERCLIP=1 \
+	-DGLX_DIRECT_RENDERING
+endif
+
+MESA_MODULES = $(TOP)/src/mesa/mesa.a
+
+MINIGLX_SOURCES = server/radeon_dri.c 
+
+DRIVER_SOURCES = radeon_context.c \
+		 radeon_ioctl.c \
+		 radeon_lock.c \
+		 radeon_screen.c \
+		 radeon_state.c \
+		 radeon_state_init.c \
+		 ../common/mm.c \
+		 ../common/utils.c \
+		 ../common/texmem.c \
+		 ../common/vblank.c
+
+SUBSET_DRIVER_SOURCES = \
+		radeon_subset_bitmap.c \
+		radeon_subset_readpix.c \
+		radeon_subset_select.c \
+		radeon_subset_tex.c \
+		radeon_subset_vtx.c 
+
+FULL_DRIVER_SOURCES = 	\
+		 radeon_tex.c \
+		 radeon_texmem.c \
+		 radeon_texstate.c \
+		 radeon_tcl.c \
+		 radeon_swtcl.c \
+		 radeon_span.c \
+		 radeon_maos.c \
+		 radeon_sanity.c \
+	  	 radeon_compat.c \
+		 radeon_vtxfmt.c \
+		 radeon_vtxfmt_c.c \
+		 radeon_vtxfmt_sse.c \
+		 radeon_vtxfmt_x86.c 
+
+
+INCLUDES = $(MINIGLX_INCLUDES) \
+	   $(SHARED_INCLUDES)
+
+
+ifeq ($(EMBEDDED),true)
+C_SOURCES = $(DRIVER_SOURCES) \
+	    $(SUBSET_DRIVER_SOURCES) \
+	    $(MINIGLX_SOURCES) 
+else
+C_SOURCES = $(DRIVER_SOURCES) \
+	    $(FULL_DRIVER_SOURCES) \
+	    $(MINIGLX_SOURCES) 
+endif
+
+
+ifeq ($(WINDOW_SYSTEM),dri)
+WINOBJ=$(MESABUILDDIR)/dri/dri.a
+WINLIB=
+else
+WINOBJ=
+WINLIB=-L$(MESA)/src/glx/mini
+endif
+
+ASM_SOURCES = 
+OBJECTS = $(C_SOURCES:.c=.o) \
+	  $(ASM_SOURCES:.S=.o) 
+
+### Include directories
+
+INCLUDE_DIRS = \
+	-I$(TOP)/include \
+	-I$(TOP)/src/mesa \
+	-I$(TOP)/src/mesa/main \
+	-I$(TOP)/src/mesa/glapi \
+	-I$(TOP)/src/mesa/math \
+	-I$(TOP)/src/mesa/transform \
+	-I$(TOP)/src/mesa/swrast \
+	-I$(TOP)/src/mesa/swrast_setup
+
+
+##### RULES #####
+
+.c.o:
+	$(CC) -c $(SHARED_INCLUDES) $(MINIGLX_INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+.S.o:
+	$(CC) -c $(SHARED_INCLUDES) $(MINIGLX_INCLUDES) $(CFLAGS) $(DEFINES)  $< -o $@
+
+
+##### TARGETS #####
+
+targets: depend $(TARGET)
+
+$(TARGET):  $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile.X11
+	rm -f $@ && $(CC) -o $@ -shared $(OBJECTS) $(MESA_MODULES) $(WINOBJ) $(WINLIB) -lc -lm
+	rm -f $(TOP)/lib/$(TARGET) && \
+	install $(TARGET) $(TOP)/lib/$(TARGET)
+
+# Run 'make -f Makefile.X11 depend' to update the dependencies if you change
+# what's included by any source file.  The file is touched first: makedepend -f needs it to exist.
+depend: $(C_SOURCES) $(ASM_SOURCES)
+	touch depend && makedepend -fdepend -Y $(SHARED_INCLUDES) $(MINIGLX_INCLUDES)\
+		$(C_SOURCES) $(ASM_SOURCES)
+
+
+# Emacs tags
+tags:
+	etags `find . -name \*.[ch]` `find ../include`
+
+
+# Remove object files, editor backup files and the built shared objects
+clean:
+	-rm -f *.o *~ *.so server/*.o
+
+
+include $(TOP)/Make-config
+
+include depend
diff --git a/src/mesa/drivers/dri/radeon/radeon_compat.c b/src/mesa/drivers/dri/radeon/radeon_compat.c
new file mode 100644
index 0000000000..0c32641530
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_compat.c
@@ -0,0 +1,304 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+               Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "glheader.h"
+#include "imports.h"
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+
+
+static const struct { 
+	int start;		/* NOTE(review): looks like the packet's first register -- confirm */
+	int len;		/* payload dwords; the cases handled in radeonCompatEmitPacket read this many */
+	const char *name;	/* register name, printed by the DEBUG_STATE trace */
+} packet[RADEON_MAX_STATE_PACKETS] = {
+	{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
+	{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
+	{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
+	{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
+	{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
+	{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
+	{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
+	{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
+	{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
+	{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
+	{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
+	{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
+	{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
+	{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
+	{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
+	{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
+	{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
+	{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
+	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
+	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
+	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+};
+
+
+static void radeonCompatEmitPacket( radeonContextPtr rmesa, 
+				    struct radeon_state_atom *state )
+{  /* Copy the register values of one state atom into the SAREA and flag them dirty. */
+   RADEONSAREAPrivPtr sarea = rmesa->sarea;
+   radeon_context_regs_t *ctx = &sarea->ContextState;
+   radeon_texture_regs_t *tex0 = &sarea->TexState[0];
+   radeon_texture_regs_t *tex1 = &sarea->TexState[1];
+   int i;			/* read cursor into buf[], advanced by every case */
+   int *buf = state->cmd;
+
+   for ( i = 0 ; i < state->cmd_size ; ) {	/* no increment: the body consumes header + payload */
+      drmRadeonCmdHeader *header = (drmRadeonCmdHeader *)&buf[i++];	/* header dword comes first */
+
+      if (RADEON_DEBUG & DEBUG_STATE)
+	 fprintf(stderr, "%s %d: %s\n", __FUNCTION__, header->packet.packet_id,
+		 packet[(int)header->packet.packet_id].name);
+
+      switch (header->packet.packet_id) {
+      case RADEON_EMIT_PP_MISC:
+	 ctx->pp_misc = buf[i++]; 
+	 ctx->pp_fog_color = buf[i++];
+	 ctx->re_solid_color = buf[i++];
+	 ctx->rb3d_blendcntl = buf[i++];
+	 ctx->rb3d_depthoffset = buf[i++];
+	 ctx->rb3d_depthpitch = buf[i++];
+	 ctx->rb3d_zstencilcntl = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+	 break;
+      case RADEON_EMIT_PP_CNTL:
+	 ctx->pp_cntl = buf[i++];
+	 ctx->rb3d_cntl = buf[i++];
+	 ctx->rb3d_coloroffset = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+	 break;
+      case RADEON_EMIT_RB3D_COLORPITCH:
+	 ctx->rb3d_colorpitch = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+	 break;
+      case RADEON_EMIT_RE_LINE_PATTERN:
+	 ctx->re_line_pattern = buf[i++];
+	 ctx->re_line_state = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_LINE;
+	 break;
+      case RADEON_EMIT_SE_LINE_WIDTH:
+	 ctx->se_line_width = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_LINE;
+	 break;
+      case RADEON_EMIT_PP_LUM_MATRIX:
+	 ctx->pp_lum_matrix = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
+	 break;
+      case RADEON_EMIT_PP_ROT_MATRIX_0:
+	 ctx->pp_rot_matrix_0 = buf[i++];
+	 ctx->pp_rot_matrix_1 = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
+	 break;
+      case RADEON_EMIT_RB3D_STENCILREFMASK:
+	 ctx->rb3d_stencilrefmask = buf[i++];
+	 ctx->rb3d_ropcntl = buf[i++];
+	 ctx->rb3d_planemask = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_MASKS;
+	 break;
+      case RADEON_EMIT_SE_VPORT_XSCALE:
+	 ctx->se_vport_xscale = buf[i++];
+	 ctx->se_vport_xoffset = buf[i++];
+	 ctx->se_vport_yscale = buf[i++];
+	 ctx->se_vport_yoffset = buf[i++];
+	 ctx->se_vport_zscale = buf[i++];
+	 ctx->se_vport_zoffset = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_VIEWPORT;
+	 break;
+      case RADEON_EMIT_SE_CNTL:
+	 ctx->se_cntl = buf[i++];
+	 ctx->se_coord_fmt = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_VERTFMT;
+	 break;
+      case RADEON_EMIT_SE_CNTL_STATUS:
+	 ctx->se_cntl_status = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_SETUP;
+	 break;
+      case RADEON_EMIT_RE_MISC:
+	 ctx->re_misc = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_MISC;
+	 break;
+      case RADEON_EMIT_PP_TXFILTER_0:
+	 tex0->pp_txfilter = buf[i++];
+	 tex0->pp_txformat = buf[i++];
+	 tex0->pp_txoffset = buf[i++];
+	 tex0->pp_txcblend = buf[i++];
+	 tex0->pp_txablend = buf[i++];
+	 tex0->pp_tfactor = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX0;
+	 break;
+      case RADEON_EMIT_PP_BORDER_COLOR_0:
+	 tex0->pp_border_color = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX0;
+	 break;
+      case RADEON_EMIT_PP_TXFILTER_1:
+	 tex1->pp_txfilter = buf[i++];
+	 tex1->pp_txformat = buf[i++];
+	 tex1->pp_txoffset = buf[i++];
+	 tex1->pp_txcblend = buf[i++];
+	 tex1->pp_txablend = buf[i++];
+	 tex1->pp_tfactor = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX1;
+	 break;
+      case RADEON_EMIT_PP_BORDER_COLOR_1:
+	 tex1->pp_border_color = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX1;
+	 break;
+
+      case RADEON_EMIT_SE_ZBIAS_FACTOR:
+	 i++;			/* both payload dwords are skipped; nothing is stored */
+	 i++;
+	 break;
+
+      case RADEON_EMIT_PP_TXFILTER_2:
+      case RADEON_EMIT_PP_BORDER_COLOR_2:
+      case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
+      case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
+      default:
+	 /* These states aren't understood by radeon drm 1.1 */
+	 fprintf(stderr, "Tried to emit unsupported state\n");
+	 return;		/* the rest of this atom's buffer is left unprocessed */
+      }
+   }
+}
+
+
+
+static void radeonCompatEmitStateLocked( radeonContextPtr rmesa )
+{  /* Emit every dirty non-TCL atom to the SAREA, then park each dirty atom on hw.clean. */
+   struct radeon_state_atom *atom, *next;
+   const int trace_mask = DEBUG_STATE | DEBUG_PRIMS;
+
+   if (RADEON_DEBUG & trace_mask)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (rmesa->lost_context) {
+      /* After a lost context every clean atom is queued for emission again. */
+      if (RADEON_DEBUG & (trace_mask | DEBUG_IOCTL))
+	 fprintf(stderr, "%s - lost context\n", __FUNCTION__);
+      foreach_s( atom, next, &(rmesa->hw.clean) )
+	 move_to_tail( &(rmesa->hw.dirty), atom );
+      rmesa->lost_context = 0;
+   }
+
+   foreach_s( atom, next, &(rmesa->hw.dirty) ) {
+      if (atom->is_tcl == 0)
+	 radeonCompatEmitPacket( rmesa, atom );
+      move_to_head( &(rmesa->hw.clean), atom );
+   }
+}
+
+
+
+static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa,
+					     GLuint hw_primitive,
+					     GLuint nverts,
+					     XF86DRIClipRectPtr pbox,
+					     GLuint nbox )
+{  /* Issue one DRM_RADEON_VERTEX ioctl per batch of cliprects copied into the SAREA. */
+   GLuint i;			/* unsigned like nbox: no mixed-sign comparisons */
+
+   for ( i = 0 ; i < nbox ; ) {
+      GLuint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox );	/* end of this batch */
+      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      drmRadeonVertex vtx;
+      
+      rmesa->sarea->dirty |= RADEON_UPLOAD_CLIPRECTS;
+      rmesa->sarea->nbox = nr - i;
+
+      for ( ; i < nr ; i++) 
+	 *b++ = pbox[i];
+      
+      if (RADEON_DEBUG & DEBUG_IOCTL)
+	 fprintf(stderr, 
+		 "RadeonFlushVertexBuffer: prim %x buf %d verts %d "
+		 "disc %d nbox %d\n",
+		 hw_primitive, 
+		 rmesa->dma.current.buf->buf->idx, 
+		 nverts, 
+		 nr == nbox,
+		 rmesa->sarea->nbox );
+
+      vtx.prim = hw_primitive;
+      vtx.idx = rmesa->dma.current.buf->buf->idx;
+      vtx.count = nverts;
+      vtx.discard = (nr == nbox);      /* set only for the final batch */
+
+      if (drmCommandWrite( rmesa->dri.fd, DRM_RADEON_VERTEX,
+			   &vtx, sizeof(vtx) ) != 0)	/* report instead of ignoring */
+	 fprintf(stderr, "%s: DRM_RADEON_VERTEX ioctl failed\n", __FUNCTION__);
+   }
+}
+
+
+
+/* The 1.1 vertex ioctl has no 'start': only one vertex primitive per buffer.
+ */
+void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
+				GLuint vertex_format,
+				GLuint hw_primitive,
+				GLuint nrverts )
+{
+   const int tracing = (RADEON_DEBUG & DEBUG_IOCTL) != 0;
+
+   if (tracing)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   LOCK_HARDWARE( rmesa );
+
+   /* Dirty state goes to the SAREA first, followed by the vertex format. */
+   radeonCompatEmitStateLocked( rmesa );
+   rmesa->sarea->vc_format = vertex_format;
+
+   /* Scissoring selects the scissor cliprect list instead of the context's. */
+   if (!rmesa->state.scissor.enabled) {
+      radeonCompatEmitPrimitiveLocked( rmesa, hw_primitive, nrverts,
+				       rmesa->pClipRects,
+				       rmesa->numClipRects );
+   }
+   else {
+      radeonCompatEmitPrimitiveLocked( rmesa, hw_primitive, nrverts,
+				       rmesa->state.scissor.pClipRects,
+				       rmesa->state.scissor.numClipRects );
+   }
+
+
+   UNLOCK_HARDWARE( rmesa );
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
new file mode 100644
index 0000000000..835cecbc3a
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -0,0 +1,598 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_context.c,v 1.7 2003/02/08 21:26:45 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "api_arrayelt.h"
+#include "context.h"
+#include "simple_list.h"
+#include "imports.h"
+#include "matrix.h"
+#include "extensions.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "array_cache/acache.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_span.h"
+#include "radeon_tex.h"
+#include "radeon_swtcl.h"
+#include "radeon_tcl.h"
+#include "radeon_vtxfmt.h"
+#include "radeon_maos.h"
+
+#define DRIVER_DATE	"20030328"
+
+#include "vblank.h"
+#include "utils.h"
+#ifndef RADEON_DEBUG
+int RADEON_DEBUG = (0);
+#endif
+
+
+
+/* Report the size of the current context's drawable; 'buffer' is not consulted.
+ */
+static void radeonGetBufferSize( GLframebuffer *buffer,
+				 GLuint *width, GLuint *height )
+{
+   GET_CURRENT_CONTEXT(cur);
+   radeonContextPtr radeon = RADEON_CONTEXT(cur);
+
+   LOCK_HARDWARE( radeon );	/* both dimensions are read with the lock held */
+   *width  = radeon->dri.drawable->w;
+   *height = radeon->dri.drawable->h;
+   UNLOCK_HARDWARE( radeon );
+}
+
+/* Return the driver's strings for glGetString(): GL_VENDOR and GL_RENDERER
+ * are answered here, every other name yields NULL.  */
+static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   static char buffer[128];	/* static: the returned pointer outlives the call */
+   unsigned   offset;
+   GLuint agp_mode = rmesa->radeonScreen->IsPCI ? 0 :
+      rmesa->radeonScreen->AGPMode;
+
+   switch ( name ) {
+   case GL_VENDOR:
+      return (GLubyte *)"Tungsten Graphics, Inc.";
+
+   case GL_RENDERER:
+      offset = driGetRendererString( buffer, "Radeon", DRIVER_DATE,
+				     agp_mode );
+      /* Bounded append: the suffix is truncated rather than overrunning buffer. */
+      snprintf( & buffer[ offset ], sizeof( buffer ) - offset, "%s %sTCL",
+	       ( rmesa->dri.drmMinor < 3 ) ? " DRM-COMPAT" : "",
+	       !(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
+	       ? "" : "NO-" );
+
+      return (GLubyte *)buffer;
+
+   default:
+      return NULL;
+   }
+}
+
+
+/* Extension strings exported by the R100 driver: a NULL-terminated list that
+ * radeonCreateContext() passes to driInitExtensions(). */
+static const char * const card_extensions[] =
+{
+    "GL_ARB_multisample",
+    "GL_ARB_multitexture",
+    "GL_ARB_texture_border_clamp",
+    "GL_ARB_texture_compression",
+    "GL_ARB_texture_env_add",
+    "GL_ARB_texture_env_combine",
+    "GL_ARB_texture_env_dot3",
+    "GL_ARB_texture_mirrored_repeat",
+    "GL_EXT_blend_logic_op",
+    "GL_EXT_blend_subtract",
+    "GL_EXT_secondary_color",
+    "GL_EXT_texture_edge_clamp",
+    "GL_EXT_texture_env_add",
+    "GL_EXT_texture_env_combine",
+    "GL_EXT_texture_env_dot3",
+    "GL_EXT_texture_filter_anisotropic",
+    "GL_EXT_texture_lod_bias",
+    "GL_ATI_texture_env_combine3",
+    "GL_ATI_texture_mirror_once",
+    "GL_IBM_texture_mirrored_repeat",
+    "GL_MESA_ycbcr_texture",
+    "GL_NV_blend_square",
+    "GL_SGIS_generate_mipmap",
+    "GL_SGIS_texture_border_clamp",
+    "GL_SGIS_texture_edge_clamp",
+    NULL
+};
+
+extern const struct gl_pipeline_stage _radeon_texrect_stage;
+extern const struct gl_pipeline_stage _radeon_render_stage;
+extern const struct gl_pipeline_stage _radeon_tcl_stage;
+
+static const struct gl_pipeline_stage *radeon_pipeline[] = {
+   /* Stage list that radeonCreateContext() installs via _tnl_install_pipeline(). */
+   /* Try and go straight to t&l
+    */
+   &_radeon_tcl_stage,  
+
+   /* Catch any t&l fallbacks
+    */
+   &_tnl_vertex_transform_stage,
+   &_tnl_normal_transform_stage,
+   &_tnl_lighting_stage,
+   &_tnl_fog_coordinate_stage,
+   &_tnl_texgen_stage,
+   &_tnl_texture_transform_stage,
+
+   /* Scale texture rectangle to 0..1.
+    */
+   &_radeon_texrect_stage,
+
+   &_radeon_render_stage,
+   &_tnl_render_stage,		/* FALLBACK:  */
+   0,				/* NOTE(review): presumably the end-of-list marker -- confirm */
+};
+
+
+
+/* Hook the driver's miscellaneous callbacks into ctx->Driver.
+ */
+static void radeonInitDriverFuncs( GLcontext *ctx )
+{
+    /* Entry points this driver leaves unset. */
+    ctx->Driver.Bitmap			= NULL;
+    ctx->Driver.DrawPixels		= NULL;
+    ctx->Driver.Error			= NULL;
+    /* Entry points that are provided. */
+    ctx->Driver.GetString		= radeonGetString;
+    ctx->Driver.ResizeBuffers           = _swrast_alloc_buffers;
+    ctx->Driver.GetBufferSize		= radeonGetBufferSize;
+}
+
+static const struct dri_debug_control debug_control[] =
+{   /* keyword -> flag table given to driParseDebugString() for $RADEON_DEBUG */
+    { "fall",  DEBUG_FALLBACKS },
+    { "tex",   DEBUG_TEXTURE },
+    { "ioctl", DEBUG_IOCTL },
+    { "prim",  DEBUG_PRIMS },
+    { "vert",  DEBUG_VERTS },
+    { "state", DEBUG_STATE },
+    { "code",  DEBUG_CODEGEN },
+    { "vfmt",  DEBUG_VFMT },
+    { "vtxf",  DEBUG_VFMT },	/* second keyword for the same flag as "vfmt" */
+    { "verb",  DEBUG_VERBOSE },
+    { "dri",   DEBUG_DRI },
+    { "dma",   DEBUG_DMA },
+    { "san",   DEBUG_SANITY },
+    { NULL,    0 }		/* NOTE(review): presumably the end-of-table marker -- confirm */
+};
+
+
+static int
+get_ust_nop( uint64_t * ust )
+{  /* Stand-in get_ust callback, installed when no __glXGetUST is available. */
+   *ust = 1;		/* always reports the same constant value */
+   return 0;
+}
+
+
+/* Create the device specific context.
+ *
+ * Allocates the radeon context and its Mesa context, hooks them up to the
+ * DRI screen/SAREA, creates the texture heaps and installs the driver's
+ * pipeline and callbacks.  Returns GL_FALSE if either context cannot be
+ * created (the radeon context is freed again), GL_TRUE otherwise.
+ */
+GLboolean
+radeonCreateContext( const __GLcontextModes *glVisual,
+                     __DRIcontextPrivate *driContextPriv,
+                     void *sharedContextPrivate)
+{
+   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+   radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
+   radeonContextPtr rmesa;
+   GLcontext *ctx, *shareCtx;
+   int i;
+
+   assert(glVisual);
+   assert(driContextPriv);
+   assert(screen);
+
+   /* Allocate the Radeon context */
+   rmesa = (radeonContextPtr) CALLOC( sizeof(*rmesa) );
+   if ( !rmesa )
+      return GL_FALSE;
+
+   /* Allocate the Mesa context */
+   if (sharedContextPrivate)
+      shareCtx = ((radeonContextPtr) sharedContextPrivate)->glCtx;
+   else
+      shareCtx = NULL;
+   rmesa->glCtx = _mesa_create_context(glVisual, shareCtx, (void *) rmesa, GL_TRUE);
+   if (!rmesa->glCtx) {
+      FREE(rmesa);
+      return GL_FALSE;
+   }
+   driContextPriv->driverPrivate = rmesa;
+
+   /* Init radeon context data */
+   rmesa->dri.context = driContextPriv;
+   rmesa->dri.screen = sPriv;
+   rmesa->dri.drawable = NULL; /* Set by XMesaMakeCurrent */
+   rmesa->dri.hwContext = driContextPriv->hHWContext;
+   rmesa->dri.hwLock = &sPriv->pSAREA->lock;
+   rmesa->dri.fd = sPriv->fd;
+
+   /* If we don't have 1.3, fallback to the 1.1 interfaces.
+    */
+   if (getenv("RADEON_COMPAT") || sPriv->drmMinor < 3 ) 
+      rmesa->dri.drmMinor = 1;
+   else
+      rmesa->dri.drmMinor = sPriv->drmMinor;
+
+   rmesa->radeonScreen = screen;
+   rmesa->sarea = (RADEONSAREAPrivPtr)((GLubyte *)sPriv->pSAREA +
+				       screen->sarea_priv_offset);
+
+
+   rmesa->dma.buf0_address = rmesa->radeonScreen->buffers->list[0].address;
+
+   (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
+   make_empty_list( & rmesa->swapped );
+
+   rmesa->nr_heaps = screen->numTexHeaps;
+   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+      rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
+	    screen->texSize[i],
+	    12,
+	    RADEON_NR_TEX_REGIONS,
+	    rmesa->sarea->texList[i],
+	    & rmesa->sarea->texAge[i],
+	    & rmesa->swapped,
+	    sizeof( radeonTexObj ),
+	    (destroy_texture_object_t *) radeonDestroyTexObj );
+
+      driSetTextureSwapCounterLocation( rmesa->texture_heaps[i],
+					& rmesa->c_textureSwaps );
+   }
+
+   rmesa->swtcl.RenderIndex = ~0;
+   rmesa->lost_context = 1;
+
+   /* Set the maximum texture size small enough that we can guarantee that
+    * all texture units can bind a maximal texture and have them both in
+    * texturable memory at once.
+    */
+
+   ctx = rmesa->glCtx;
+   ctx->Const.MaxTextureUnits = 2;
+
+   driCalculateMaxTextureLevels( rmesa->texture_heaps,
+				 rmesa->nr_heaps,
+				 & ctx->Const,
+				 4,
+				 11, /* max 2D texture size is 2048x2048 */
+				 0,  /* 3D textures unsupported. */
+				 0,  /* cube textures unsupported. */
+				 11, /* max rect texture size is 2048x2048. */
+				 12,
+				 GL_FALSE );
+
+   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+
+   /* No wide points.
+    */
+   ctx->Const.MinPointSize = 1.0;
+   ctx->Const.MinPointSizeAA = 1.0;
+   ctx->Const.MaxPointSize = 1.0;
+   ctx->Const.MaxPointSizeAA = 1.0;
+
+   ctx->Const.MinLineWidth = 1.0;
+   ctx->Const.MinLineWidthAA = 1.0;
+   ctx->Const.MaxLineWidth = 10.0;
+   ctx->Const.MaxLineWidthAA = 10.0;
+   ctx->Const.LineWidthGranularity = 0.0625;
+
+   /* Set maxlocksize (and hence vb size) small enough to avoid
+    * fallbacks in radeon_tcl.c.  ie. guarantee that all vertices can
+    * fit in a single dma buffer for indexed rendering of quad strips,
+    * etc.
+    */
+   ctx->Const.MaxArrayLockSize = 
+      MIN2( ctx->Const.MaxArrayLockSize, 
+ 	    RADEON_BUFFER_SIZE / RADEON_MAX_TCL_VERTSIZE ); 
+
+   rmesa->boxes = (getenv("LIBGL_PERFORMANCE_BOXES") != NULL);
+
+   /* Initialize the software rasterizer and helper modules.
+    */
+   _swrast_CreateContext( ctx );
+   _ac_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+   _ae_create_context( ctx );
+
+   /* Install the customized pipeline:
+    */
+   _tnl_destroy_pipeline( ctx );
+   _tnl_install_pipeline( ctx, radeon_pipeline );
+   ctx->Driver.FlushVertices = radeonFlushVertices;
+
+   /* Try and keep materials and vertices separate:
+    */
+   _tnl_isolate_materials( ctx, GL_TRUE );
+
+
+/*     _mesa_allow_light_in_model( ctx, GL_FALSE ); */
+
+
+   /* Configure swrast to match hardware characteristics:
+    */
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+
+
+   _math_matrix_ctr( &rmesa->TexGenMatrix[0] );
+   _math_matrix_ctr( &rmesa->TexGenMatrix[1] );
+   _math_matrix_ctr( &rmesa->tmpmat );
+   _math_matrix_set_identity( &rmesa->TexGenMatrix[0] );
+   _math_matrix_set_identity( &rmesa->TexGenMatrix[1] );
+   _math_matrix_set_identity( &rmesa->tmpmat );
+
+   driInitExtensions( ctx, card_extensions, GL_TRUE );
+
+   if (rmesa->dri.drmMinor >= 9)
+      _mesa_enable_extension( ctx, "GL_NV_texture_rectangle");
+
+   radeonInitDriverFuncs( ctx );
+   radeonInitIoctlFuncs( ctx );
+   radeonInitStateFuncs( ctx );
+   radeonInitSpanFuncs( ctx );
+   radeonInitTextureFuncs( ctx );
+   radeonInitState( rmesa );
+   radeonInitSwtcl( ctx );
+
+   rmesa->iw.irq_seq = -1;
+   rmesa->irqsEmitted = 0;
+   rmesa->do_irqs = (rmesa->radeonScreen->irq && !getenv("RADEON_NO_IRQS"));
+
+   rmesa->do_usleeps = !getenv("RADEON_NO_USLEEPS");
+
+   rmesa->vblank_flags = (rmesa->do_irqs)
+       ? driGetDefaultVBlankFlags() : VBLANK_FLAG_NO_IRQ;
+
+#ifndef _SOLO
+   rmesa->get_ust = (PFNGLXGETUSTPROC) glXGetProcAddress( "__glXGetUST" );
+   if ( rmesa->get_ust == NULL ) 
+#endif
+   {
+      rmesa->get_ust = get_ust_nop;
+   }
+
+   (*rmesa->get_ust)( & rmesa->swap_ust );
+
+
+#if DO_DEBUG
+   RADEON_DEBUG = driParseDebugString( getenv( "RADEON_DEBUG" ),
+				       debug_control );
+#endif
+
+   if (getenv("RADEON_NO_RAST")) {
+      fprintf(stderr, "disabling 3D acceleration\n");
+      FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1); 
+   }
+   else if (getenv("RADEON_TCL_FORCE_ENABLE")) {
+      fprintf(stderr, "Enabling TCL support...  this will probably crash\n");
+      fprintf(stderr, "         your card if it isn't capable of TCL!\n");
+      rmesa->radeonScreen->chipset |= RADEON_CHIPSET_TCL;
+   } else if (getenv("RADEON_TCL_FORCE_DISABLE") ||
+	    rmesa->dri.drmMinor < 3 ||
+	    !(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)) {
+      rmesa->radeonScreen->chipset &= ~RADEON_CHIPSET_TCL;
+      fprintf(stderr, "disabling TCL support\n");
+      TCL_FALLBACK(rmesa->glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1); 
+   }
+
+   if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) {
+      if (!getenv("RADEON_NO_VTXFMT"))
+	 radeonVtxfmtInit( ctx );
+
+      _tnl_need_dlist_norm_lengths( ctx, GL_FALSE );
+   }
+   return GL_TRUE;
+}
+
+
+/* Destroy the Mesa and driver specific context data.  If the context is the
+ * current one, RADEON_FIREVERTICES is run and it is unbound first. */
+void radeonDestroyContext( __DRIcontextPrivate *driContextPriv )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
+   radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
+
+   /* check if we're deleting the currently bound context */
+   if (rmesa && rmesa == current) {
+      RADEON_FIREVERTICES( rmesa );
+      _mesa_make_current2(NULL, NULL, NULL);
+   }
+
+   /* Free radeon context resources */
+   assert(rmesa); /* should never be null */
+   if ( rmesa ) {
+      GLboolean   release_texture_heaps;
+
+      release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
+      _swsetup_DestroyContext( rmesa->glCtx );
+      _tnl_DestroyContext( rmesa->glCtx );
+      _ac_DestroyContext( rmesa->glCtx );
+      _swrast_DestroyContext( rmesa->glCtx );
+
+      radeonDestroySwtcl( rmesa->glCtx );
+      radeonReleaseArrays( rmesa->glCtx, ~0 );
+      if (rmesa->dma.current.buf) {
+	 radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+	 radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+      }
+
+      /* Mirror the radeonVtxfmtInit() conditions in radeonCreateContext.
+       * The flag test needs its own parentheses: '!' binds tighter than
+       * '&', so the old form tested (!TclFallback) & flag. */
+      if (!(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE) &&
+	  (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) &&
+	  !getenv("RADEON_NO_VTXFMT"))
+	 radeonVtxfmtDestroy( rmesa->glCtx );
+
+      /* free the Mesa context */
+      rmesa->glCtx->DriverCtx = NULL;
+      _mesa_destroy_context( rmesa->glCtx );
+
+      if (rmesa->state.scissor.pClipRects) {
+	 FREE(rmesa->state.scissor.pClipRects);
+	 rmesa->state.scissor.pClipRects = 0;
+      }
+
+      if ( release_texture_heaps ) {
+         /* This share group is about to go away, free our private
+          * texture object data.
+          */
+         unsigned i;
+
+	 /* this assert is not correct, default textures are always on swap list
+	 assert( is_empty_list( & rmesa->swapped ) ); */
+         for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+	    driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
+	    rmesa->texture_heaps[ i ] = NULL;
+         }
+      }
+
+      FREE( rmesa );
+   }
+}
+
+
+
+
+void
+radeonSwapBuffers( __DRIdrawablePrivate *dPriv )
+{
+   radeonContextPtr rmesa;
+   GLcontext *ctx;
+
+   /* Without a bound driver context there is nothing we can swap. */
+   if (!dPriv->driContextPriv || !dPriv->driContextPriv->driverPrivate) {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+      return;
+   }
+
+   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+   ctx = rmesa->glCtx;
+   if (!ctx->Visual.doubleBufferMode)
+      return;			/* single-buffered: no-op */
+   _mesa_notifySwapBuffers( ctx );  /* flush pending rendering commands */
+
+   if ( rmesa->doPageFlip ) {
+      radeonPageFlip( dPriv );
+   } else {
+      radeonCopyBuffer( dPriv );
+   }
+}
+
+
+/* Make driContextPriv's context current, drawing to driDrawPriv and reading
+ * from driReadPriv.  A NULL driContextPriv calls _mesa_make_current(0, 0). */
+GLboolean
+radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
+                   __DRIdrawablePrivate *driDrawPriv,
+                   __DRIdrawablePrivate *driReadPriv )
+{
+   if ( !driContextPriv ) {
+      if (RADEON_DEBUG & DEBUG_DRI)
+	 fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+      _mesa_make_current( 0, 0 );
+   } else {
+      radeonContextPtr rmesa =
+	 (radeonContextPtr) driContextPriv->driverPrivate;
+      GLcontext *ctx = rmesa->glCtx;
+
+      if (RADEON_DEBUG & DEBUG_DRI)
+	 fprintf(stderr, "%s ctx %p\n", __FUNCTION__, ctx);
+
+      /* Bound to a different drawable than before: refresh window state. */
+      if ( rmesa->dri.drawable != driDrawPriv ) {
+	 rmesa->dri.drawable = driDrawPriv;
+	 radeonUpdateWindow( ctx );
+	 radeonUpdateViewportOffset( ctx );
+      }
+
+      _mesa_make_current2( ctx,
+			   (GLframebuffer *) driDrawPriv->driverPrivate,
+			   (GLframebuffer *) driReadPriv->driverPrivate );
+
+      /* Zero-width viewport: default it to the drawable's size. */
+      if ( !ctx->Viewport.Width ) {
+	 _mesa_set_viewport( ctx, 0, 0, driDrawPriv->w, driDrawPriv->h );
+      }
+
+      if (rmesa->vb.enabled)
+	 radeonVtxfmtMakeCurrent( ctx );
+   }
+
+   if (RADEON_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "End %s\n", __FUNCTION__);
+   return GL_TRUE;
+}
+
+/* Unbind hook for driContextPriv's context.  It only emits a DEBUG_DRI
+ * trace naming the context; GL_TRUE is always returned. */
+GLboolean
+radeonUnbindContext( __DRIcontextPrivate *driContextPriv )
+{
+   radeonContextPtr unbound = (radeonContextPtr) driContextPriv->driverPrivate;
+
+   if (RADEON_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, unbound->glCtx);
+   }
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
new file mode 100644
index 0000000000..5f1f9659e8
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
@@ -0,0 +1,839 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_context.h,v 1.6 2002/12/16 16:18:58 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __RADEON_CONTEXT_H__
+#define __RADEON_CONTEXT_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include <inttypes.h>
+#include "dri_util.h"
+#include "radeon_common.h"
+#include "texmem.h"
+
+#include "macros.h"
+#include "mtypes.h"
+#include "colormac.h"
+
+struct radeon_context;
+typedef struct radeon_context radeonContextRec;
+typedef struct radeon_context *radeonContextPtr;
+
+#include "radeon_lock.h"
+#include "radeon_screen.h"
+#include "mm.h"
+
+/* Flags for software fallback cases */
+/* See corresponding strings in radeon_swtcl.c */
+#define RADEON_FALLBACK_TEXTURE		0x0001
+#define RADEON_FALLBACK_DRAW_BUFFER	0x0002
+#define RADEON_FALLBACK_STENCIL		0x0004
+#define RADEON_FALLBACK_RENDER_MODE	0x0008
+#define RADEON_FALLBACK_BLEND_EQ	0x0010
+#define RADEON_FALLBACK_BLEND_FUNC	0x0020
+#define RADEON_FALLBACK_DISABLE 	0x0040
+#define RADEON_FALLBACK_BORDER_MODE	0x0080
+
+/* The blit width for texture uploads
+ */
+#define BLIT_WIDTH_BYTES 1024
+
+/* Use the templated vertex format:
+ */
+#define COLOR_IS_RGBA
+#define TAG(x) radeon##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+typedef void (*radeon_tri_func)( radeonContextPtr,
+				 radeonVertex *,
+				 radeonVertex *,
+				 radeonVertex * );
+
+typedef void (*radeon_line_func)( radeonContextPtr,
+				  radeonVertex *,
+				  radeonVertex * );
+
+typedef void (*radeon_point_func)( radeonContextPtr,
+				   radeonVertex * );
+
+
+struct radeon_colorbuffer_state {
+   GLuint clear;
+   GLint drawOffset, drawPitch;
+};
+
+
+struct radeon_depthbuffer_state {
+   GLuint clear;
+   GLfloat scale;
+};
+
+struct radeon_pixel_state {
+   GLint readOffset, readPitch;
+};
+
+struct radeon_scissor_state {
+   XF86DRIClipRectRec rect;
+   GLboolean enabled;
+
+   GLuint numClipRects;			/* Cliprects active */
+   GLuint numAllocedClipRects;		/* Cliprects available */
+   XF86DRIClipRectPtr pClipRects;	/* FREE()d by radeonDestroyContext */
+};
+
+struct radeon_stencilbuffer_state {
+   GLboolean hwBuffer;
+   GLuint clear;			/* rb3d_stencilrefmask value */
+};
+
+struct radeon_stipple_state {
+   GLuint mask[32];
+};
+
+
+
+#define TEX_0   0x1
+#define TEX_1   0x2
+#define TEX_ALL 0x3
+
+typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
+
+/* Texture object in locally shared texture space.
+ */
+struct radeon_tex_obj {
+   driTextureObject   base;
+
+   GLuint bufAddr;			/* Offset to start of locally
+					   shared texture block */
+
+   GLuint dirty_state;		        /* Flags (1 per texunit) for
+					   whether or not this texobj
+					   has dirty hardware state
+					   (pp_*) that needs to be
+					   brought into the
+					   texunit. */
+
+   drmRadeonTexImage image[6][RADEON_MAX_TEXTURE_LEVELS];
+					/* Six, for the cube faces */
+
+   GLuint pp_txfilter;		        /* hardware register values */
+   GLuint pp_txformat;
+   GLuint pp_txoffset;		        /* Image location in texmem.
+					   All cube faces follow. */
+   GLuint pp_txsize;		        /* npot only */
+   GLuint pp_txpitch;		        /* npot only */
+   GLuint pp_border_color;
+   GLuint pp_cubic_faces;	        /* cube face 1,2,3,4 log2 sizes */
+
+   GLboolean  border_fallback;
+};
+
+
+struct radeon_texture_env_state {
+   radeonTexObjPtr texobj;
+   GLenum format;
+   GLenum envMode;
+};
+
+struct radeon_texture_state {
+   struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS];
+};
+
+
+struct radeon_state_atom {
+   struct radeon_state_atom *next, *prev;
+   const char *name;		         /* for debug */
+   int cmd_size;		         /* size in bytes */
+   GLuint is_tcl;
+   int *cmd;			         /* one or more cmd's */
+   int *lastcmd;			 /* one or more cmd's */
+   GLboolean (*check)( GLcontext * );    /* is this state active? */
+};
+   
+
+
+/* Trying to keep these relatively short as the variables are becoming
+ * extravagantly long.  Drop the driver name prefix off the front of
+ * everything - I think we know which driver we're in by now, and keep the
+ * prefix to 3 letters unless absolutely impossible.  
+ */
+
+#define CTX_CMD_0             0
+#define CTX_PP_MISC           1
+#define CTX_PP_FOG_COLOR      2
+#define CTX_RE_SOLID_COLOR    3
+#define CTX_RB3D_BLENDCNTL    4
+#define CTX_RB3D_DEPTHOFFSET  5
+#define CTX_RB3D_DEPTHPITCH   6
+#define CTX_RB3D_ZSTENCILCNTL 7
+#define CTX_CMD_1             8
+#define CTX_PP_CNTL           9
+#define CTX_RB3D_CNTL         10
+#define CTX_RB3D_COLOROFFSET  11
+#define CTX_CMD_2             12
+#define CTX_RB3D_COLORPITCH   13
+#define CTX_STATE_SIZE        14
+
+#define SET_CMD_0               0
+#define SET_SE_CNTL             1
+#define SET_SE_COORDFMT         2
+#define SET_CMD_1               3
+#define SET_SE_CNTL_STATUS      4
+#define SET_STATE_SIZE          5
+
+#define LIN_CMD_0               0
+#define LIN_RE_LINE_PATTERN     1
+#define LIN_RE_LINE_STATE       2
+#define LIN_CMD_1               3
+#define LIN_SE_LINE_WIDTH       4
+#define LIN_STATE_SIZE          5
+
+#define MSK_CMD_0               0
+#define MSK_RB3D_STENCILREFMASK 1
+#define MSK_RB3D_ROPCNTL        2
+#define MSK_RB3D_PLANEMASK      3
+#define MSK_STATE_SIZE          4
+
+#define VPT_CMD_0           0
+#define VPT_SE_VPORT_XSCALE          1
+#define VPT_SE_VPORT_XOFFSET         2
+#define VPT_SE_VPORT_YSCALE          3
+#define VPT_SE_VPORT_YOFFSET         4
+#define VPT_SE_VPORT_ZSCALE          5
+#define VPT_SE_VPORT_ZOFFSET         6
+#define VPT_STATE_SIZE      7
+
+#define MSC_CMD_0               0
+#define MSC_RE_MISC             1
+#define MSC_STATE_SIZE          2
+
+#define TEX_CMD_0                   0
+#define TEX_PP_TXFILTER             1
+#define TEX_PP_TXFORMAT             2
+#define TEX_PP_TXOFFSET             3
+#define TEX_PP_TXCBLEND             4
+#define TEX_PP_TXABLEND             5
+#define TEX_PP_TFACTOR              6
+#define TEX_CMD_1                   7
+#define TEX_PP_BORDER_COLOR         8
+#define TEX_STATE_SIZE              9
+
+#define TXR_CMD_0                   0 /* rectangle textures */
+#define TXR_PP_TEX_SIZE             1 /* 0x1d04, 0x1d0c for NPOT! */
+#define TXR_PP_TEX_PITCH            2 /* 0x1d08, 0x1d10 for NPOT! */
+#define TXR_STATE_SIZE              3
+
+#define ZBS_CMD_0              0
+#define ZBS_SE_ZBIAS_FACTOR             1
+#define ZBS_SE_ZBIAS_CONSTANT           2
+#define ZBS_STATE_SIZE         3
+
+#define TCL_CMD_0                        0
+#define TCL_OUTPUT_VTXFMT         1
+#define TCL_OUTPUT_VTXSEL         2
+#define TCL_MATRIX_SELECT_0       3
+#define TCL_MATRIX_SELECT_1       4
+#define TCL_UCP_VERT_BLEND_CTL    5
+#define TCL_TEXTURE_PROC_CTL      6
+#define TCL_LIGHT_MODEL_CTL       7
+#define TCL_PER_LIGHT_CTL_0       8
+#define TCL_PER_LIGHT_CTL_1       9
+#define TCL_PER_LIGHT_CTL_2       10
+#define TCL_PER_LIGHT_CTL_3       11
+#define TCL_STATE_SIZE                   12
+
+#define MTL_CMD_0            0	
+#define MTL_EMMISSIVE_RED    1	
+#define MTL_EMMISSIVE_GREEN  2	
+#define MTL_EMMISSIVE_BLUE   3	
+#define MTL_EMMISSIVE_ALPHA  4	
+#define MTL_AMBIENT_RED      5
+#define MTL_AMBIENT_GREEN    6
+#define MTL_AMBIENT_BLUE     7
+#define MTL_AMBIENT_ALPHA    8
+#define MTL_DIFFUSE_RED      9
+#define MTL_DIFFUSE_GREEN    10
+#define MTL_DIFFUSE_BLUE     11
+#define MTL_DIFFUSE_ALPHA    12
+#define MTL_SPECULAR_RED     13
+#define MTL_SPECULAR_GREEN   14
+#define MTL_SPECULAR_BLUE    15
+#define MTL_SPECULAR_ALPHA   16
+#define MTL_SHININESS        17
+#define MTL_STATE_SIZE       18
+
+#define VTX_CMD_0              0
+#define VTX_SE_COORD_FMT       1
+#define VTX_STATE_SIZE         2
+
+#define MAT_CMD_0              0
+#define MAT_ELT_0              1
+#define MAT_STATE_SIZE         17
+
+#define GRD_CMD_0                  0
+#define GRD_VERT_GUARD_CLIP_ADJ    1
+#define GRD_VERT_GUARD_DISCARD_ADJ 2
+#define GRD_HORZ_GUARD_CLIP_ADJ    3
+#define GRD_HORZ_GUARD_DISCARD_ADJ 4
+#define GRD_STATE_SIZE             5
+
+/* position changes frequently when lighting in modelpos - separate
+ * out to new state item?  
+ */
+#define LIT_CMD_0                  0
+#define LIT_AMBIENT_RED            1
+#define LIT_AMBIENT_GREEN          2
+#define LIT_AMBIENT_BLUE           3
+#define LIT_AMBIENT_ALPHA          4
+#define LIT_DIFFUSE_RED            5
+#define LIT_DIFFUSE_GREEN          6
+#define LIT_DIFFUSE_BLUE           7
+#define LIT_DIFFUSE_ALPHA          8
+#define LIT_SPECULAR_RED           9
+#define LIT_SPECULAR_GREEN         10
+#define LIT_SPECULAR_BLUE          11
+#define LIT_SPECULAR_ALPHA         12
+#define LIT_POSITION_X             13
+#define LIT_POSITION_Y             14
+#define LIT_POSITION_Z             15
+#define LIT_POSITION_W             16
+#define LIT_DIRECTION_X            17
+#define LIT_DIRECTION_Y            18
+#define LIT_DIRECTION_Z            19
+#define LIT_DIRECTION_W            20
+#define LIT_ATTEN_CONST            21
+#define LIT_ATTEN_LINEAR           22
+#define LIT_ATTEN_QUADRATIC        23
+#define LIT_ATTEN_XXX              24
+#define LIT_CMD_1                  25
+#define LIT_SPOT_DCD               26
+#define LIT_SPOT_EXPONENT          27
+#define LIT_SPOT_CUTOFF            28
+#define LIT_SPECULAR_THRESH        29
+#define LIT_RANGE_CUTOFF           30 /* ? */
+#define LIT_RANGE_ATTEN            31 /* ? */
+#define LIT_STATE_SIZE             32
+
+/* Fog
+ */
+#define FOG_CMD_0      0
+#define FOG_R          1
+#define FOG_C          2
+#define FOG_D          3
+#define FOG_PAD        4
+#define FOG_STATE_SIZE 5
+
+/* UCP
+ */
+#define UCP_CMD_0      0
+#define UCP_X          1
+#define UCP_Y          2
+#define UCP_Z          3
+#define UCP_W          4
+#define UCP_STATE_SIZE 5
+
+/* GLT - Global ambient
+ */
+#define GLT_CMD_0      0
+#define GLT_RED        1
+#define GLT_GREEN      2
+#define GLT_BLUE       3
+#define GLT_ALPHA      4
+#define GLT_STATE_SIZE 5
+
+/* EYE
+ */
+#define EYE_CMD_0          0
+#define EYE_X              1
+#define EYE_Y              2
+#define EYE_Z              3
+#define EYE_RESCALE_FACTOR 4
+#define EYE_STATE_SIZE     5
+
+#define SHN_CMD_0          0
+#define SHN_SHININESS      1
+#define SHN_STATE_SIZE     2
+
+
+
+
+
+struct radeon_hw_state {
+   /* All state should be on one of these lists:
+    */
+   struct radeon_state_atom dirty; /* dirty list head placeholder */
+   struct radeon_state_atom clean; /* clean list head placeholder */
+
+   /* Hardware state, stored as cmdbuf commands:  
+    *   -- Need to doublebuffer for
+    *           - reviving state after loss of context
+    *           - eliding noop statechange loops? (except line stipple count)
+    */
+   struct radeon_state_atom ctx;
+   struct radeon_state_atom set;
+   struct radeon_state_atom lin;
+   struct radeon_state_atom msk;
+   struct radeon_state_atom vpt;
+   struct radeon_state_atom tcl;
+   struct radeon_state_atom msc;
+   struct radeon_state_atom tex[2];
+   struct radeon_state_atom zbs;
+   struct radeon_state_atom mtl; 
+   struct radeon_state_atom mat[5]; 
+   struct radeon_state_atom lit[8]; /* includes vec, scl commands */
+   struct radeon_state_atom ucp[6];
+   struct radeon_state_atom eye; /* eye pos */
+   struct radeon_state_atom grd; /* guard band clipping */
+   struct radeon_state_atom fog; 
+   struct radeon_state_atom glt; 
+   struct radeon_state_atom txr[2]; /* for NPOT */
+};
+
+struct radeon_state {
+   /* Derived state for internal purposes:
+    */
+   struct radeon_colorbuffer_state color;
+   struct radeon_depthbuffer_state depth;
+   struct radeon_pixel_state pixel;
+   struct radeon_scissor_state scissor;
+   struct radeon_stencilbuffer_state stencil;
+   struct radeon_stipple_state stipple;
+   struct radeon_texture_state texture;
+};
+
+
+/* Need refcounting on dma buffers:
+ */
+struct radeon_dma_buffer {
+   int refcount;		/* the number of retained regions in buf */
+   drmBufPtr buf;
+};
+/* NOTE: expands a bare `rmesa`, so one must be in scope at the use site. */
+#define GET_START(rvb) (rmesa->radeonScreen->agp_buffer_offset +			\
+			(rvb)->address - rmesa->dma.buf0_address +	\
+			(rvb)->start)
+
+/* A retained region, eg vertices for indexed vertices.
+ */
+struct radeon_dma_region {
+   struct radeon_dma_buffer *buf;
+   char *address;		/* == buf->address */
+   int start, end, ptr;		/* offsets from start of buf */
+   int aos_start;
+   int aos_stride;
+   int aos_size;
+};
+
+
+struct radeon_dma {
+   /* Active dma region.  Allocations for vertices and retained
+    * regions come from here.  Also used for emitting random vertices,
+    * these may be flushed by calling flush_current();
+    */
+   struct radeon_dma_region current;
+   
+   void (*flush)( radeonContextPtr );
+
+   char *buf0_address;		/* start of buf[0], for index calcs */
+   GLuint nr_released_bufs;	/* flush after so many buffers released */
+};
+
+struct radeon_dri_mirror {
+   __DRIcontextPrivate	*context;	/* DRI context */
+   __DRIscreenPrivate	*screen;	/* DRI screen */
+   __DRIdrawablePrivate	*drawable;	/* DRI drawable bound to this ctx */
+
+   drmContext hwContext;
+   drmLock *hwLock;
+   int fd;
+   int drmMinor;
+};
+
+
+#define RADEON_CMD_BUF_SZ  (8*1024) 
+
+struct radeon_store {
+   GLuint statenr;
+   GLuint primnr;
+   char cmd_buf[RADEON_CMD_BUF_SZ];
+   int cmd_used;   
+   int elts_start;
+};
+
+
+/* radeon_tcl.c
+ */
+struct radeon_tcl_info {
+   GLuint vertex_format;
+   GLint last_offset;
+   GLuint hw_primitive;
+
+   struct radeon_dma_region *aos_components[8];
+   GLuint nr_aos_components;
+
+   GLuint *Elts;
+
+   struct radeon_dma_region indexed_verts;
+   struct radeon_dma_region obj;
+   struct radeon_dma_region rgba;
+   struct radeon_dma_region spec;
+   struct radeon_dma_region fog;
+   struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS];
+   struct radeon_dma_region norm;
+};
+
+
+/* radeon_swtcl.c
+ */
+struct radeon_swtcl_info {
+   GLuint SetupIndex;
+   GLuint SetupNewInputs;
+   GLuint RenderIndex;
+   GLuint vertex_size;
+   GLuint vertex_stride_shift;
+   GLuint vertex_format;
+   GLubyte *verts;
+
+   /* Fallback rasterization functions
+    */
+   radeon_point_func draw_point;
+   radeon_line_func draw_line;
+   radeon_tri_func draw_tri;
+
+   GLuint hw_primitive;
+   GLenum render_primitive;
+   GLuint numverts;
+
+   struct radeon_dma_region indexed_verts;
+};
+
+
+struct radeon_ioctl {
+   GLuint vertex_offset;
+   GLuint vertex_size;
+};
+
+
+
+#define RADEON_MAX_PRIMS 64
+
+
+/* Want to keep a cache of these around.  Each is parameterized by
+ * only a single value which has only a small range.  Only expect a
+ * few, so just rescan the list each time?
+ */
+struct dynfn {
+   struct dynfn *next, *prev;
+   int key;
+   char *code;
+};
+
+struct dfn_lists {
+   struct dynfn Vertex2f;
+   struct dynfn Vertex2fv;
+   struct dynfn Vertex3f;
+   struct dynfn Vertex3fv;
+   struct dynfn Color4ub;
+   struct dynfn Color4ubv;
+   struct dynfn Color3ub;
+   struct dynfn Color3ubv;
+   struct dynfn Color4f;
+   struct dynfn Color4fv;
+   struct dynfn Color3f;
+   struct dynfn Color3fv;
+   struct dynfn SecondaryColor3ubEXT;
+   struct dynfn SecondaryColor3ubvEXT;
+   struct dynfn SecondaryColor3fEXT;
+   struct dynfn SecondaryColor3fvEXT;
+   struct dynfn Normal3f;
+   struct dynfn Normal3fv;
+   struct dynfn TexCoord2f;
+   struct dynfn TexCoord2fv;
+   struct dynfn TexCoord1f;
+   struct dynfn TexCoord1fv;
+   struct dynfn MultiTexCoord2fARB;
+   struct dynfn MultiTexCoord2fvARB;
+   struct dynfn MultiTexCoord1fARB;
+   struct dynfn MultiTexCoord1fvARB;
+};
+
+struct dfn_generators {
+   struct dynfn *(*Vertex2f)( GLcontext *, int );
+   struct dynfn *(*Vertex2fv)( GLcontext *, int );
+   struct dynfn *(*Vertex3f)( GLcontext *, int );
+   struct dynfn *(*Vertex3fv)( GLcontext *, int );
+   struct dynfn *(*Color4ub)( GLcontext *, int );
+   struct dynfn *(*Color4ubv)( GLcontext *, int );
+   struct dynfn *(*Color3ub)( GLcontext *, int );
+   struct dynfn *(*Color3ubv)( GLcontext *, int );
+   struct dynfn *(*Color4f)( GLcontext *, int );
+   struct dynfn *(*Color4fv)( GLcontext *, int );
+   struct dynfn *(*Color3f)( GLcontext *, int );
+   struct dynfn *(*Color3fv)( GLcontext *, int );
+   struct dynfn *(*SecondaryColor3ubEXT)( GLcontext *, int );
+   struct dynfn *(*SecondaryColor3ubvEXT)( GLcontext *, int );
+   struct dynfn *(*SecondaryColor3fEXT)( GLcontext *, int );
+   struct dynfn *(*SecondaryColor3fvEXT)( GLcontext *, int );
+   struct dynfn *(*Normal3f)( GLcontext *, int );
+   struct dynfn *(*Normal3fv)( GLcontext *, int );
+   struct dynfn *(*TexCoord2f)( GLcontext *, int );
+   struct dynfn *(*TexCoord2fv)( GLcontext *, int );
+   struct dynfn *(*TexCoord1f)( GLcontext *, int );
+   struct dynfn *(*TexCoord1fv)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord2fARB)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord2fvARB)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord1fARB)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord1fvARB)( GLcontext *, int );
+};
+
+
+
+struct radeon_prim {
+   GLuint start;
+   GLuint end;
+   GLuint prim;
+};
+
+struct radeon_vbinfo {
+   GLint counter, initial_counter;
+   GLint *dmaptr;
+   void (*notify)( void );
+   GLint vertex_size;
+
+   /* A maximum total of 15 elements per vertex:  3 floats for position, 3
+    * floats for normal, 4 floats for color, 4 bytes for secondary color,
+    * 2 floats for each texture unit (4 floats total).
+    * 
+    * As soon as the 3rd TMU is supported or cube maps (or 3D textures) are
+    * supported, this value will grow.
+    * 
+    * The position data is never actually stored here, so 3 elements could be
+    * trimmed out of the buffer.
+    */
+   union { float f; int i; radeon_color_t color; } vertex[15];
+
+   GLfloat *normalptr;
+   GLfloat *floatcolorptr;
+   radeon_color_t *colorptr;
+   GLfloat *floatspecptr;
+   radeon_color_t *specptr;
+   GLfloat *texcoordptr[2];
+
+   GLenum *prim;		/* &ctx->Driver.CurrentExecPrimitive */
+   GLuint primflags;
+   GLboolean enabled;		/* *_NO_VTXFMT / *_NO_TCL env vars */
+   GLboolean installed;
+   GLboolean fell_back;
+   GLboolean recheck;
+   GLint nrverts;
+   GLuint vertex_format;
+
+   GLuint installed_vertex_format;
+   GLuint installed_color_3f_sz;
+
+   struct radeon_prim primlist[RADEON_MAX_PRIMS];
+   int nrprims;
+
+   struct dfn_lists dfn_cache;
+   struct dfn_generators codegen;
+   GLvertexformat vtxfmt;
+};
+
+
+
+
+struct radeon_context {
+   GLcontext *glCtx;			/* Mesa context */
+
+   /* Driver and hardware state management
+    */
+   struct radeon_hw_state hw;
+   struct radeon_state state;
+
+   /* Texture object bookkeeping
+    */
+   unsigned              nr_heaps;
+   driTexHeap          * texture_heaps[ RADEON_NR_TEX_HEAPS ];
+   driTextureObject      swapped;
+
+
+   /* Rasterization and vertex state:
+    */
+   GLuint TclFallback;
+   GLuint Fallback;
+   GLuint NewGLState;
+
+   
+   /* Temporaries for translating away float colors:
+    */
+   struct gl_client_array UbyteColor;
+   struct gl_client_array UbyteSecondaryColor;
+
+   /* Vertex buffers
+    */
+   struct radeon_ioctl ioctl;
+   struct radeon_dma dma;
+   struct radeon_store store;
+
+   /* Page flipping
+    */
+   GLuint doPageFlip;
+
+   /* Busy waiting
+    */
+   GLuint do_usleeps;
+   GLuint do_irqs;
+   GLuint irqsEmitted;
+   drmRadeonIrqWait iw;
+
+   /* Drawable, cliprect and scissor information
+    */
+   GLuint numClipRects;			/* Cliprects for the draw buffer */
+   XF86DRIClipRectPtr pClipRects;
+   unsigned int lastStamp;
+   GLboolean lost_context;
+   radeonScreenPtr radeonScreen;	/* Screen private DRI data */
+   RADEONSAREAPrivPtr sarea;		/* Private SAREA data */
+
+   /* TCL stuff
+    */
+   GLmatrix TexGenMatrix[RADEON_MAX_TEXTURE_UNITS];
+   GLboolean recheck_texgen[RADEON_MAX_TEXTURE_UNITS];
+   GLboolean TexGenNeedNormals[RADEON_MAX_TEXTURE_UNITS];
+   GLuint TexMatEnabled;
+   GLuint TexGenEnabled;
+   GLmatrix tmpmat;
+   GLuint last_ReallyEnabled;
+
+   /* VBI
+    */
+   GLuint vbl_seq;
+   GLuint vblank_flags;
+
+   uint64_t swap_ust;
+   uint64_t swap_missed_ust;
+
+   GLuint swap_count;
+   GLuint swap_missed_count;
+
+   PFNGLXGETUSTPROC get_ust;
+
+   /* radeon_tcl.c
+    */
+   struct radeon_tcl_info tcl;
+
+   /* radeon_swtcl.c
+    */
+   struct radeon_swtcl_info swtcl;
+
+   /* radeon_vtxfmt.c
+    */
+   struct radeon_vbinfo vb;
+
+   /* Mirrors of some DRI state
+    */
+   struct radeon_dri_mirror dri;
+
+ 
+   /* Performance counters
+    */
+   GLuint boxes;			/* Draw performance boxes */
+   GLuint hardwareWentIdle;
+   GLuint c_clears;
+   GLuint c_drawWaits;
+   GLuint c_textureSwaps;
+   GLuint c_textureBytes;
+   GLuint c_vertexBuffers;
+};
+/* Driver context stored in a GLcontext; ctx may be any pointer expression. */
+#define RADEON_CONTEXT(ctx)		((radeonContextPtr)((ctx)->DriverCtx))
+
+
+/* Pack r/g/b/a into one GLuint: cpp 2 uses PACK_COLOR_565 (a is ignored),
+ * cpp 4 uses PACK_COLOR_8888; any other cpp yields 0. */
+static __inline GLuint radeonPackColor( GLuint cpp, GLubyte r, GLubyte g,
+					GLubyte b, GLubyte a )
+{
+   if ( cpp == 2 ) {
+      return PACK_COLOR_565( r, g, b );
+   }
+   if ( cpp == 4 ) {
+      return PACK_COLOR_8888( a, r, g, b );
+   }
+   return 0;
+}
+
+#define RADEON_OLD_PACKETS 1
+
+
+extern void radeonDestroyContext( __DRIcontextPrivate *driContextPriv );
+extern GLboolean radeonCreateContext(const __GLcontextModes *glVisual,
+				     __DRIcontextPrivate *driContextPriv,
+				     void *sharedContextPrivate);
+extern void radeonSwapBuffers( __DRIdrawablePrivate *dPriv );
+extern GLboolean radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
+				    __DRIdrawablePrivate *driDrawPriv,
+				    __DRIdrawablePrivate *driReadPriv );
+extern GLboolean radeonUnbindContext( __DRIcontextPrivate *driContextPriv );
+
+/* ================================================================
+ * Debugging:
+ */
+#define DO_DEBUG		1
+
+#if DO_DEBUG
+extern int RADEON_DEBUG;
+#else
+#define RADEON_DEBUG		0
+#endif
+
+#define DEBUG_TEXTURE	0x001
+#define DEBUG_STATE	0x002
+#define DEBUG_IOCTL	0x004
+#define DEBUG_PRIMS	0x008
+#define DEBUG_VERTS	0x010
+#define DEBUG_FALLBACKS	0x020
+#define DEBUG_VFMT	0x040
+#define DEBUG_CODEGEN	0x080
+#define DEBUG_VERBOSE	0x100
+#define DEBUG_DRI       0x200
+#define DEBUG_DMA       0x400
+#define DEBUG_SANITY    0x800
+
+#endif
+#endif /* __RADEON_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
new file mode 100644
index 0000000000..a388720aba
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
@@ -0,0 +1,1191 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c,v 1.11 2003/01/29 22:04:59 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+#include <sched.h>
+#include <errno.h>
+
+#include "glheader.h"
+#include "imports.h"
+#include "simple_list.h"
+#include "swrast/swrast.h"
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_tcl.h"
+#include "radeon_sanity.h"
+
+#include "radeon_macros.h"  /* for INREG() */
+
+#include "vblank.h"
+/* Retry limits for the DRM_RADEON_CP_IDLE polling loops in radeonWaitForIdleLocked(). */
+#define RADEON_TIMEOUT             512	/* bound on the outer (ret == -EBUSY) loop */
+#define RADEON_IDLE_RETRY           16	/* bound on the inner (errno == EBUSY) loop */
+
+/* Forward declaration; the definition appears later in this file. */
+static void radeonWaitForIdle( radeonContextPtr rmesa );
+
+/* =============================================================
+ * Kernel command buffer handling
+ */
+/* Log an atom's name and cmd_size to stderr, plus each cmd[] entry when DEBUG_VERBOSE is set. */
+static void print_state_atom( struct radeon_state_atom *state )
+{
+   int idx;
+
+   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
+   if (!(RADEON_DEBUG & DEBUG_VERBOSE))
+      return;
+
+   for (idx = 0 ; idx < state->cmd_size ; idx++)
+      fprintf(stderr, "\t%s[%d]: %x\n", state->name, idx, state->cmd[idx]);
+}
+/* Queue a 3D wait, then copy every atom on `list` whose check() passes into the command buffer. */
+static void radeon_emit_state_list( radeonContextPtr rmesa, 
+				    struct radeon_state_atom *list )
+{
+   struct radeon_state_atom *state, *tmp;
+   char *dest;
+
+   /* From Felix Kuhling: similar to some other lockups, glaxium will
+    * lock with what we believe to be a normal command stream, but
+    * sprinkling some magic waits around allows it to run
+    * uninterrupted.  This has a slight effect on q3 framerates, but
+    * it might now be possible to remove the zbs hack, below.
+    *
+    * Felix reports that this can be narrowed down to just
+    * tcl,tex0,tex1 state, but that's pretty much every statechange,
+    * so let's just put the wait in always (unless Felix wants to
+    * narrow it down further...)
+    */
+   if (1) {
+      drmRadeonCmdHeader *cmd;
+      cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sizeof(*cmd), 
+						     __FUNCTION__ );
+      cmd->wait.cmd_type = RADEON_CMD_WAIT;	/* NOTE(review): header word is not zeroed first, unlike radeonEmitWait() */
+      cmd->wait.flags = RADEON_WAIT_3D;
+   }
+
+   foreach_s( state, tmp, list ) {
+      if (state->check( rmesa->glCtx )) {
+	 dest = radeonAllocCmdBuf( rmesa, state->cmd_size * 4, __FUNCTION__);	/* cmd_size entries, 4 bytes each */
+	 memcpy( dest, state->cmd, state->cmd_size * 4);
+	 move_to_head( &(rmesa->hw.clean), state );	/* emitted atoms go onto the clean list */
+	 if (RADEON_DEBUG & DEBUG_STATE) 
+	    print_state_atom( state );
+      }
+      else if (RADEON_DEBUG & DEBUG_STATE)	/* check() failed: atom is left where it is */
+	 fprintf(stderr, "skip state %s\n", state->name);
+   }
+}
+
+/* Emit every atom on the dirty list via radeon_emit_state_list(), after fixing the list up. */
+void radeonEmitState( radeonContextPtr rmesa )
+{
+   struct radeon_state_atom *state, *tmp;
+
+   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Somewhat overkill: after a lost context every clean atom is moved
+    * back onto the dirty list so that it is considered for emission again. */
+   if (rmesa->lost_context) {
+      if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
+	 fprintf(stderr, "%s - lost context\n", __FUNCTION__); 
+
+      foreach_s( state, tmp, &(rmesa->hw.clean) ) 
+	 move_to_tail(&(rmesa->hw.dirty), state );
+
+      rmesa->lost_context = 0;
+   }
+   else if (1) {
+      /* This is a dastardly kludge to work around a lockup that I
+       * haven't otherwise figured out.
+       */
+      move_to_tail(&(rmesa->hw.dirty), &(rmesa->hw.zbs) );	/* the zbs atom is made dirty on every call */
+   }
+
+   if (!(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)) {	/* chipset lacks the TCL flag */
+     foreach_s( state, tmp, &(rmesa->hw.dirty) ) {
+       if (state->is_tcl) {
+	 move_to_head( &(rmesa->hw.clean), state );	/* TCL atoms are marked clean without being emitted */
+       }
+     }
+   }
+
+   radeon_emit_state_list( rmesa, &rmesa->hw.dirty );
+}
+
+
+
+/* Fire a section of the retained (indexed_verts) buffer as a regular
+ * (PRIM_WALK_LIST) primitive: emits pending state, then queues one
+ * PACKET3_CLIP command drawing vertex_nr vertices of vertex_format. */
+extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
+				GLuint vertex_format,
+				GLuint primitive,
+				GLuint vertex_nr )
+{
+   drmRadeonCmdHeader *cmd;
+
+   assert(rmesa->dri.drmMinor >= 3); 
+   assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+   
+   radeonEmitState( rmesa );
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
+	      rmesa->store.cmd_used/4);
+   
+#if RADEON_OLD_PACKETS
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 6 * sizeof(*cmd),
+						  __FUNCTION__ );
+   cmd[0].i = 0;	/* clear the whole header word first, as the other packet builders do */
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
+   cmd[2].i = rmesa->ioctl.vertex_offset;
+   cmd[3].i = vertex_nr;
+   cmd[4].i = vertex_format;
+   cmd[5].i = (primitive | 
+	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+
+   if (RADEON_DEBUG & DEBUG_PRIMS)
+      fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
+	      __FUNCTION__,
+	      cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
+#else
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 4 * sizeof(*cmd),
+						  __FUNCTION__ );
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
+   cmd[2].i = vertex_format;
+   cmd[3].i = (primitive | 
+	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
+	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+
+
+   if (RADEON_DEBUG & DEBUG_PRIMS)
+      fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
+	      __FUNCTION__,
+	      cmd[1].i, cmd[2].i, cmd[3].i);
+#endif
+}
+
+/* Close the element packet opened by radeonAllocEltsOpenEnded() by patching in its final sizes. */
+void radeonFlushElts( radeonContextPtr rmesa )
+{
+   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
+   int dwords;
+#if RADEON_OLD_PACKETS
+   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2;	/* 24 header bytes, 2 bytes per element */
+#else
+   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2;	/* 16 header bytes, 2 bytes per element */
+#endif
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert( rmesa->dma.flush == radeonFlushElts );
+   rmesa->dma.flush = 0;	/* the packet is no longer open */
+
+   /* Cope with odd number of elts: adds 2 bytes of padding when needed
+    * (relies on cmd_used being even at this point). */
+   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
+   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
+
+#if RADEON_OLD_PACKETS
+   cmd[1] |= (dwords - 3) << 16;	/* size goes into bits 16+ of the packet header dword */
+   cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;	/* element count into the VC_CNTL dword */
+#else
+   cmd[1] |= (dwords - 3) << 16;
+   cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
+#endif
+}
+
+/* Open an indexed-primitive packet with room for min_nr elements; returns where to write them. */
+GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
+				    GLuint vertex_format,
+				    GLuint primitive,
+				    GLuint min_nr )
+{
+   drmRadeonCmdHeader *cmd;
+   GLushort *retval;
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr);
+
+   assert(rmesa->dri.drmMinor >= 3); 
+   assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+   
+   radeonEmitState( rmesa );
+   
+#if RADEON_OLD_PACKETS
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 
+						  24 + min_nr*2,
+						  __FUNCTION__ );
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;	/* size bits are ORed in by radeonFlushElts() */
+   cmd[2].i = rmesa->ioctl.vertex_offset;
+   cmd[3].i = 0xffff;	/* NOTE(review): this slot holds vertex_nr in radeonEmitVbufPrim(); presumably "maximum" here */
+   cmd[4].i = vertex_format;
+   cmd[5].i = (primitive | 
+	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+
+   retval = (GLushort *)(cmd+6);	/* elements follow the 6 header dwords */
+#else   
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 
+						  16 + min_nr*2,
+						  __FUNCTION__ );
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
+   cmd[2].i = vertex_format;
+   cmd[3].i = (primitive | 
+	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
+	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+
+   retval = (GLushort *)(cmd+4);	/* elements follow the 4 header dwords */
+#endif
+
+   if (RADEON_DEBUG & DEBUG_PRIMS)
+      fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
+	      __FUNCTION__,
+	      cmd[1].i, vertex_format, primitive);
+
+   assert(!rmesa->dma.flush);
+   rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+   rmesa->dma.flush = radeonFlushElts;	/* packet stays open until this hook runs */
+
+   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;	/* byte offset of the packet in cmd_buf */
+
+   return retval;
+}
+
+
+/* Record (old packets) or emit as a LOAD_VBPNTR packet (new packets) the vertex size and offset. */
+void radeonEmitVertexAOS( radeonContextPtr rmesa,
+			  GLuint vertex_size,
+			  GLuint offset )
+{
+#if RADEON_OLD_PACKETS
+   rmesa->ioctl.vertex_size = vertex_size;	/* only stored; vertex_offset is read back when a primitive is built */
+   rmesa->ioctl.vertex_offset = offset;
+#else
+   drmRadeonCmdHeader *cmd;
+   assert(rmesa->dri.drmMinor >= 3); 
+
+   if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
+      fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
+	      __FUNCTION__, vertex_size, offset);
+
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 5 * sizeof(int),
+						  __FUNCTION__ );
+
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
+   cmd[2].i = 1;	/* same slot that radeonEmitAOS() fills with its array count nr */
+   cmd[3].i = vertex_size | (vertex_size << 8);	/* same layout as radeonEmitAOS(): size in bits 0+, stride in bits 8+ */
+   cmd[4].i = offset;
+#endif
+}
+		       
+/* Select the array(s) in component[] for later primitives, starting `offset` strides in. */
+void radeonEmitAOS( radeonContextPtr rmesa,
+		    struct radeon_dma_region **component,
+		    GLuint nr,
+		    GLuint offset )
+{
+#if RADEON_OLD_PACKETS
+   assert( nr == 1 );	/* this path handles exactly one array... */
+   assert( component[0]->aos_size == component[0]->aos_stride );	/* ...whose size equals its stride */
+   rmesa->ioctl.vertex_size = component[0]->aos_size;
+   rmesa->ioctl.vertex_offset = 
+      (component[0]->aos_start + offset * component[0]->aos_stride * 4);
+#else
+   drmRadeonCmdHeader *cmd;
+   int sz = 3 + (nr/2 * 3) + (nr & 1) * 2;	/* 3 header dwords, 3 per pair of arrays, 2 for an odd last one */
+   int i;
+   int *tmp;
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert(rmesa->dri.drmMinor >= 3); 
+
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sz * sizeof(int),
+						  __FUNCTION__ );
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | ((sz-3) << 16);
+   cmd[2].i = nr;
+   tmp = &cmd[0].i;	/* start of the packet, kept for the debug dump below */
+   cmd += 3;
+
+   for (i = 0 ; i < nr ; i++) {
+      if (i & 1) {	/* odd array: shares the first dword of the pair (upper 16 bits) */
+	 cmd[0].i |= ((component[i]->aos_stride << 24) | 
+		      (component[i]->aos_size << 16));
+	 cmd[2].i = (component[i]->aos_start + 
+		     offset * component[i]->aos_stride * 4);
+	 cmd += 3;
+      }
+      else {	/* even array: starts a new pair (lower 16 bits) */
+	 cmd[0].i = ((component[i]->aos_stride << 8) | 
+		     (component[i]->aos_size << 0));
+	 cmd[1].i = (component[i]->aos_start + 
+		     offset * component[i]->aos_stride * 4);
+      }
+   }
+
+   if (RADEON_DEBUG & DEBUG_VERTS) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      for (i = 0 ; i < sz ; i++)
+	 fprintf(stderr, "   %d: %x\n", i, tmp[i]);
+   }
+#endif
+}
+
+/* Queue a BITBLT_MULTI packet copying a w x h rectangle; uses an already shifted color_fmt! */
+void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? */
+		   GLuint color_fmt,
+		   GLuint src_pitch,
+		   GLuint src_offset,
+		   GLuint dst_pitch,
+		   GLuint dst_offset,
+		   GLint srcx, GLint srcy,
+		   GLint dstx, GLint dsty,
+		   GLuint w, GLuint h )
+{
+   drmRadeonCmdHeader *cmd;
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
+	      __FUNCTION__, 
+	      src_pitch, src_offset, srcx, srcy,
+	      dst_pitch, dst_offset, dstx, dsty,
+	      w, h);
+
+   assert( (src_pitch & 63) == 0 );	/* pitches are encoded below in units of 64 */
+   assert( (dst_pitch & 63) == 0 );
+   assert( (src_offset & 1023) == 0 ); 	/* offsets are encoded below in units of 1024 */
+   assert( (dst_offset & 1023) == 0 ); 
+   assert( w < (1<<16) );	/* w and h share one dword, 16 bits each */
+   assert( h < (1<<16) );
+
+   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int),
+						  __FUNCTION__ );
+
+
+   cmd[0].i = 0;
+   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+   cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16);
+   cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+	       RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+	       RADEON_GMC_BRUSH_NONE |
+	       color_fmt |
+	       RADEON_GMC_SRC_DATATYPE_COLOR |
+	       RADEON_ROP3_S |
+	       RADEON_DP_SRC_SOURCE_MEMORY |
+	       RADEON_GMC_CLR_CMP_CNTL_DIS |
+	       RADEON_GMC_WR_MSK_DIS );
+
+   cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
+   cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
+   cmd[5].i = (srcx << 16) | srcy;	/* NOTE(review): coordinates are signed and not range-checked */
+   cmd[6].i = (dstx << 16) | dsty; /* dst */
+   cmd[7].i = (w << 16) | h;
+}
+
+/* Queue a RADEON_CMD_WAIT carrying `flags`; does nothing when drmMinor is below 6. */
+void radeonEmitWait( radeonContextPtr rmesa, GLuint flags )
+{
+   drmRadeonCmdHeader *wait_hdr;
+
+   if (rmesa->dri.drmMinor < 6)
+      return;
+   assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );	/* only the 2D/3D bits are accepted */
+
+   wait_hdr = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sizeof(int),
+						       __FUNCTION__ );
+   wait_hdr->i = 0;
+   wait_hdr->wait.cmd_type = RADEON_CMD_WAIT;
+   wait_hdr->wait.flags = flags;
+}
+
+/* Submit the stored commands with DRM_RADEON_CMDBUF and reset the store; callers hold the lock. */
+static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
+				    const char * caller )
+{
+   int ret, i;
+   drmRadeonCmdBuffer cmd;
+
+   if (RADEON_DEBUG & DEBUG_IOCTL) {
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
+
+      if (RADEON_DEBUG & DEBUG_VERBOSE) 
+	 for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )	/* dump the buffer one dword at a time */
+	    fprintf(stderr, "%d: %x\n", i/4, 
+		    *(int *)(&rmesa->store.cmd_buf[i]));
+   }
+
+   if (RADEON_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
+	      rmesa->dma.nr_released_bufs);
+
+
+   if (RADEON_DEBUG & DEBUG_SANITY) {	/* optional check, run with the cliprects used for submission below */
+      if (rmesa->state.scissor.enabled) 
+	 ret = radeonSanityCmdBuffer( rmesa, 
+				      rmesa->state.scissor.numClipRects,
+				      rmesa->state.scissor.pClipRects);
+      else
+	 ret = radeonSanityCmdBuffer( rmesa, 
+				      rmesa->numClipRects,
+				      rmesa->pClipRects);
+      if (ret) {
+	 fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);	 
+	 goto out;	/* skip the submission but still reset the store */
+      }
+   }
+
+
+   cmd.bufsz = rmesa->store.cmd_used;
+   cmd.buf = rmesa->store.cmd_buf;
+
+   if (rmesa->state.scissor.enabled) {	/* scissoring: use the scissor cliprect list */
+      cmd.nbox = rmesa->state.scissor.numClipRects;
+      cmd.boxes = (drmClipRect *)rmesa->state.scissor.pClipRects;
+   } else {
+      cmd.nbox = rmesa->numClipRects;
+      cmd.boxes = (drmClipRect *)rmesa->pClipRects;
+   }
+
+   ret = drmCommandWrite( rmesa->dri.fd,
+			  DRM_RADEON_CMDBUF,
+			  &cmd, sizeof(cmd) );
+
+   if (ret)
+      fprintf(stderr, "drmCommandWrite: %d\n", ret);
+
+ out:
+   rmesa->store.primnr = 0;
+   rmesa->store.statenr = 0;
+   rmesa->store.cmd_used = 0;
+   rmesa->dma.nr_released_bufs = 0;
+   rmesa->lost_context = 1;	/* makes the next radeonEmitState() re-dirty all clean atoms */
+   return ret;	/* non-zero when the sanity check or the ioctl failed */
+}
+
+
+/* Take the hardware lock and submit the accumulated command buffer.
+ * Note: does not emit any commands to avoid recursion on radeonAllocCmdBuf.
+ * A non-zero result from the locked flush is reported and ends the process. */
+void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
+{
+   int status;
+
+   assert (rmesa->dri.drmMinor >= 3);
+
+   /* The lock is held only around the submission itself. */
+   LOCK_HARDWARE( rmesa );
+   status = radeonFlushCmdBufLocked( rmesa, caller );
+   UNLOCK_HARDWARE( rmesa );
+
+   if (status == 0)
+      return;
+
+   /* Submission failed: report it and give up. */
+   fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", status);
+   exit(status);
+}
+
+/* =============================================================
+ * Hardware vertex buffer handling
+ */
+
+/* Replace rmesa->dma.current with a freshly obtained DMA buffer; exits the process on failure. */
+void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa )
+{
+   struct radeon_dma_buffer *dmabuf;
+   int fd = rmesa->dri.fd;
+   int index = 0;
+   int size = 0;
+   drmDMAReq dma;
+   int ret;
+
+   if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+      fprintf(stderr, "%s\n", __FUNCTION__);  
+
+   if (rmesa->dma.flush)
+      rmesa->dma.flush( rmesa );
+
+   if (rmesa->dma.current.buf)
+      radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+
+   if (rmesa->dma.nr_released_bufs > 4)
+      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+
+   /* Request one buffer; `index` is read back below as the granted buffer number. */
+   dma.context = rmesa->dri.hwContext;
+   dma.send_count = 0;
+   dma.send_list = NULL;
+   dma.send_sizes = NULL;
+   dma.flags = 0;
+   dma.request_count = 1;
+   dma.request_size = RADEON_BUFFER_SIZE;
+   dma.request_list = &index;
+   dma.request_sizes = &size;
+   dma.granted_count = 0;
+
+   LOCK_HARDWARE(rmesa);	/* no need to validate */
+   ret = drmDMA( fd, &dma );
+   if (ret != 0) {
+      /* Free some up this way? */
+      if (rmesa->dma.nr_released_bufs)
+	 radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
+
+      if (RADEON_DEBUG & DEBUG_DMA)
+	 fprintf(stderr, "Waiting for buffers\n");
+
+      radeonWaitForIdleLocked( rmesa );
+      ret = drmDMA( fd, &dma );
+      if ( ret != 0 ) {
+	 UNLOCK_HARDWARE( rmesa );
+	 fprintf( stderr, "Error: Could not get dma buffer... exiting\n" );
+	 exit( -1 );
+      }
+   }
+
+   UNLOCK_HARDWARE(rmesa);
+
+   if (RADEON_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "Allocated buffer %d\n", index);
+
+   /* The tracking record is dereferenced right away, so a failed allocation is fatal. */
+   dmabuf = CALLOC_STRUCT( radeon_dma_buffer );
+   if ( !dmabuf ) {
+      fprintf( stderr, "Error: Could not allocate dma buffer record... exiting\n" );
+      exit( -1 );
+   }
+   dmabuf->buf = &rmesa->radeonScreen->buffers->list[index];
+   dmabuf->refcount = 1;
+
+   rmesa->dma.current.buf = dmabuf;
+   rmesa->dma.current.address = dmabuf->buf->address;
+   rmesa->dma.current.end = dmabuf->buf->total;
+   rmesa->dma.current.start = 0;
+   rmesa->dma.current.ptr = 0;
+
+   rmesa->c_vertexBuffers++;
+}
+/* Drop region's reference to its DMA buffer; the last reference queues a DMA_DISCARD for it. */
+void radeonReleaseDmaRegion( radeonContextPtr rmesa,
+			     struct radeon_dma_region *region,
+			     const char *caller )
+{
+   struct radeon_dma_buffer *dmabuf;
+   drmRadeonCmdHeader *discard;
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
+
+   if (region->buf == 0)
+      return;
+   if (rmesa->dma.flush)
+      rmesa->dma.flush( rmesa );
+
+   dmabuf = region->buf;	/* read only after the flush hook has run */
+   if (--dmabuf->refcount == 0) {
+      if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+	 fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
+		 dmabuf->buf->idx);
+      discard = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sizeof(*discard),
+							 __FUNCTION__ );
+      discard->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
+      discard->dma.buf_idx = dmabuf->buf->idx;
+      FREE(dmabuf);
+      rmesa->dma.nr_released_bufs++;
+   }
+
+   region->buf = 0;
+   region->start = 0;
+}
+
+/* Allocates a region of `bytes` bytes from rmesa->dma.current.  If there isn't enough
+ * space in current, grab a new buffer (and discard what was left of current)
+ */
+void radeonAllocDmaRegion( radeonContextPtr rmesa, 
+			   struct radeon_dma_region *region,
+			   int bytes,
+			   int alignment )
+{
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+
+   if (rmesa->dma.flush)
+      rmesa->dma.flush( rmesa );
+
+   if (region->buf)	/* a region that is still in use is released first */
+      radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ );
+
+   alignment--;	/* becomes a mask; assumes alignment is a power of two */
+   rmesa->dma.current.start = rmesa->dma.current.ptr = 
+      (rmesa->dma.current.ptr + alignment) & ~alignment;
+
+   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+      radeonRefillCurrentDmaRegion( rmesa );	/* refill resets current.start and current.ptr to 0 */
+
+   region->start = rmesa->dma.current.start;
+   region->ptr = rmesa->dma.current.start;
+   region->end = rmesa->dma.current.start + bytes;
+   region->address = rmesa->dma.current.address;
+   region->buf = rmesa->dma.current.buf;
+   region->buf->refcount++;	/* the region shares the current buffer */
+
+   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
+   rmesa->dma.current.start = 
+      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;  
+
+   if ( rmesa->dri.drmMinor < 3 ) 	/* older DRM: start a fresh buffer after every region */
+      radeonRefillCurrentDmaRegion( rmesa );
+}
+/* Allocate a DMA region of vertsize * numverts bytes via radeonAllocDmaRegion(). */
+void radeonAllocDmaRegionVerts( radeonContextPtr rmesa, 
+				struct radeon_dma_region *region,
+				int numverts,
+				int vertsize,
+				int alignment )
+{
+   radeonAllocDmaRegion( rmesa, region, vertsize * numverts, alignment );
+}
+
+/* ================================================================
+ * SwapBuffers with client-side throttling
+ */
+/* Return the last-frame counter, from the GETPARAM ioctl or else from the MMIO register. */
+static CARD32 radeonGetLastFrame (radeonContextPtr rmesa) 
+{
+   unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map;	/* presumably consumed by INREG() below */
+   int ret;
+   CARD32 frame;
+
+   if (rmesa->dri.screen->drmMinor >= 4) {
+      drmRadeonGetParam gp;
+
+      gp.param = RADEON_PARAM_LAST_FRAME;
+      gp.value = &frame;
+      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
+				 &gp, sizeof(gp) );
+   } 
+   else
+      ret = -EINVAL;	/* older DRM: go straight to the register fallback */
+
+#ifndef __alpha__
+   if ( ret == -EINVAL ) {	/* ioctl not usable: read the register directly */
+      frame = INREG( RADEON_LAST_FRAME_REG );
+      ret = 0;
+   } 
+#endif
+   if ( ret ) {
+      fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
+      exit(1);
+   }
+
+   return frame;
+}
+/* Issue DRM_RADEON_IRQ_EMIT, handing the kernel &rmesa->iw.irq_seq; exits on failure. */
+static void radeonEmitIrqLocked( radeonContextPtr rmesa )
+{
+   drmRadeonIrqEmit emit_req;
+   int err;
+
+   emit_req.irq_seq = &rmesa->iw.irq_seq;
+   err = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT,
+			      &emit_req, sizeof(emit_req) );
+   if ( err == 0 )
+      return;
+   fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, err );
+   exit(1);
+}
+
+/* Issue DRM_RADEON_IRQ_WAIT on rmesa->iw, retrying on EINTR/EAGAIN; exits on other errors. */
+static void radeonWaitIrq( radeonContextPtr rmesa )
+{
+   int err;
+
+   while ((err = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
+				  &rmesa->iw, sizeof(rmesa->iw) )) != 0) {
+      /* Interrupted or told to try again: repeat the ioctl. */
+      if (errno == EINTR || errno == EAGAIN)
+	 continue;
+
+      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, err );
+      exit(1);
+   }
+}
+
+/* Wait until radeonGetLastFrame() has caught up with sarea->last_frame; called with the lock held. */
+static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
+{
+   RADEONSAREAPrivPtr sarea = rmesa->sarea;
+
+   if (rmesa->do_irqs) {
+      if (radeonGetLastFrame(rmesa) < sarea->last_frame) {
+	 if (!rmesa->irqsEmitted) {	/* no IRQ outstanding to sleep on: busy-wait */
+	    while (radeonGetLastFrame (rmesa) < sarea->last_frame)
+	       ;
+	 }
+	 else {	/* the lock is dropped around the IRQ wait */
+	    UNLOCK_HARDWARE( rmesa ); 
+	    radeonWaitIrq( rmesa );	
+	    LOCK_HARDWARE( rmesa ); 
+	 }
+	 rmesa->irqsEmitted = 10;	/* budget: an IRQ is emitted on each of the next 10 calls */
+      }
+
+      if (rmesa->irqsEmitted) {
+	 radeonEmitIrqLocked( rmesa );
+	 rmesa->irqsEmitted--;
+      }
+   } 
+   else {
+      while (radeonGetLastFrame (rmesa) < sarea->last_frame) {
+	 UNLOCK_HARDWARE( rmesa ); 	/* the lock is dropped between polls */
+	 if (rmesa->do_usleeps) 
+	    DO_USLEEP( 1 );
+	 LOCK_HARDWARE( rmesa ); 
+      }
+   }
+}
+
+/* Copy the back color buffer to the front color buffer: one DRM_RADEON_SWAP
+ * ioctl per batch of cliprects, after frame throttling and the vblank wait. */
+void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
+{
+   radeonContextPtr rmesa;
+   GLint nbox, i, ret;
+   GLboolean   missed_target;
+   uint64_t     ust;
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
+      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, rmesa->glCtx );
+   }
+
+   RADEON_FIREVERTICES( rmesa );
+   LOCK_HARDWARE( rmesa );
+
+   /* Throttle the frame rate -- only allow one pending swap buffers
+    * request at a time.
+    */
+   radeonWaitForFrameCompletion( rmesa );
+   UNLOCK_HARDWARE( rmesa );	/* the lock is not held across the vblank wait */
+   driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
+   LOCK_HARDWARE( rmesa );
+
+   nbox = dPriv->numClipRects; /* must be in locked region */
+
+   for ( i = 0 ; i < nbox ; ) {	/* i advances in the inner loop */
+      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );	/* end index of this batch */
+      XF86DRIClipRectPtr box = dPriv->pClipRects;
+      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      GLint n = 0;
+
+      for ( ; i < nr ; i++ ) {	/* copy this batch into the SAREA box list */
+	 *b++ = box[i];
+	 n++;
+      }
+      rmesa->sarea->nbox = n;
+
+      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
+
+      if ( ret ) {
+	 fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
+	 UNLOCK_HARDWARE( rmesa );
+	 exit( 1 );
+      }
+   }
+
+   UNLOCK_HARDWARE( rmesa );
+   rmesa->swap_count++;
+   (*rmesa->get_ust)( & ust );
+   if ( missed_target ) {
+      rmesa->swap_missed_count++;
+      rmesa->swap_missed_ust = ust - rmesa->swap_ust;	/* timestamp delta since the previous swap */
+   }
+
+   rmesa->swap_ust = ust;
+}
+/* Issue DRM_RADEON_FLIP, then update the draw offset/pitch from sarea->pfCurrentPage. */
+void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
+{
+   radeonContextPtr rmesa;
+   GLint ret;
+   GLboolean   missed_target;
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
+      fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
+	      rmesa->sarea->pfCurrentPage);
+   }
+
+   RADEON_FIREVERTICES( rmesa );
+   LOCK_HARDWARE( rmesa );
+
+   /* Need to do this for the perf box placement:
+    */
+   if (dPriv->numClipRects)
+   {
+      XF86DRIClipRectPtr box = dPriv->pClipRects;
+      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      b[0] = box[0];	/* only the first cliprect is passed along */
+      rmesa->sarea->nbox = 1;
+   }
+
+   /* Throttle the frame rate -- only allow a few pending swap buffers
+    * request at a time.
+    */
+   radeonWaitForFrameCompletion( rmesa );
+   UNLOCK_HARDWARE( rmesa );	/* the lock is not held across the vblank wait */
+   driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
+   if ( missed_target ) {
+      rmesa->swap_missed_count++;
+      (void) (*rmesa->get_ust)( & rmesa->swap_missed_ust );
+   }
+   LOCK_HARDWARE( rmesa );
+
+   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
+
+   UNLOCK_HARDWARE( rmesa );
+
+   if ( ret ) {
+      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
+      exit( 1 );
+   }
+
+   rmesa->swap_count++;
+   (void) (*rmesa->get_ust)( & rmesa->swap_ust );
+
+   if ( rmesa->sarea->pfCurrentPage == 1 ) {	/* page 1 current: draw to the front buffer's memory */
+	 rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
+	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+   } else {
+	 rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
+	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+   }
+
+   RADEON_STATECHANGE( rmesa, ctx );	/* flag the ctx atom before editing its command words */
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset;
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
+}
+
+
+/* ================================================================
+ * Buffer clear
+ */
+#define RADEON_MAX_CLEARS	256	/* throttle limit on (sarea->last_clear - completed clear counter) */
+/* Driver.Clear hook: hardware-clears the buffers it can and passes the rest to _swrast_Clear(). */
+static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
+			 GLint cx, GLint cy, GLint cw, GLint ch )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   RADEONSAREAPrivPtr sarea = rmesa->sarea;
+   unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map;	/* presumably consumed by INREG() below */
+   CARD32 clear;
+   GLuint flags = 0;
+   GLuint color_mask = 0;
+   GLint ret, i;
+
+   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
+      fprintf( stderr, "%s:  all=%d cx=%d cy=%d cw=%d ch=%d\n",
+	       __FUNCTION__, all, cx, cy, cw, ch );
+   }
+
+   radeonEmitState( rmesa );
+
+   /* Need to cope with lostcontext here as kernel relies on
+    * some residual state:
+    */
+   RADEON_FIREVERTICES( rmesa ); 
+
+   if ( mask & DD_FRONT_LEFT_BIT ) {	/* each handled bit is removed from mask */
+      flags |= RADEON_FRONT;
+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+      mask &= ~DD_FRONT_LEFT_BIT;
+   }
+
+   if ( mask & DD_BACK_LEFT_BIT ) {
+      flags |= RADEON_BACK;
+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+      mask &= ~DD_BACK_LEFT_BIT;
+   }
+
+   if ( mask & DD_DEPTH_BIT ) {
+      if ( ctx->Depth.Mask ) flags |= RADEON_DEPTH; /* FIXME: ??? */
+      mask &= ~DD_DEPTH_BIT;
+   }
+
+   if ( (mask & DD_STENCIL_BIT) && rmesa->state.stencil.hwBuffer ) {
+      flags |= RADEON_STENCIL;
+      mask &= ~DD_STENCIL_BIT;
+   }
+
+   if ( mask ) {	/* whatever is left cannot be done here: software clear */
+      if (RADEON_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
+      _swrast_Clear( ctx, mask, all, cx, cy, cw, ch );
+   }
+
+   if ( !flags ) 
+      return;
+
+
+   /* Flip top to bottom */
+   cx += dPriv->x;
+   cy  = dPriv->y + dPriv->h - cy - ch;
+
+   LOCK_HARDWARE( rmesa );
+
+   /* Throttle the number of clear ioctls we do.
+    */
+   while ( 1 ) {
+      int ret;	/* NOTE(review): shadows the function-scope ret */
+
+      if (rmesa->dri.screen->drmMinor >= 4) {
+	drmRadeonGetParam gp;
+
+	gp.param = RADEON_PARAM_LAST_CLEAR;
+	gp.value = &clear;
+	ret = drmCommandWriteRead( rmesa->dri.fd,
+				   DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
+      } else
+	ret = -EINVAL;	/* older DRM: go straight to the register fallback */
+
+#ifndef __alpha__
+      if ( ret == -EINVAL ) {
+	 clear = INREG( RADEON_LAST_CLEAR_REG );
+	 ret = 0;
+      }
+#endif
+      if ( ret ) {
+	 fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
+	 exit(1);
+      }
+      if ( RADEON_DEBUG & DEBUG_IOCTL ) {
+	 fprintf( stderr, "%s( %d )\n", __FUNCTION__, (int)clear );
+	 if ( ret ) fprintf( stderr, " ( RADEON_LAST_CLEAR register read directly )\n" );	/* NOTE(review): ret is always 0 here */
+      }
+
+      if ( sarea->last_clear - clear <= RADEON_MAX_CLEARS ) {
+	 break;
+      }
+
+      if ( rmesa->do_usleeps ) {	/* otherwise the loop spins with the lock held */
+	 UNLOCK_HARDWARE( rmesa );
+	 DO_USLEEP( 1 );
+	 LOCK_HARDWARE( rmesa );
+      }
+   }
+
+   for ( i = 0 ; i < dPriv->numClipRects ; ) {	/* i advances in the inner loops */
+      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );	/* end index of this batch */
+      XF86DRIClipRectPtr box = dPriv->pClipRects;
+      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
+      drmRadeonClearType clear;	/* NOTE(review): shadows the CARD32 clear declared above */
+      drmRadeonClearRect depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
+      GLint n = 0;
+
+      if ( !all ) {	/* partial clear: intersect each cliprect with the clear rectangle */
+	 for ( ; i < nr ; i++ ) {
+	    GLint x = box[i].x1;
+	    GLint y = box[i].y1;
+	    GLint w = box[i].x2 - x;
+	    GLint h = box[i].y2 - y;
+
+	    if ( x < cx ) w -= cx - x, x = cx;
+	    if ( y < cy ) h -= cy - y, y = cy;
+	    if ( x + w > cx + cw ) w = cx + cw - x;
+	    if ( y + h > cy + ch ) h = cy + ch - y;
+	    if ( w <= 0 ) continue;	/* empty intersection: skip this cliprect */
+	    if ( h <= 0 ) continue;
+
+	    b->x1 = x;
+	    b->y1 = y;
+	    b->x2 = x + w;
+	    b->y2 = y + h;
+	    b++;
+	    n++;
+	 }
+      } else {
+	 for ( ; i < nr ; i++ ) {
+	    *b++ = box[i];
+	    n++;
+	 }
+      }
+
+      rmesa->sarea->nbox = n;
+
+      clear.flags       = flags;
+      clear.clear_color = rmesa->state.color.clear;
+      clear.clear_depth = rmesa->state.depth.clear;
+      clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+      clear.depth_mask  = rmesa->state.stencil.clear;	/* NOTE(review): depth_mask is given the stencil clear value; confirm this is intended */
+      clear.depth_boxes = depth_boxes;
+
+      n--;
+      b = rmesa->sarea->boxes;
+      for ( ; n >= 0 ; n-- ) {	/* mirror the SAREA boxes as float rectangles */
+	 depth_boxes[n].f[RADEON_CLEAR_X1] = (float)b[n].x1;
+	 depth_boxes[n].f[RADEON_CLEAR_Y1] = (float)b[n].y1;
+	 depth_boxes[n].f[RADEON_CLEAR_X2] = (float)b[n].x2;
+	 depth_boxes[n].f[RADEON_CLEAR_Y2] = (float)b[n].y2;
+	 depth_boxes[n].f[RADEON_CLEAR_DEPTH] = 
+	    (float)rmesa->state.depth.clear;
+      }
+
+      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
+			     &clear, sizeof(drmRadeonClearType));
+
+      if ( ret ) {
+	 UNLOCK_HARDWARE( rmesa );
+	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
+	 exit( 1 );
+      }
+   }
+
+   UNLOCK_HARDWARE( rmesa );
+}
+
+/* Repeat DRM_RADEON_CP_IDLE until it succeeds or the retry limits run out; exits on failure. */
+void radeonWaitForIdleLocked( radeonContextPtr rmesa )
+{
+    int fd = rmesa->dri.fd;
+    int to = 0;
+    int ret, i = 0;
+
+    rmesa->c_drawWaits++;
+
+    do {
+        do {
+            ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE);
+        } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY );	/* NOTE(review): tests errno, and i is never reset */
+    } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) );	/* NOTE(review): tests the return value instead */
+
+    if ( ret < 0 ) {
+	UNLOCK_HARDWARE( rmesa );	/* the lock is released before exiting */
+	fprintf( stderr, "Error: Radeon timed out... exiting\n" );
+	exit( -1 );
+    }
+}
+
+/* Take the hardware lock around a call to radeonWaitForIdleLocked(). */
+static void radeonWaitForIdle( radeonContextPtr rmesa )
+{
+   LOCK_HARDWARE(rmesa);
+   radeonWaitForIdleLocked( rmesa );
+   UNLOCK_HARDWARE(rmesa);
+}
+
+/* Run the pending flush hook, then emit dirty state and submit queued commands (drmMinor >= 3). */
+void radeonFlush( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (rmesa->dma.flush)
+      rmesa->dma.flush( rmesa );
+
+   if (rmesa->dri.drmMinor < 3)
+      return;
+
+   if (!is_empty_list(&rmesa->hw.dirty))
+      radeonEmitState( rmesa );
+   if (rmesa->store.cmd_used != 0)
+      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+}
+
+/* Flush everything queued, then block until the hardware is done with it:
+ * through an emitted IRQ when do_irqs is set, otherwise by waiting for idle.
+ */
+void radeonFinish( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   radeonFlush( ctx );
+   if (!rmesa->do_irqs) {
+      radeonWaitForIdle( rmesa );
+      return;
+   }
+   LOCK_HARDWARE( rmesa );
+   radeonEmitIrqLocked( rmesa );
+   UNLOCK_HARDWARE( rmesa );
+   radeonWaitIrq( rmesa );
+}
+
+/* Install this file's flush, finish and clear routines in ctx->Driver. */
+void radeonInitIoctlFuncs( GLcontext *ctx )
+{
+    ctx->Driver.Flush = radeonFlush;
+    ctx->Driver.Finish = radeonFinish;
+    ctx->Driver.Clear = radeonClear;
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
new file mode 100644
index 0000000000..3f6e1751cf
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
@@ -0,0 +1,188 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.h,v 1.6 2002/12/16 16:18:58 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifndef __RADEON_IOCTL_H__
+#define __RADEON_IOCTL_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "simple_list.h"
+#include "radeon_lock.h"
+
+
+extern void radeonEmitState( radeonContextPtr rmesa );
+extern void radeonEmitVertexAOS( radeonContextPtr rmesa,
+				 GLuint vertex_size,
+				 GLuint offset );
+
+extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
+				GLuint vertex_format,
+				GLuint primitive,
+				GLuint vertex_nr );
+
+extern void radeonFlushElts( radeonContextPtr rmesa );
+
+extern GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
+					   GLuint vertex_format,
+					   GLuint primitive,
+					   GLuint min_nr );
+
+extern void radeonEmitAOS( radeonContextPtr rmesa,
+			   struct radeon_dma_region **regions,
+			   GLuint n,
+			   GLuint offset );
+
+extern void radeonEmitBlit( radeonContextPtr rmesa,
+			    GLuint color_fmt,
+			    GLuint src_pitch,
+			    GLuint src_offset,
+			    GLuint dst_pitch,
+			    GLuint dst_offset,
+			    GLint srcx, GLint srcy,
+			    GLint dstx, GLint dsty,
+			    GLuint w, GLuint h );
+
+extern void radeonEmitWait( radeonContextPtr rmesa, GLuint flags );
+
+extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * );
+extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa );
+
+extern void radeonAllocDmaRegion( radeonContextPtr rmesa,
+				  struct radeon_dma_region *region,
+				  int bytes, 
+				  int alignment );
+
+extern void radeonAllocDmaRegionVerts( radeonContextPtr rmesa,
+				       struct radeon_dma_region *region,
+				       int numverts,
+				       int vertsize, 
+				       int alignment );
+
+extern void radeonReleaseDmaRegion( radeonContextPtr rmesa,
+				    struct radeon_dma_region *region,
+				    const char *caller );
+
+extern void radeonCopyBuffer( const __DRIdrawablePrivate *drawable );
+extern void radeonPageFlip( const __DRIdrawablePrivate *drawable );
+extern void radeonFlush( GLcontext *ctx );
+extern void radeonFinish( GLcontext *ctx );
+extern void radeonWaitForIdleLocked( radeonContextPtr rmesa );
+extern void radeonWaitForVBlank( radeonContextPtr rmesa );
+extern void radeonInitIoctlFuncs( GLcontext *ctx );
+extern void radeonGetAllParams( radeonContextPtr rmesa );
+
+/* radeon_compat.c:
+ */
+extern void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
+				       GLuint vertex_format,
+				       GLuint hw_primitive,
+				       GLuint nrverts );
+
+
+/* ================================================================
+ * Helper macros:
+ */
+
+/* Close off the last primitive, if it exists.  The argument is parenthesised
+ * so any pointer expression works; note that it is evaluated more than once. */
+#define RADEON_NEWPRIM( rmesa )			\
+do {						\
+   if ( (rmesa)->dma.flush )			\
+      (rmesa)->dma.flush( rmesa );		\
+} while (0)
+
+/* Flush any open primitive, then move state atom rmesa->hw.ATOM to the head
+ * of the dirty list.  Can accommodate several state changes and primitive
+ * changes without actually firing the buffer. */
+#define RADEON_STATECHANGE( rmesa, ATOM )			\
+do {								\
+   RADEON_NEWPRIM( rmesa );					\
+   move_to_head( &((rmesa)->hw.dirty), &((rmesa)->hw.ATOM));	\
+} while (0)
+/* Copy ATOM's cmd words into its lastcmd; yields lastcmd.  Needs a local `rmesa`. */
+#define RADEON_DB_STATE( ATOM )			        \
+   memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd,	\
+	   rmesa->hw.ATOM.cmd_size * 4)
+/* Mark `atom` dirty if its two command copies differ; returns 1 if so, else 0. */
+static __inline int RADEON_DB_STATECHANGE( 
+   radeonContextPtr rmesa,
+   struct radeon_state_atom *atom )
+{
+   int *swap;
+
+   if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4) == 0)
+      return 0;			/* identical copies: nothing to emit */
+
+   RADEON_NEWPRIM( rmesa );
+   move_to_head( &(rmesa->hw.dirty), atom );
+   swap = atom->lastcmd;		/* exchange cmd and lastcmd */
+   atom->lastcmd = atom->cmd;
+   atom->cmd = swap;
+   return 1;
+}
+
+
+/* Fire the buffered vertices no matter what.  The argument is parenthesised
+ * so any pointer expression works; note that it is evaluated more than once. */
+#define RADEON_FIREVERTICES( rmesa )				\
+do {								\
+   if ( (rmesa)->store.cmd_used || (rmesa)->dma.flush ) {	\
+      radeonFlush( (rmesa)->glCtx );				\
+   }								\
+} while (0)
+
+/* Reserve `bytes` at the tail of the command buffer and return a pointer to
+ * them, calling radeonFlushCmdBuf() first if they would not fit. */
+static __inline char *radeonAllocCmdBuf( radeonContextPtr rmesa,
+					 int bytes, const char *where )
+{
+   /* Attribute the flush to the real caller (`where`), not to this helper. */
+   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
+      radeonFlushCmdBuf( rmesa, where );
+   assert(rmesa->dri.drmMinor >= 3);
+
+   {
+      char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+      rmesa->store.cmd_used += bytes;
+      return head;
+   }
+}
+
+
+
+
+#endif
+#endif /* __RADEON_IOCTL_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_lighting.c b/src/mesa/drivers/dri/radeon/radeon_lighting.c
new file mode 100644
index 0000000000..b00c9cb6de
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_lighting.c
@@ -0,0 +1,682 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state.c,v 1.5 2002/09/16 18:05:20 eich Exp $ */
+/*
+ * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "api_arrayelt.h"
+/* #include "mmath.h" */
+#include "enums.h"
+#include "colormac.h"
+
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_tcl.h"
+#include "radeon_tex.h"
+#include "radeon_vtxfmt.h"
+
+
+
+/* =============================================================
+ * Materials
+ */
+
+
+/* Rebuild the glt state atom.  Update on colormaterial, material
+ * emissive/ambient, lightmodel.globalambient
+ */
+void update_global_ambient( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   float *fcmd = (float *)RADEON_DB_STATE( glt );
+
+   /* Need to do more if both emissive & ambient are PREMULT (both source
+    * fields zero): start from Emission, then ACC_SCALE_3V in the ambient. */
+   if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &
+       ((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
+	(3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0) 
+   {
+      COPY_3V( &fcmd[GLT_RED], 
+	       ctx->Light.Material[0].Emission);
+      ACC_SCALE_3V( &fcmd[GLT_RED],
+		   ctx->Light.Model.Ambient,
+		   ctx->Light.Material[0].Ambient);
+   } 
+   else
+   {
+      COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient );
+   }
+   
+   RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
+}
+
+/* Rebuild the colour words of light p's state atom.  Update on change to 
+ *    - light[p].colors
+ *    - light[p].enabled
+ *    - material,
+ *    - colormaterial enabled
+ *    - colormaterial bitmask
+ */
+void update_light_colors( GLcontext *ctx, GLuint p )
+{
+   struct gl_light *l = &ctx->Light.Light[p];
+
+   /* A disabled light's atom is left untouched. */
+
+   if (l->Enabled) {
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      float *fcmd = (float *)RADEON_DB_STATE( lit[p] );
+      GLuint bitmask = ctx->Light.ColorMaterialBitmask;
+      struct gl_material *mat = &ctx->Light.Material[0];
+
+      COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );	 
+      COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse );
+      COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular );
+      
+      if (!ctx->Light.ColorMaterialEnabled)
+	 bitmask = 0;
+      /* Terms not tracked by colormaterial go through SELF_SCALE_3V with the front material. */
+      if ((bitmask & FRONT_AMBIENT_BIT) == 0) 
+	 SELF_SCALE_3V( &fcmd[LIT_AMBIENT_RED], mat->Ambient );
+
+      if ((bitmask & FRONT_DIFFUSE_BIT) == 0) 
+	 SELF_SCALE_3V( &fcmd[LIT_DIFFUSE_RED], mat->Diffuse );
+      
+      if ((bitmask & FRONT_SPECULAR_BIT) == 0) 
+	 SELF_SCALE_3V( &fcmd[LIT_SPECULAR_RED], mat->Specular );
+
+      RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+   }
+}
+
+/* Request the two-sided-lighting TCL fallback when front and back materials
+ * differ, or when colormaterial tracks different front and back properties. */
+void check_twoside_fallback( GLcontext *ctx )
+{
+   GLboolean fallback = GL_FALSE;
+
+   if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+      const GLuint bits = ctx->Light.ColorMaterialBitmask;
+
+      if (memcmp( &ctx->Light.Material[0], &ctx->Light.Material[1],
+		  sizeof(struct gl_material)) != 0)
+	 fallback = GL_TRUE;
+      else if (ctx->Light.ColorMaterialEnabled &&
+	       (bits & BACK_MATERIAL_BITS) != ((bits & FRONT_MATERIAL_BITS)<<1))
+	 fallback = GL_TRUE;
+   }
+
+   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_LIGHT_TWOSIDE, fallback );
+}
+/* Program the TCL colour-source fields from ctx's colormaterial bitmask; face and mode are not read. */
+void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   if (ctx->Light.ColorMaterialEnabled) {
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      GLuint light_model_ctl = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
+      GLuint mask = ctx->Light.ColorMaterialBitmask;
+
+      /* Default to PREMULT (all four source fields cleared); tracked
+       * components are then switched to RADEON_LM_SOURCE_VERTEX_DIFFUSE. */
+      light_model_ctl &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
+			   (3 << RADEON_AMBIENT_SOURCE_SHIFT) |
+			   (3 << RADEON_DIFFUSE_SOURCE_SHIFT) |
+			   (3 << RADEON_SPECULAR_SOURCE_SHIFT)); 
+   
+      if (mask & FRONT_EMISSION_BIT) {
+	 light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
+			     RADEON_EMISSIVE_SOURCE_SHIFT);
+      }
+
+      if (mask & FRONT_AMBIENT_BIT) {
+	 light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
+			     RADEON_AMBIENT_SOURCE_SHIFT);
+      }
+	 
+      if (mask & FRONT_DIFFUSE_BIT) {
+	 light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
+			     RADEON_DIFFUSE_SOURCE_SHIFT);
+      }
+   
+      if (mask & FRONT_SPECULAR_BIT) {
+	 light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
+			     RADEON_SPECULAR_SOURCE_SHIFT);
+      }
+      /* Only on an actual change: emit tcl and refresh the derived colours. */
+      if (light_model_ctl != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) {
+	 GLuint p;
+
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl;      
+
+	 for (p = 0 ; p < MAX_LIGHTS; p++) 
+	    update_light_colors( ctx, p );
+	 update_global_ambient( ctx );
+      }
+   }
+   
+   check_twoside_fallback( ctx );
+}
+/* Copy front-material values into the mtl atom and, if any word changed, refresh dependent state. */
+void radeonUpdateMaterial( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl );
+   GLuint p;
+   GLuint mask = ~0;
+   /* Components tracked by colormaterial are masked out and not written below. */
+   if (ctx->Light.ColorMaterialEnabled)
+      mask &= ~ctx->Light.ColorMaterialBitmask;
+
+   if (RADEON_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+      
+   if (mask & FRONT_EMISSION_BIT) {
+      fcmd[MTL_EMMISSIVE_RED]   = ctx->Light.Material[0].Emission[0];
+      fcmd[MTL_EMMISSIVE_GREEN] = ctx->Light.Material[0].Emission[1];
+      fcmd[MTL_EMMISSIVE_BLUE]  = ctx->Light.Material[0].Emission[2];
+      fcmd[MTL_EMMISSIVE_ALPHA] = ctx->Light.Material[0].Emission[3];
+   }
+   if (mask & FRONT_AMBIENT_BIT) {
+      fcmd[MTL_AMBIENT_RED]     = ctx->Light.Material[0].Ambient[0];
+      fcmd[MTL_AMBIENT_GREEN]   = ctx->Light.Material[0].Ambient[1];
+      fcmd[MTL_AMBIENT_BLUE]    = ctx->Light.Material[0].Ambient[2];
+      fcmd[MTL_AMBIENT_ALPHA]   = ctx->Light.Material[0].Ambient[3];
+   }
+   if (mask & FRONT_DIFFUSE_BIT) {
+      fcmd[MTL_DIFFUSE_RED]     = ctx->Light.Material[0].Diffuse[0];
+      fcmd[MTL_DIFFUSE_GREEN]   = ctx->Light.Material[0].Diffuse[1];
+      fcmd[MTL_DIFFUSE_BLUE]    = ctx->Light.Material[0].Diffuse[2];
+      fcmd[MTL_DIFFUSE_ALPHA]   = ctx->Light.Material[0].Diffuse[3];
+   }
+   if (mask & FRONT_SPECULAR_BIT) {
+      fcmd[MTL_SPECULAR_RED]    = ctx->Light.Material[0].Specular[0];
+      fcmd[MTL_SPECULAR_GREEN]  = ctx->Light.Material[0].Specular[1];
+      fcmd[MTL_SPECULAR_BLUE]   = ctx->Light.Material[0].Specular[2];
+      fcmd[MTL_SPECULAR_ALPHA]  = ctx->Light.Material[0].Specular[3];
+   }
+   if (mask & FRONT_SHININESS_BIT) {
+      fcmd[MTL_SHININESS]       = ctx->Light.Material[0].Shininess;
+   }
+   /* RADEON_DB_STATECHANGE returns nonzero only when the atom's words changed. */
+   if (RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mtl )) {
+      for (p = 0 ; p < MAX_LIGHTS; p++) 
+	 update_light_colors( ctx, p );
+
+      check_twoside_fallback( ctx );
+      update_global_ambient( ctx );
+   }
+   else if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_STATE))
+      fprintf(stderr, "%s: Elided noop material call\n", __FUNCTION__);
+}
+
+/* Refresh the lighting-space bit, the eye atom and each enabled light's
+ * position/direction.  Triggers: _NEW_LIGHT, _NEW_MODELVIEW,
+ * _MESA_NEW_NEED_EYE_COORDS
+ *
+ * Uses derived state from mesa:
+ *       _VP_inf_norm
+ *       _h_inf_norm
+ *       _Position
+ *       _NormDirection
+ *       _ModelViewInvScale
+ *       _NeedEyeCoords
+ *       _EyeZDir
+ *
+ * which are calculated in light.c and are correct for the current
+ * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
+ * and _MESA_NEW_NEED_EYE_COORDS.  
+ */
+void radeonUpdateLighting( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   /* Have to check these, or have an automatic shortcircuit mechanism
+    * to remove noop statechanges. (Or just do a better job on the
+    * front end).
+    */
+   {
+      GLuint tmp = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
+
+      if (ctx->_NeedEyeCoords)
+	 tmp &= ~RADEON_LIGHT_IN_MODELSPACE;
+      else
+	 tmp |= RADEON_LIGHT_IN_MODELSPACE;
+      
+
+      /* NOTE(review): an earlier comment said to leave this test disabled
+         (unexplained q3 lockup, even with new packets), yet it is active.
+      */
+      if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) 
+      {
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp;
+      }
+   }
+
+   {
+      GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( eye );
+      fcmd[EYE_X] = ctx->_EyeZDir[0];
+      fcmd[EYE_Y] = ctx->_EyeZDir[1];
+      fcmd[EYE_Z] = - ctx->_EyeZDir[2];	/* only the Z component is negated */
+      fcmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale;
+      RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.eye );
+   }
+
+
+/*     RADEON_STATECHANGE( rmesa, glt ); */
+
+   if (ctx->Light.Enabled) {
+      GLint p;
+      for (p = 0 ; p < MAX_LIGHTS; p++) {
+	 if (ctx->Light.Light[p].Enabled) {
+	    struct gl_light *l = &ctx->Light.Light[p];
+	    GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] );
+	    /* EyePosition w == 0 uses the _inf_ vectors; otherwise _Position and -_NormDirection. */
+	    if (l->EyePosition[3] == 0.0) {
+	       COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); 
+	       COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); 
+	       fcmd[LIT_POSITION_W] = 0;
+	       fcmd[LIT_DIRECTION_W] = 0;
+	    } else {
+	       COPY_4V( &fcmd[LIT_POSITION_X], l->_Position );
+	       fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0];
+	       fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1];
+	       fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2];
+	       fcmd[LIT_DIRECTION_W] = 0;
+	    }
+
+	    RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+	 }
+      }
+   }
+}
+
+/* Driver Lightfv hook: mirror one parameter of light (light - GL_LIGHT0) into the lit[p] / tcl atoms. */
+void radeonLightfv( GLcontext *ctx, GLenum light,
+		    GLenum pname, const GLfloat *params )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLint p = light - GL_LIGHT0;
+   struct gl_light *l = &ctx->Light.Light[p];
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
+   /* fcmd aliases lit[p].cmd itself, so writes below are preceded by RADEON_STATECHANGE. */
+
+   switch (pname) {
+   case GL_AMBIENT:		
+   case GL_DIFFUSE:
+   case GL_SPECULAR:
+      update_light_colors( ctx, p );
+      break;
+
+   case GL_SPOT_DIRECTION: 
+      /* picked up in update_light */	
+      break;
+
+   case GL_POSITION: {
+      /* positions picked up in update_light, but can do flag here */	
+      GLuint flag = (p&1)? RADEON_LIGHT_1_IS_LOCAL : RADEON_LIGHT_0_IS_LOCAL;
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;	/* two lights per control word */
+
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (l->EyePosition[3] != 0.0F)
+	 rmesa->hw.tcl.cmd[idx] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[idx] &= ~flag;
+      break;
+   }
+
+   case GL_SPOT_EXPONENT:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_EXPONENT] = params[0];
+      break;
+
+   case GL_SPOT_CUTOFF: {
+      GLuint flag = (p&1) ? RADEON_LIGHT_1_IS_SPOT : RADEON_LIGHT_0_IS_SPOT;
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
+
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff;	/* taken from ctx, not from params */
+
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (l->SpotCutoff != 180.0F)
+	 rmesa->hw.tcl.cmd[idx] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[idx] &= ~flag;
+      break;
+   }
+
+   case GL_CONSTANT_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_CONST] = params[0];
+      break;
+   case GL_LINEAR_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_LINEAR] = params[0];
+      break;
+   case GL_QUADRATIC_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_QUADRATIC] = params[0];
+      break;
+   default:
+      return;
+   }
+
+}
+
+		  
+
+/* Driver LightModelfv hook; values are read from ctx->Light.Model, `param` is not used. */
+void radeonLightModelfv( GLcontext *ctx, GLenum pname,
+			 const GLfloat *param )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   switch (pname) {
+      case GL_LIGHT_MODEL_AMBIENT: 
+	 update_global_ambient( ctx );
+	 break;
+
+      case GL_LIGHT_MODEL_LOCAL_VIEWER:
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 if (ctx->Light.Model.LocalViewer)
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LOCAL_VIEWER;
+	 else
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LOCAL_VIEWER;
+         break;
+
+      case GL_LIGHT_MODEL_TWO_SIDE:
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 if (ctx->Light.Model.TwoSide)
+	    rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_LIGHT_TWOSIDE;
+	 else
+	    rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_LIGHT_TWOSIDE;
+
+	 check_twoside_fallback( ctx );
+	 /* With a TCL fallback active, re-pick the render and vertex state. */
+#if _HAVE_SWTNL
+	 if (rmesa->TclFallback) {
+	    radeonChooseRenderState( ctx );
+	    radeonChooseVertexState( ctx );
+	 }
+#endif
+         break;
+
+      case GL_LIGHT_MODEL_COLOR_CONTROL:
+	 radeonUpdateSpecular(ctx);
+	 /* Separate specular clears the combine bit; any other mode sets it. */
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 if (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) 
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= 
+	       ~RADEON_DIFFUSE_SPECULAR_COMBINE;
+	 else
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= 
+	       RADEON_DIFFUSE_SPECULAR_COMBINE;
+         break;
+
+      default:
+         break;
+   }
+}
+
+
+/* =============================================================
+ * Fog
+ */
+
+/* Driver Fogfv hook: update fog mode bits, fog constants C/D and fog colour from ctx->Fog. */
+static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   union { int i; float f; } c, d;	/* float view of the int command words */
+   GLchan col[4];
+
+   c.i = rmesa->hw.fog.cmd[FOG_C];
+   d.i = rmesa->hw.fog.cmd[FOG_D];
+
+   switch (pname) {
+   case GL_FOG_MODE:
+      if (!ctx->Fog.Enabled)
+	 return;
+      RADEON_STATECHANGE(rmesa, tcl);
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK;
+      switch (ctx->Fog.Mode) {
+      case GL_LINEAR:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_LINEAR;
+	 if (ctx->Fog.Start == ctx->Fog.End) {	/* avoid dividing by zero */
+	    c.f = 1.0F;
+	    d.f = 1.0F;
+	 }
+	 else {
+	    c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+	    d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start);
+	 }
+	 break;
+      case GL_EXP:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP;
+	 c.f = 0.0;
+	 d.f = ctx->Fog.Density;
+	 break;
+      case GL_EXP2:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP2;
+	 c.f = 0.0;
+	 d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+	 break;
+      default:
+	 return;
+      }
+      break;
+   case GL_FOG_DENSITY:
+      switch (ctx->Fog.Mode) {
+      case GL_EXP:
+	 c.f = 0.0;
+	 d.f = ctx->Fog.Density;
+	 break;
+      case GL_EXP2:
+	 c.f = 0.0;
+	 d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+	 break;
+      default:
+	 break;
+      }
+      break;
+   case GL_FOG_START:
+   case GL_FOG_END:
+      if (ctx->Fog.Mode == GL_LINEAR) {
+	 if (ctx->Fog.Start == ctx->Fog.End) {	/* avoid dividing by zero */
+	    c.f = 1.0F;
+	    d.f = 1.0F;
+	 } else {
+	    c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+	    d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start);
+	 }
+      }
+      break;
+   case GL_FOG_COLOR: 
+      RADEON_STATECHANGE( rmesa, ctx );
+      UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] =
+	 radeonPackColor( 4, col[0], col[1], col[2], 0 );
+      break;
+   case GL_FOG_COORDINATE_SOURCE_EXT: 
+      /* What to do?
+       */
+      break;
+   default:
+      return;
+   }
+   /* Emit the fog atom only if C or D changed (compared as raw ints). */
+   if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) {
+      RADEON_STATECHANGE( rmesa, fog );
+      rmesa->hw.fog.cmd[FOG_C] = c.i;
+      rmesa->hw.fog.cmd[FOG_D] = d.i;
+   }
+}
+
+/* Examine lighting and texture state to determine if separate specular
+ * should be enabled, then choose which colours TCL computes and packs.
+ */
+void radeonUpdateSpecular( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
+
+   if ( ctx->_TriangleCaps & DD_SEPARATE_SPECULAR ) {
+      p |=  RADEON_SPECULAR_ENABLE;
+   } else {
+      p &= ~RADEON_SPECULAR_ENABLE;
+   }
+
+   if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) {
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p;
+   }
+
+   /* Bizarre: have to leave lighting enabled to get fog.  The tcl atom is
+    * marked dirty unconditionally, before the branches below. */
+   RADEON_STATECHANGE( rmesa, tcl );
+   if ((ctx->Light.Enabled &&
+	ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR)) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+   }
+   else if (ctx->Fog.Enabled) {
+      if (ctx->Light.Enabled) {
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+      } else {
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+      }
+   }
+   else if (ctx->Light.Enabled) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+   } else if (ctx->Fog.ColorSumEnabled ) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE;
+   } else {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE;
+   }
+
+#if _HAVE_SWTNL
+   /* Update vertex/render formats
+    */
+   if (rmesa->TclFallback) { 
+      radeonChooseRenderState( ctx );
+      radeonChooseVertexState( ctx );
+   }
+#endif
+}
+
+
+/* Driver LightingSpaceChange hook: recompute RADEON_RESCALE_NORMALS for the
+ * lighting space reported by ctx->_NeedEyeCoords. */
+static void radeonLightingSpaceChange( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLboolean rescale;
+
+   RADEON_STATECHANGE( rmesa, tcl );
+
+   if (RADEON_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, ctx->_NeedEyeCoords);
+
+   /* With eye coords the GL flag is used as is; otherwise it is inverted. */
+   rescale = ctx->_NeedEyeCoords ? ctx->Transform.RescaleNormals
+				 : !ctx->Transform.RescaleNormals;
+
+   if ( rescale )
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_RESCALE_NORMALS;
+   else
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
+}
+/* Install the lighting/fog driver hooks, then replay ctx's current values through them. */
+void radeonInitLightStateFuncs( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   int i;
+
+   ctx->Driver.LightModelfv		= radeonLightModelfv;
+   ctx->Driver.Lightfv			= radeonLightfv;
+   ctx->Driver.Fogfv			= radeonFogfv;
+   ctx->Driver.LightingSpaceChange      = radeonLightingSpaceChange;
+   /* MAX_LIGHTS rather than a literal 8, matching the other per-light loops. */
+   for (i = 0 ; i < MAX_LIGHTS; i++) {
+      struct gl_light *l = &ctx->Light.Light[i];
+      GLenum p = GL_LIGHT0 + i;
+      *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX;
+
+      ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient );
+      ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse );
+      ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular );
+      ctx->Driver.Lightfv( ctx, p, GL_POSITION, 0 );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, 0 );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff );
+      ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION,
+			   &l->ConstantAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION,
+			   &l->LinearAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION,
+			   &l->QuadraticAttenuation );
+   }
+
+   ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT,
+			     ctx->Light.Model.Ambient );
+
+   ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 );
+   ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+   ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
+   ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, 0 );
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
new file mode 100644
index 0000000000..96a4f9d112
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
@@ -0,0 +1,128 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_lock.c,v 1.5 2002/10/30 12:51:55 alanh Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ */
+
+#include "glheader.h"
+#include "radeon_context.h"
+#include "radeon_lock.h"
+#include "radeon_tex.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+/* Lock-debugging globals; DEBUG_LOCK() in radeon_lock.h stores __FILE__ in prevLockFile. */
+#if DEBUG_LOCKING
+char *prevLockFile = NULL;
+int prevLockLine = 0;
+#endif
+
+/* Turn on/off page flipping according to the flags in the sarea, and point
+ * the colour-buffer offset/pitch at the front or back buffer accordingly. */
+static void
+radeonUpdatePageFlipping( radeonContextPtr rmesa )
+{
+   int use_back;
+
+   if (rmesa->dri.drmMinor < 3)
+      return;
+
+   rmesa->doPageFlip = rmesa->sarea->pfAllowPageFlip;
+
+   use_back = (rmesa->glCtx->Color._DrawDestMask == BACK_LEFT_BIT);
+   use_back ^= (rmesa->sarea->pfCurrentPage == 1);	/* inverted while page 1 is current */
+
+   if ( RADEON_DEBUG & DEBUG_VERBOSE )
+      fprintf(stderr, "%s allow %d current %d\n", __FUNCTION__, 
+	      rmesa->doPageFlip,
+	      rmesa->sarea->pfCurrentPage );
+
+   if ( use_back ) {
+	 rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
+	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+   } else {
+	 rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
+	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+   }
+   /* Mirror the chosen target into the ctx atom, marking it dirty first. */
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset;
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
+}
+
+
+
+/* Update the hardware state.  This is called if another context has
+ * grabbed the hardware lock, which includes the X server.  This
+ * function also updates the driver's window state after the X server
+ * moves, resizes or restacks a window -- the change will be reflected
+ * in the drawable position and clip rects.  Since the X server grabs
+ * the hardware lock when it changes the window state, this routine will
+ * automatically be called after such a change.
+ */
+void radeonGetLock( radeonContextPtr rmesa, GLuint flags )
+{
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   __DRIscreenPrivate *sPriv = rmesa->dri.screen;
+   RADEONSAREAPrivPtr sarea = rmesa->sarea;
+
+   drmGetLock( rmesa->dri.fd, rmesa->dri.hwContext, flags );
+   /* NOTE(review): drmGetLock()'s return value is not checked here. */
+   /* The window might have moved, so we might need to get new clip
+    * rects.
+    *
+    * NOTE: This releases and regrabs the hw lock to allow the X server
+    * to respond to the DRI protocol request for new drawable info.
+    * Since the hardware state depends on having the latest drawable
+    * clip rects, all state checking must be done _after_ this call.
+    */
+   DRI_VALIDATE_DRAWABLE_INFO( sPriv, dPriv );
+   /* Drawable stamp changed: refresh page-flip target, cliprects and viewport. */
+   if ( rmesa->lastStamp != dPriv->lastStamp ) {
+      radeonUpdatePageFlipping( rmesa );
+      if (rmesa->glCtx->Color._DrawDestMask == BACK_LEFT_BIT)
+         radeonSetCliprects( rmesa, GL_BACK_LEFT );
+      else
+         radeonSetCliprects( rmesa, GL_FRONT_LEFT );
+      radeonUpdateViewportOffset( rmesa->glCtx );
+      rmesa->lastStamp = dPriv->lastStamp;
+   }
+   /* The sarea names another context as owner: claim it and age every texture heap. */
+   if ( sarea->ctxOwner != rmesa->dri.hwContext ) {
+      int i;
+      sarea->ctxOwner = rmesa->dri.hwContext;
+
+      for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+	 DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] );
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.h b/src/mesa/drivers/dri/radeon/radeon_lock.h
new file mode 100644
index 0000000000..783db7e92a
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_lock.h
@@ -0,0 +1,113 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_lock.h,v 1.3 2002/10/30 12:51:55 alanh Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifndef __RADEON_LOCK_H__
+#define __RADEON_LOCK_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+extern void radeonGetLock( radeonContextPtr rmesa, GLuint flags );
+
+/* Set DEBUG_LOCKING to 1 to find locking conflicts: each lock site is
+ * recorded and DEBUG_CHECK_LOCK() exits if one is still recorded. */
+#define DEBUG_LOCKING	0
+
+#if DEBUG_LOCKING
+extern char *prevLockFile;
+extern int prevLockLine;
+/* Record the file and line where the lock is being taken. */
+#define DEBUG_LOCK()							\
+   do {									\
+      prevLockFile = (__FILE__);					\
+      prevLockLine = (__LINE__);					\
+   } while (0)
+/* Clear the recorded lock site. */
+#define DEBUG_RESET()							\
+   do {									\
+      prevLockFile = 0;							\
+      prevLockLine = 0;							\
+   } while (0)
+/* If a lock site is still recorded, print both sites and exit(1). */
+#define DEBUG_CHECK_LOCK()						\
+   do {									\
+      if ( prevLockFile ) {						\
+	 fprintf( stderr,						\
+		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
+		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
+	 exit( 1 );							\
+      }									\
+   } while (0)
+
+#else /* !DEBUG_LOCKING: the debug hooks expand to nothing */
+
+#define DEBUG_LOCK()
+#define DEBUG_RESET()
+#define DEBUG_CHECK_LOCK()
+
+#endif /* DEBUG_LOCKING */
+
+/*
+ * !!! We may want to separate locks from locks with validation.  This
+ * could be used to improve performance for those commands that
+ * do not do any drawing !!!
+ */
+
+
+/* Lock the hardware and validate our state: try DRM_CAS on the lock and
+ * call radeonGetLock() if it sets __ret.  'rmesa' is evaluated repeatedly. */
+#define LOCK_HARDWARE( rmesa )						\
+   do {									\
+      char __ret = 0;							\
+      DEBUG_CHECK_LOCK();						\
+      DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext,		\
+	       (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret );	\
+      if ( __ret )							\
+	 radeonGetLock( (rmesa), 0 );					\
+      DEBUG_LOCK();							\
+   } while (0)
+
+/* Unlock the hardware.  'rmesa' is evaluated more than once.
+ */
+#define UNLOCK_HARDWARE( rmesa )					\
+   do {									\
+      DRM_UNLOCK( (rmesa)->dri.fd,					\
+		  (rmesa)->dri.hwLock,					\
+		  (rmesa)->dri.hwContext );				\
+      DEBUG_RESET();							\
+   } while (0)
+
+#endif
+#endif /* __RADEON_LOCK_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos.c b/src/mesa/drivers/dri/radeon/radeon_maos.c
new file mode 100644
index 0000000000..c62edd715c
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_maos.c
@@ -0,0 +1,12 @@
+
+
+/* If using new packets, can choose either verts or arrays.
+ * Otherwise, must use verts.
+ */
+#include "radeon_context.h"
+#define RADEON_MAOS_VERTS 1
+#if (RADEON_MAOS_VERTS) || (RADEON_OLD_PACKETS)
+#include "radeon_maos_verts.c"
+#else
+#include "radeon_maos_arrays.c"
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos.h b/src/mesa/drivers/dri/radeon/radeon_maos.h
new file mode 100644
index 0000000000..7e2bd643d3
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_maos.h
@@ -0,0 +1,47 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __RADEON_MAOS_H__
+#define __RADEON_MAOS_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "radeon_context.h"
+
+extern void radeonEmitArrays( GLcontext *ctx, GLuint inputs );
+extern void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs );
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
new file mode 100644
index 0000000000..cec05a89d7
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
@@ -0,0 +1,591 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "mtypes.h"
+#include "mmath.h"
+#include "macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_imm_debug.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_swtcl.h"
+#include "radeon_maos.h"
+
+/* Usage:
+ *   - from radeon_tcl_render
+ *   - call radeonEmitArrays to ensure uptodate arrays in dma
+ *   - emit primitives (new type?) which reference the data
+ *       -- need to use elts for lineloop, quads, quadstrip/flat
+ *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
+ *
+ */
+/* Expand 'count' three-byte RGB colours, 'stride' bytes apart, into
+ * radeon_color_t entries in the DMA region, forcing alpha to 0xFF.
+ */
+static void emit_ubyte_rgba3( GLcontext *ctx,
+			      struct radeon_dma_region *rvb,
+			      char *data, int stride, int count )
+{
+   radeon_color_t *dest = (radeon_color_t *)(rvb->start + rvb->address);
+   int n;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d out %p\n",
+	      __FUNCTION__, count, stride, dest);
+
+   /* One output colour per source element. */
+   for (n = 0; n < count; n++, data += stride) {
+      dest[n].red   = data[0];
+      dest[n].green = data[1];
+      dest[n].blue  = data[2];
+      dest[n].alpha = 0xFF;	/* source carries no alpha */
+   }
+}
+
+
+#if defined(USE_X86_ASM) /* COPY_DWORDS: copy nr dwords from src to dst */
+#define COPY_DWORDS( dst, src, nr )					\
+do {									\
+	int __tmp;							\
+	__asm__ __volatile__( "rep ; movsl"				\
+			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
+			      : "0" (nr),				\
+			        "D" ((long)dst),			\
+			        "S" ((long)src) );			\
+} while (0)
+#else /* plain C loop; dst ends up advanced by nr elements */
+#define COPY_DWORDS( dst, src, nr )		\
+do {						\
+   int j;					\
+   for ( j = 0 ; j < nr ; j++ )			\
+      dst[j] = ((int *)src)[j];			\
+   dst += nr;					\
+} while (0)
+#endif /* USE_X86_ASM */
+
+
+
+/* Copy 'count' packed four-byte colours into the DMA region: one block
+ * copy when the stride is 4, else a per-element LE32_TO_CPU load. */
+static void emit_ubyte_rgba4( GLcontext *ctx,
+			      struct radeon_dma_region *rvb,
+			      char *data, int stride, int count )
+{
+   int *dest = (int *)(rvb->address + rvb->start);
+   int n;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   if (stride == 4) {
+      COPY_DWORDS( dest, data, count );
+      return;
+   }
+
+   for (n = 0; n < count; n++, data += stride)
+      dest[n] = LE32_TO_CPU(*(int *)data);
+}
+
+
+/* Allocate DMA space for a packed-colour array and fill it from 'data'.
+ * A zero stride denotes one constant colour: a single element is
+ * emitted and aos_stride is set to 0.  'size' (3 or 4) is the number
+ * of source components per colour; anything else asserts and exits.
+ */
+static void emit_ubyte_rgba( GLcontext *ctx,
+			     struct radeon_dma_region *rvb,
+			     char *data,
+			     int size,
+			     int stride,
+			     int count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const int constant = (stride == 0);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+
+   assert (!rvb->buf);
+
+   /* Four bytes per colour; a constant colour needs just one. */
+   if (constant)
+      count = 1;
+
+   radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 );	/* alignment? */
+   rvb->aos_start = GET_START(rvb);
+   rvb->aos_stride = constant ? 0 : 1;
+   rvb->aos_size = 1;
+
+   /* Emit the data
+    */
+   if (size == 3) {
+      emit_ubyte_rgba3( ctx, rvb, data, stride, count );
+   }
+   else if (size == 4) {
+      emit_ubyte_rgba4( ctx, rvb, data, stride, count );
+   }
+   else {
+      /* Unsupported component count. */
+      assert(0);
+      exit(1);
+   }
+}
+
+
+
+
+/* Copy 'count' two-dword vectors into the DMA region.  A stride of 8
+ * means the source is already packed, so it is copied in one block. */
+static void emit_vec8( GLcontext *ctx,
+		       struct radeon_dma_region *rvb,
+		       char *data, int stride, int count )
+{
+   int *dest = (int *)(rvb->address + rvb->start);
+   int n;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   if (stride == 8) {
+      COPY_DWORDS( dest, data, count*2 );
+      return;
+   }
+
+   for (n = 0; n < count; n++, dest += 2, data += stride) {
+      dest[0] = *(int *)data;
+      dest[1] = *(int *)(data + 4);
+   }
+}
+
+/* Copy 'count' three-dword vectors into the DMA region.  A stride of 12
+ * means the source is already packed, so it is copied in one block. */
+static void emit_vec12( GLcontext *ctx,
+			struct radeon_dma_region *rvb,
+			char *data, int stride, int count )
+{
+   int *dest = (int *)(rvb->address + rvb->start);
+   int n;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+	      __FUNCTION__, count, stride, dest, data);
+
+   if (stride == 12) {
+      COPY_DWORDS( dest, data, count*3 );
+      return;
+   }
+
+   for (n = 0; n < count; n++, dest += 3, data += stride) {
+      dest[0] = *(int *)data;
+      dest[1] = *(int *)(data + 4);
+      dest[2] = *(int *)(data + 8);
+   }
+}
+
+/* Copy 'count' four-dword vectors into the DMA region.  A stride of 16
+ * means the source is already packed, so it is copied in one block. */
+static void emit_vec16( GLcontext *ctx,
+			struct radeon_dma_region *rvb,
+			char *data, int stride, int count )
+{
+   int *dest = (int *)(rvb->address + rvb->start);
+   int n;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   if (stride == 16) {
+      COPY_DWORDS( dest, data, count*4 );
+      return;
+   }
+
+   for (n = 0; n < count; n++, dest += 4, data += stride) {
+      dest[0] = *(int *)data;
+      dest[1] = *(int *)(data + 4);
+      dest[2] = *(int *)(data + 8);
+      dest[3] = *(int *)(data + 12);
+   }
+}
+
+
+/* Allocate DMA space for an array of 'size'-dword vectors (2, 3 or 4)
+ * and copy it from 'data'.  A zero stride denotes one constant value:
+ * only a single element is emitted and aos_stride is set to 0.  The
+ * region must not already hold a buffer (asserted).
+ */
+static void emit_vector( GLcontext *ctx,
+			 struct radeon_dma_region *rvb,
+			 char *data,
+			 int size,
+			 int stride,
+			 int count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const int constant = (stride == 0);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d size %d stride %d\n",
+	      __FUNCTION__, count, size, stride);
+
+   assert (!rvb->buf);
+
+   /* A constant attribute occupies a single element. */
+   if (constant)
+      count = 1;
+
+   radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 );	/* alignment? */
+   rvb->aos_start = GET_START(rvb);
+   rvb->aos_stride = constant ? 0 : size;
+   rvb->aos_size = size;
+
+   /* Emit the data
+    */
+   if (size == 2) {
+      emit_vec8( ctx, rvb, data, stride, count );
+   }
+   else if (size == 3) {
+      emit_vec12( ctx, rvb, data, stride, count );
+   }
+   else if (size == 4) {
+      emit_vec16( ctx, rvb, data, stride, count );
+   }
+   else {
+      /* No emitter for this vector width. */
+      assert(0);
+      exit(1);
+   }
+
+}
+
+
+
+/* Emit 'count' one-component coordinates as two dwords each: the
+ * source dword followed by a zero.  Elements are 'stride' bytes apart.
+ */
+static void emit_s0_vec( GLcontext *ctx,
+			 struct radeon_dma_region *rvb,
+			 char *data, int stride, int count )
+{
+   int *dest = (int *)(rvb->address + rvb->start);
+   int n;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   /* Pad every element with a zero second dword. */
+   for (n = 0; n < count; n++, dest += 2, data += stride) {
+      dest[0] = *(int *)data;
+      dest[1] = 0;
+   }
+}
+
+/* Emit 'count' four-component coordinates as three dwords each, taking
+ * components 0, 1 and 3 and skipping component 2.  Elements are
+ * 'stride' bytes apart.
+ */
+static void emit_stq_vec( GLcontext *ctx,
+			  struct radeon_dma_region *rvb,
+			  char *data, int stride, int count )
+{
+   int *dest = (int *)(rvb->address + rvb->start);
+   int n;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   for (n = 0; n < count; n++, dest += 3, data += stride) {
+      dest[0] = *(int *)data;
+      dest[1] = *(int *)(data + 4);
+      dest[2] = *(int *)(data + 12);	/* byte offset 8 is skipped */
+   }
+}
+
+
+
+
+/* Allocate DMA space for a texture-coordinate array and fill it from
+ * 'data'.  Source sizes 1-3 are emitted as two dwords (size 1 gets a
+ * zero second dword); size 4 is emitted as three dwords taken from
+ * components 0, 1 and 3.  A zero stride denotes one constant value:
+ * a single element is emitted and aos_stride is set to 0.
+ */
+static void emit_tex_vector( GLcontext *ctx,
+			     struct radeon_dma_region *rvb,
+			     char *data,
+			     int size,
+			     int stride,
+			     int count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const int emitsize = (size == 4) ? 3 : 2;
+   const int constant = (stride == 0);
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+
+   assert (!rvb->buf);
+
+   /* A constant coordinate occupies a single element. */
+   if (constant)
+      count = 1;
+
+   radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
+   rvb->aos_start = GET_START(rvb);
+   rvb->aos_stride = constant ? 0 : emitsize;
+   rvb->aos_size = emitsize;
+
+   /* Emit the data
+    */
+   switch (size) {
+   case 1:
+      /* one component: the emitter supplies a zero second dword */
+      emit_s0_vec( ctx, rvb, data, stride, count );
+      break;
+
+   case 2:
+   case 3:
+      /* first two components; a third, if any, is not emitted */
+      emit_vec8( ctx, rvb, data, stride, count );
+      break;
+
+   case 4:
+      /* components 0, 1 and 3 */
+      emit_stq_vec( ctx, rvb, data, stride, count );
+      break;
+
+   default:
+      /* unsupported coordinate size */
+      assert(0);
+      exit(1);
+      break;
+   }
+}
+
+
+
+
+/* Emit each array selected by 'inputs' that has no DMA region yet, then
+ * record the component list and vertex format.  Position is always used.
+ */
+void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
+   struct radeon_dma_region **component = rmesa->tcl.aos_components;
+   GLuint nr = 0;
+   GLuint vfmt = 0;
+   GLuint count = VB->Count;
+   GLuint vtx;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      _tnl_print_vert_flags( __FUNCTION__, inputs );
+
+   if (1) {	/* object coordinates are emitted unconditionally */
+      if (!rmesa->tcl.obj.buf)
+	 emit_vector( ctx,
+		      &rmesa->tcl.obj,
+		      (char *)VB->ObjPtr->data,
+		      VB->ObjPtr->size,
+		      VB->ObjPtr->stride,
+		      count);
+
+      switch( VB->ObjPtr->size ) {
+      case 4: vfmt |= RADEON_CP_VC_FRMT_W0;	/* fallthrough */
+      case 3: vfmt |= RADEON_CP_VC_FRMT_Z;	/* fallthrough */
+      case 2: vfmt |= RADEON_CP_VC_FRMT_XY;	/* fallthrough */
+      default: break;	/* a label must be followed by a statement */
+      }
+      component[nr++] = &rmesa->tcl.obj;
+   }
+
+
+   if (inputs & VERT_BIT_NORMAL) {
+      if (!rmesa->tcl.norm.buf)
+	 emit_vector( ctx,
+		      &(rmesa->tcl.norm),
+		      (char *)VB->NormalPtr->data,
+		      3,
+		      VB->NormalPtr->stride,
+		      count);
+
+      vfmt |= RADEON_CP_VC_FRMT_N0;
+      component[nr++] = &rmesa->tcl.norm;
+   }
+
+   if (inputs & VERT_BIT_COLOR0) {
+      if (VB->ColorPtr[0]->Type == GL_UNSIGNED_BYTE) {
+	 if (!rmesa->tcl.rgba.buf)
+	    emit_ubyte_rgba( ctx,
+			     &rmesa->tcl.rgba,
+			     (char *)VB->ColorPtr[0]->Ptr,
+			     VB->ColorPtr[0]->Size,
+			     VB->ColorPtr[0]->StrideB,
+			     count);
+
+	 vfmt |= RADEON_CP_VC_FRMT_PKCOLOR;
+      }
+      else {
+	 int emitsize;
+
+	 if (VB->ColorPtr[0]->Size == 4 &&
+	     (VB->ColorPtr[0]->StrideB != 0 ||
+	      ((GLfloat *)VB->ColorPtr[0]->Ptr)[3] != 1.0)) {
+	    vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA;
+	    emitsize = 4;
+	 }
+	 else {	/* no alpha, or a constant colour whose alpha is 1.0 */
+	    vfmt |= RADEON_CP_VC_FRMT_FPCOLOR;
+	    emitsize = 3;
+	 }
+
+	 if (!rmesa->tcl.rgba.buf)
+	    emit_vector( ctx,
+			 &(rmesa->tcl.rgba),
+			 (char *)VB->ColorPtr[0]->Ptr,
+			 emitsize,
+			 VB->ColorPtr[0]->StrideB,
+			 count);
+      }
+
+      component[nr++] = &rmesa->tcl.rgba;
+   }
+
+
+   if (inputs & VERT_BIT_COLOR1) {
+      if (!rmesa->tcl.spec.buf) {
+	 if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+	    radeon_import_float_spec_colors( ctx );
+
+	 emit_ubyte_rgba( ctx,
+			  &rmesa->tcl.spec,
+			  (char *)VB->SecondaryColorPtr[0]->Ptr,
+			  3,
+			  VB->SecondaryColorPtr[0]->StrideB,
+			  count);
+      }
+
+      vfmt |= RADEON_CP_VC_FRMT_PKSPEC;
+      component[nr++] = &rmesa->tcl.spec;
+   }
+
+   vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
+	  ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1));
+
+   if (inputs & VERT_BIT_TEX0) {
+      if (!rmesa->tcl.tex[0].buf)
+	 emit_tex_vector( ctx,
+			  &(rmesa->tcl.tex[0]),
+			  (char *)VB->TexCoordPtr[0]->data,
+			  VB->TexCoordPtr[0]->size,
+			  VB->TexCoordPtr[0]->stride,
+			  count );
+
+      switch( VB->TexCoordPtr[0]->size ) {
+      case 4:
+	 vtx |= RADEON_TCL_VTX_Q0;
+	 vfmt |= RADEON_CP_VC_FRMT_Q0;	/* fallthrough */
+      default:
+	 vfmt |= RADEON_CP_VC_FRMT_ST0;
+      }
+      component[nr++] = &rmesa->tcl.tex[0];
+   }
+
+   if (inputs & VERT_BIT_TEX1) {
+      if (!rmesa->tcl.tex[1].buf)
+	 emit_tex_vector( ctx,
+			  &(rmesa->tcl.tex[1]),
+			  (char *)VB->TexCoordPtr[1]->data,
+			  VB->TexCoordPtr[1]->size,
+			  VB->TexCoordPtr[1]->stride,
+			  count );
+
+      switch( VB->TexCoordPtr[1]->size ) {
+      case 4:
+	 vtx |= RADEON_TCL_VTX_Q1;
+	 vfmt |= RADEON_CP_VC_FRMT_Q1;	/* fallthrough */
+      default:
+	 vfmt |= RADEON_CP_VC_FRMT_ST1;
+      }
+      component[nr++] = &rmesa->tcl.tex[1];
+   }
+
+   if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {	/* Q bits changed */
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
+   }
+
+   rmesa->tcl.nr_aos_components = nr;
+   rmesa->tcl.vertex_format = vfmt;
+}
+
+
+/* Release the DMA region of every array whose VERT_BIT_* flag is set
+ * in 'newinputs'. */
+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   const char *caller = __FUNCTION__;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      _tnl_print_vert_flags( caller, newinputs );
+
+   if (newinputs & VERT_BIT_POS)
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, caller );
+   if (newinputs & VERT_BIT_NORMAL)
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, caller );
+
+   if (newinputs & VERT_BIT_COLOR0)
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, caller );
+   if (newinputs & VERT_BIT_COLOR1)
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, caller );
+
+   if (newinputs & VERT_BIT_TEX0)
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[0], caller );
+   if (newinputs & VERT_BIT_TEX1)
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[1], caller );
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
new file mode 100644
index 0000000000..b379bad985
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
@@ -0,0 +1,368 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  4.1
+ *
+ * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef LOCALVARS
+#define LOCALVARS
+#endif
+
+#undef TCL_DEBUG
+#ifndef TCL_DEBUG
+#define TCL_DEBUG 0
+#endif
+
+/* Emit vertices [start, end) of the TNL vertex buffer to 'dest' as packed
+ * dwords: xyz[w], normal, rgba, spec/fog, then st[q] for units 0-2, each
+ * only when its DO_* flag is set. */
+static void TAG(emit)( GLcontext *ctx,
+		       GLuint start, GLuint end,
+		       void *dest )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLuint (*tc0)[4], (*tc1)[4], (*tc2)[4];
+   GLfloat (*fog)[4];
+   GLuint (*norm)[4];
+   GLubyte (*col)[4], (*spec)[4];
+   GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride;
+   GLuint tc2_stride, norm_stride;
+   GLuint (*coord)[4];
+   GLuint coord_stride; /* object coordinates */
+   GLubyte dummy[4];
+   int i;
+
+   union emit_union *v = (union emit_union *)dest;
+
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* The vertex code expects Obj to be clean to element 3.  To fix
+    * this, add more vertex code (for obj-2, obj-3) or preferably move
+    * to maos.
+    */
+   if (VB->ObjPtr->size < 3) {
+      if (VB->ObjPtr->flags & VEC_NOT_WRITEABLE) {
+	 VB->import_data( ctx, VERT_BIT_POS, VEC_NOT_WRITEABLE );
+      }
+      _mesa_vector4f_clean_elem( VB->ObjPtr, VB->Count, 2 );
+   }
+
+   if (DO_W && VB->ObjPtr->size < 4) {
+      if (VB->ObjPtr->flags & VEC_NOT_WRITEABLE) {
+	 VB->import_data( ctx, VERT_BIT_POS, VEC_NOT_WRITEABLE );
+      }
+      _mesa_vector4f_clean_elem( VB->ObjPtr, VB->Count, 3 );
+   }
+
+   coord = (GLuint (*)[4])VB->ObjPtr->data;
+   coord_stride = VB->ObjPtr->stride;
+
+   if (DO_TEX2) {
+      const GLuint t2 = GET_TEXSOURCE(2);
+      tc2 = (GLuint (*)[4])VB->TexCoordPtr[t2]->data;
+      tc2_stride = VB->TexCoordPtr[t2]->stride;
+      if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) {
+	 if (VB->TexCoordPtr[t2]->flags & VEC_NOT_WRITEABLE) {
+	    VB->import_data( ctx, VERT_BIT_TEX2, VEC_NOT_WRITEABLE );
+	 }
+	 _mesa_vector4f_clean_elem( VB->TexCoordPtr[t2], VB->Count, 3 );
+      }
+   }
+
+   if (DO_TEX1) {
+      if (VB->TexCoordPtr[1]) {
+	 const GLuint t1 = GET_TEXSOURCE(1);
+	 tc1 = (GLuint (*)[4])VB->TexCoordPtr[t1]->data;
+	 tc1_stride = VB->TexCoordPtr[t1]->stride;
+	 if (DO_PTEX && VB->TexCoordPtr[t1]->size < 4) {
+	    if (VB->TexCoordPtr[t1]->flags & VEC_NOT_WRITEABLE) {
+	       VB->import_data( ctx, VERT_BIT_TEX1, VEC_NOT_WRITEABLE );
+	    }
+	    _mesa_vector4f_clean_elem( VB->TexCoordPtr[t1], VB->Count, 3 );
+	 }
+      } else {
+	 tc1 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX1]; /* could be anything, really */
+	 tc1_stride = 0;
+      }
+   }
+
+   if (DO_TEX0) {
+      if (VB->TexCoordPtr[0]) {
+	 const GLuint t0 = GET_TEXSOURCE(0);
+	 tc0_stride = VB->TexCoordPtr[t0]->stride;
+	 tc0 = (GLuint (*)[4])VB->TexCoordPtr[t0]->data;
+	 if (DO_PTEX && VB->TexCoordPtr[t0]->size < 4) {
+	    if (VB->TexCoordPtr[t0]->flags & VEC_NOT_WRITEABLE) {
+	       VB->import_data( ctx, VERT_BIT_TEX0, VEC_NOT_WRITEABLE );
+	    }
+	    _mesa_vector4f_clean_elem( VB->TexCoordPtr[t0], VB->Count, 3 );
+	 }
+      } else {
+	 tc0 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX0]; /* could be anything, really */
+	 tc0_stride = 0;
+      }
+   }
+
+   if (DO_NORM) {
+      if (VB->NormalPtr) {
+	 norm_stride = VB->NormalPtr->stride;
+	 norm = (GLuint (*)[4])VB->NormalPtr->data;
+      } else {
+	 norm_stride = 0;
+	 norm = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_NORMAL];
+      }
+   }
+
+   if (DO_RGBA) {
+      if (VB->ColorPtr[0]) {
+	 /* This is incorrect when colormaterial is enabled:
+	  */
+	 if (VB->ColorPtr[0]->Type != GL_UNSIGNED_BYTE) {
+	    if (0) fprintf(stderr, "IMPORTING FLOAT COLORS\n");
+	    IMPORT_FLOAT_COLORS( ctx );
+	 }
+	 col = (GLubyte (*)[4])VB->ColorPtr[0]->Ptr;
+	 col_stride = VB->ColorPtr[0]->StrideB;
+      } else {
+	 col = &dummy; /* any old memory is fine */
+	 col_stride = 0;
+      }
+   }
+
+   if (DO_SPEC) {
+      if (VB->SecondaryColorPtr[0]) {
+	 if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
+	    IMPORT_FLOAT_SPEC_COLORS( ctx );
+	 spec = (GLubyte (*)[4])VB->SecondaryColorPtr[0]->Ptr;
+	 spec_stride = VB->SecondaryColorPtr[0]->StrideB;
+      } else {
+	 spec = &dummy;
+	 spec_stride = 0;
+      }
+   }
+
+   if (DO_FOG) {
+      if (VB->FogCoordPtr) {
+	 fog = VB->FogCoordPtr->data;
+	 fog_stride = VB->FogCoordPtr->stride;
+      } else {
+	 fog = (GLfloat (*)[4])&dummy; fog[0][0] = 0.0F;
+	 fog_stride = 0;
+      }
+   }
+   if (VB->importable_data) {	/* walk each array by its own byte stride */
+      if (start) {
+	 coord =  (GLuint (*)[4])((GLubyte *)coord + start * coord_stride);
+	 if (DO_TEX0)
+	    tc0 =  (GLuint (*)[4])((GLubyte *)tc0 + start * tc0_stride);
+	 if (DO_TEX1)
+	    tc1 =  (GLuint (*)[4])((GLubyte *)tc1 + start * tc1_stride);
+	 if (DO_TEX2)
+	    tc2 =  (GLuint (*)[4])((GLubyte *)tc2 + start * tc2_stride);
+	 if (DO_NORM)
+	    norm =  (GLuint (*)[4])((GLubyte *)norm + start * norm_stride);
+	 if (DO_RGBA)
+	    STRIDE_4UB(col, start * col_stride);
+	 if (DO_SPEC)
+	    STRIDE_4UB(spec, start * spec_stride);
+	 if (DO_FOG)
+	    fog =  (GLfloat (*)[4])((GLubyte *)fog + start * fog_stride);
+      }
+
+      for (i=start; i < end; i++) {
+	 v[0].ui = coord[0][0];
+	 v[1].ui = coord[0][1];
+	 v[2].ui = coord[0][2];
+	 if (TCL_DEBUG) fprintf(stderr, "%d: %.2f %.2f %.2f ", i, v[0].f, v[1].f, v[2].f);
+	 if (DO_W) {
+	    v[3].ui = coord[0][3];
+	    if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[3].f);
+	    v += 4;
+	 }
+	 else
+	    v += 3;
+	 coord =  (GLuint (*)[4])((GLubyte *)coord +  coord_stride);
+
+	 if (DO_NORM) {
+	    v[0].ui = norm[0][0];
+	    v[1].ui = norm[0][1];
+	    v[2].ui = norm[0][2];
+	    if (TCL_DEBUG) fprintf(stderr, "norm: %.2f %.2f %.2f ", v[0].f, v[1].f, v[2].f);
+	    v += 3;
+	    norm =  (GLuint (*)[4])((GLubyte *)norm +  norm_stride);
+	 }
+	 if (DO_RGBA) {
+	    v[0].ui = LE32_TO_CPU(*(GLuint *)&col[0]);
+	    STRIDE_4UB(col, col_stride);
+	    if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui);
+	    v++;
+	 }
+	 if (DO_SPEC || DO_FOG) {
+	    if (DO_SPEC) {
+	       v[0].specular.red   = spec[0][0];
+	       v[0].specular.green = spec[0][1];
+	       v[0].specular.blue  = spec[0][2];
+	       STRIDE_4UB(spec, spec_stride);
+	    }
+	    if (DO_FOG) {
+	       v[0].specular.alpha = fog[0][0] * 255.0;
+               fog = (GLfloat (*)[4])((GLubyte *)fog + fog_stride);
+	    }
+	    if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui);
+	    v++;
+	 }
+	 if (DO_TEX0) {
+	    v[0].ui = tc0[0][0];
+	    v[1].ui = tc0[0][1];
+	    if (TCL_DEBUG) fprintf(stderr, "t0: %.2f %.2f ", v[0].f, v[1].f);
+	    if (DO_PTEX) {
+	       v[2].ui = tc0[0][3];
+	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
+	       v += 3;
+	    }
+	    else
+	       v += 2;
+	    tc0 =  (GLuint (*)[4])((GLubyte *)tc0 +  tc0_stride);
+	 }
+	 if (DO_TEX1) {
+	    v[0].ui = tc1[0][0];
+	    v[1].ui = tc1[0][1];
+	    if (TCL_DEBUG) fprintf(stderr, "t1: %.2f %.2f ", v[0].f, v[1].f);
+	    if (DO_PTEX) {
+	       v[2].ui = tc1[0][3];
+	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
+	       v += 3;
+	    }
+	    else
+	       v += 2;
+	    tc1 =  (GLuint (*)[4])((GLubyte *)tc1 +  tc1_stride);
+	 }
+	 if (DO_TEX2) {
+	    v[0].ui = tc2[0][0];
+	    v[1].ui = tc2[0][1];
+	    if (DO_PTEX) {
+	       v[2].ui = tc2[0][3];
+	       v += 3;
+	    }
+	    else
+	       v += 2;
+	    tc2 =  (GLuint (*)[4])((GLubyte *)tc2 +  tc2_stride);
+	 }
+	 if (TCL_DEBUG) fprintf(stderr, "\n");
+      }
+   } else {	/* index the arrays directly by vertex number i */
+      for (i=start; i < end; i++) {
+	 v[0].ui = coord[i][0];
+	 v[1].ui = coord[i][1];
+	 v[2].ui = coord[i][2];
+	 if (DO_W) {
+	    v[3].ui = coord[i][3];
+	    v += 4;
+	 }
+	 else
+	    v += 3;
+
+	 if (DO_NORM) {
+	    v[0].ui = norm[i][0];
+	    v[1].ui = norm[i][1];
+	    v[2].ui = norm[i][2];
+	    v += 3;
+	 }
+	 if (DO_RGBA) {
+	    v[0].ui = LE32_TO_CPU(*(GLuint *)&col[i]);
+	    v++;
+	 }
+	 if (DO_SPEC || DO_FOG) {
+	    if (DO_SPEC) {
+	       v[0].specular.red   = spec[i][0];
+	       v[0].specular.green = spec[i][1];
+	       v[0].specular.blue  = spec[i][2];
+	    }
+	    if (DO_FOG) {
+               GLfloat *f = (GLfloat *) ((GLubyte *)fog + i * fog_stride);
+               v[0].specular.alpha = *f * 255.0;	/* fog of vertex i */
+	    }
+	    v++;
+	 }
+	 if (DO_TEX0) {
+	    v[0].ui = tc0[i][0];
+	    v[1].ui = tc0[i][1];
+	    if (DO_PTEX) {
+	       v[2].ui = tc0[i][3];
+	       v += 3;
+	    }
+	    else
+	       v += 2;
+	 }
+	 if (DO_TEX1) {
+	    v[0].ui = tc1[i][0];
+	    v[1].ui = tc1[i][1];
+	    if (DO_PTEX) {
+	       v[2].ui = tc1[i][3];
+	       v += 3;
+	    }
+	    else
+	       v += 2;
+	 }
+	 if (DO_TEX2) {
+	    v[0].ui = tc2[i][0];
+	    v[1].ui = tc2[i][1];
+	    if (DO_PTEX) {
+	       v[2].ui = tc2[i][3];
+	       v += 3;
+	    }
+	    else
+	       v += 2;
+	 }
+      }
+   }
+}
+
+
+
+/* Register TAG(emit) in setup_tab[IDX] together with its vertex format
+ * and the vertex size implied by the DO_* flags. */
+static void TAG(init)( void )
+{
+   const int texsize = (DO_PTEX) ? 3 : 2;	/* per enabled texture unit */
+   int total = 3;				/* x, y, z */
+   total += (DO_W) ? 1 : 0;
+   total += (DO_NORM) ? 3 : 0;
+   total += (DO_RGBA) ? 1 : 0;
+   total += (DO_SPEC || DO_FOG) ? 1 : 0;
+   total += (DO_TEX0) ? texsize : 0;
+   total += (DO_TEX1) ? texsize : 0;
+   total += (DO_TEX2) ? texsize : 0;
+
+   setup_tab[IDX].vertex_size = total;
+   setup_tab[IDX].vertex_format = IND;
+   setup_tab[IDX].emit = TAG(emit);
+}
+
+
+#undef IND
+#undef TAG
+#undef IDX
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
new file mode 100644
index 0000000000..39b1f57507
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
@@ -0,0 +1,335 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "mtypes.h"
+
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_imm_debug.h"
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_tex.h"
+#include "radeon_tcl.h"
+#include "radeon_swtcl.h"
+#include "radeon_maos.h"
+
+
+#define RADEON_TCL_MAX_SETUP 13	/* number of setup_tab[] slots; IDX 0..12 are instantiated below */
+/* One dword of an emitted vertex, viewed as a float, as raw bits or as a packed colour. */
+union emit_union { float f; GLuint ui; radeon_color_t specular; };
+/* Dispatch table: one entry per instantiated vertex format, filled in by the init functions. */
+static struct {
+   void   (*emit)( GLcontext *, GLuint, GLuint, void * );
+   GLuint vertex_size;		/* counted in union emit_union elements */
+   GLuint vertex_format;	/* RADEON_CP_VC_FRMT_* bits of this entry */
+} setup_tab[RADEON_TCL_MAX_SETUP];
+/* Compile-time tests on IND, the format of the variant currently being instantiated. */
+#define DO_W    (IND & RADEON_CP_VC_FRMT_W0)
+#define DO_RGBA (IND & RADEON_CP_VC_FRMT_PKCOLOR)
+#define DO_SPEC (IND & RADEON_CP_VC_FRMT_PKSPEC)
+#define DO_FOG  (IND & RADEON_CP_VC_FRMT_PKSPEC)	/* same bit as DO_SPEC: fog goes into the alpha byte of the specular dword */
+#define DO_TEX0 (IND & RADEON_CP_VC_FRMT_ST0)
+#define DO_TEX1 (IND & RADEON_CP_VC_FRMT_ST1)
+#define DO_PTEX (IND & RADEON_CP_VC_FRMT_Q0)	/* only Q0 is tested, yet it selects the third texcoord for every unit */
+#define DO_NORM (IND & RADEON_CP_VC_FRMT_N0)
+/* These variants never emit texture units 2 and 3. */
+#define DO_TEX2 0
+#define DO_TEX3 0
+
+#define GET_TEXSOURCE(n)  n
+#define GET_UBYTE_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteColor
+#define GET_UBYTE_SPEC_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteSecondaryColor
+
+#define IMPORT_FLOAT_COLORS radeon_import_float_colors
+#define IMPORT_FLOAT_SPEC_COLORS radeon_import_float_spec_colors
+
+/***********************************************************************
+ *             Generate vertex emit functions               *
+ ***********************************************************************/
+
+
+/* Defined (roughly) in order of increasing vertex size; radeonEmitArrays()
+ * takes the first entry whose format covers everything requested: */
+#define IDX 0
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR)
+#define TAG(x) x##_rgba
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 1
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 2
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0)
+#define TAG(x) x##_rgba_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 3
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 4
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 5
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1)
+#define TAG(x) x##_rgba_st_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 6
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 7
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1)
+#define TAG(x) x##_rgba_spec_st_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 8
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_st_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 9
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgpa_spec_st_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 10
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0)
+#define TAG(x) x##_rgba_stq
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 11
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_Q1|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0)
+#define TAG(x) x##_rgba_stq_stq
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 12
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_W0|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_Q1|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_w_rgpa_spec_stq_stq_n
+#include "radeon_maos_vbtmp.h"
+
+
+
+
+
+/***********************************************************************
+ *                         Initialization 
+ ***********************************************************************/
+
+
+static void init_tcl_verts( void )
+{
+   /* Fill setup_tab[] in slot order; every call writes only its own slot. */
+   init_rgba();  init_n();
+   init_rgba_st();
+   init_rgba_n();
+   init_st_n();
+   init_rgba_st_st();
+   init_rgba_st_n();
+   init_rgba_spec_st_st();
+   init_st_st_n();
+   init_rgpa_spec_st_st_n();
+   init_rgba_stq();
+   init_rgba_stq_stq();
+   init_w_rgpa_spec_stq_stq_n();
+}
+
+
+void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+{
+   /* Pick the first setup_tab[] format that covers the requested inputs and
+    * (re)emit the whole vertex buffer in it, unless a buffer in that format
+    * is already held.
+    */
+   static int tables_ready = 0;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   /* z is always part of the request; everything else depends on the inputs. */
+   GLuint wanted = RADEON_CP_VC_FRMT_Z;
+   /* Output format with both q bits cleared; they are set again below only
+    * for texture units whose coordinates have four components.
+    */
+   GLuint out_fmt = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
+                     ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1));
+   int slot = 0;
+
+   /* The dispatch table is built once, on the first call. */
+   if (!tables_ready) {
+      init_tcl_verts();
+      tables_ready = 1;
+   }
+
+   /* Translate the TNL input bits into hardware vertex-format bits. */
+   if (VB->ObjPtr->size == 4)
+      wanted |= RADEON_CP_VC_FRMT_W0;
+   if (inputs & VERT_BIT_NORMAL)
+      wanted |= RADEON_CP_VC_FRMT_N0;
+   if (inputs & VERT_BIT_COLOR0)
+      wanted |= RADEON_CP_VC_FRMT_PKCOLOR;
+   if (inputs & VERT_BIT_COLOR1)
+      wanted |= RADEON_CP_VC_FRMT_PKSPEC;
+
+   if (inputs & VERT_BIT_TEX0) {
+      wanted |= RADEON_CP_VC_FRMT_ST0;
+      if (VB->TexCoordPtr[0]->size == 4) {
+         wanted |= RADEON_CP_VC_FRMT_Q0;
+         out_fmt |= RADEON_TCL_VTX_Q0;
+      }
+   }
+
+   if (inputs & VERT_BIT_TEX1) {
+      wanted |= RADEON_CP_VC_FRMT_ST1;
+      if (VB->TexCoordPtr[1]->size == 4) {
+         wanted |= RADEON_CP_VC_FRMT_Q1;
+         out_fmt |= RADEON_TCL_VTX_Q1;
+      }
+   }
+
+   /* Go through RADEON_STATECHANGE only when the output format really changes. */
+   if (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] != out_fmt) {
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = out_fmt;
+   }
+
+   /* First table entry whose format is a superset of the request. */
+   while (slot < RADEON_TCL_MAX_SETUP &&
+          (setup_tab[slot].vertex_format & wanted) != wanted)
+      slot++;
+
+   /* A buffer already emitted in this very format can be reused as is. */
+   if (rmesa->tcl.indexed_verts.buf &&
+       rmesa->tcl.vertex_format == setup_tab[slot].vertex_format)
+      return;
+
+   /* Otherwise drop whatever is held before allocating afresh. */
+   if (rmesa->tcl.indexed_verts.buf)
+      radeonReleaseArrays( ctx, ~0 );
+
+   radeonAllocDmaRegionVerts( rmesa, &rmesa->tcl.indexed_verts,
+                              VB->Count, setup_tab[slot].vertex_size * 4, 4 );
+
+   setup_tab[slot].emit( ctx, 0, VB->Count,
+                         rmesa->tcl.indexed_verts.address +
+                         rmesa->tcl.indexed_verts.start );
+
+   /* Describe the buffer as the single array-of-structures component. */
+   rmesa->tcl.vertex_format = setup_tab[slot].vertex_format;
+   rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
+   rmesa->tcl.indexed_verts.aos_size = setup_tab[slot].vertex_size;
+   rmesa->tcl.indexed_verts.aos_stride = setup_tab[slot].vertex_size;
+
+   rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts;
+   rmesa->tcl.nr_aos_components = 1;
+}
+
+
+
+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   if (RADEON_DEBUG & DEBUG_VERTS) {
+      _tnl_print_vert_flags( __FUNCTION__, newinputs );
+   }
+   if (newinputs == 0)
+      return;   /* nothing flagged: leave indexed_verts alone */
+   radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.c b/src/mesa/drivers/dri/radeon/radeon_sanity.c
new file mode 100644
index 0000000000..e3b37bf3de
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_sanity.c
@@ -0,0 +1,1043 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc, Cedar Park, TX.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+#include <errno.h>
+
+#include "glheader.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_sanity.h"
+
+/* Set this to '1' to get more verbiage: NORMAL output is then unconditional and
+ * DEBUG_VERBOSE adds VERBOSE output; with '0', DEBUG_VERBOSE merely enables NORMAL. */
+#define MORE_VERBOSE 1
+
+#if MORE_VERBOSE
+#define VERBOSE (RADEON_DEBUG & DEBUG_VERBOSE)
+#define NORMAL  (1)
+#else
+#define VERBOSE 0
+#define NORMAL  (RADEON_DEBUG & DEBUG_VERBOSE)
+#endif
+
+
+/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
+ * 1.3 cmdbuffers allow all previous state to be updated as well as
+ * the tcl scalar and vector areas.  
+ */
+static struct {
+   int start;		/* register offset of the packet's first dword */
+   int len;		/* number of consecutive dwords in the packet */
+   const char *name;	/* printable name; radeon_emit_packets() rejects slots without one */
+} packet[RADEON_MAX_STATE_PACKETS] = {
+   { RADEON_PP_MISC,7,"RADEON_PP_MISC" },
+   { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
+   { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
+   { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
+   { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
+   { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
+   { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
+   { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
+   { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
+   { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
+   { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
+   { RADEON_RE_MISC,1,"RADEON_RE_MISC" },
+   { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
+   { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
+   { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
+   { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
+   { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
+   { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
+   { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
+   { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+	{ 0, 4, "R200_PP_TXCBLEND_0" },
+	{ 0, 4, "R200_PP_TXCBLEND_1" },
+	{ 0, 4, "R200_PP_TXCBLEND_2" },
+	{ 0, 4, "R200_PP_TXCBLEND_3" },
+	{ 0, 4, "R200_PP_TXCBLEND_4" },
+	{ 0, 4, "R200_PP_TXCBLEND_5" },
+	{ 0, 4, "R200_PP_TXCBLEND_6" },
+	{ 0, 4, "R200_PP_TXCBLEND_7" },
+	{ 0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
+	{ 0, 6, "R200_PP_TFACTOR_0" },
+	{ 0, 4, "R200_SE_VTX_FMT_0" },
+	{ 0, 1, "R200_SE_VAP_CNTL" },
+	{ 0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
+	{ 0, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
+	{ 0, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
+	{ 0, 6, "R200_PP_TXFILTER_0" },
+	{ 0, 6, "R200_PP_TXFILTER_1" },
+	{ 0, 6, "R200_PP_TXFILTER_2" },
+	{ 0, 6, "R200_PP_TXFILTER_3" },
+	{ 0, 6, "R200_PP_TXFILTER_4" },
+	{ 0, 6, "R200_PP_TXFILTER_5" },
+	{ 0, 1, "R200_PP_TXOFFSET_0" },
+	{ 0, 1, "R200_PP_TXOFFSET_1" },
+	{ 0, 1, "R200_PP_TXOFFSET_2" },
+	{ 0, 1, "R200_PP_TXOFFSET_3" },
+	{ 0, 1, "R200_PP_TXOFFSET_4" },
+	{ 0, 1, "R200_PP_TXOFFSET_5" },
+	{ 0, 1, "R200_SE_VTE_CNTL" },
+	{ 0, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
+	{ 0, 1, "R200_PP_TAM_DEBUG3" },
+	{ 0, 1, "R200_PP_CNTL_X" },
+	{ 0, 1, "R200_RB3D_DEPTHXY_OFFSET" },
+	{ 0, 1, "R200_RE_AUX_SCISSOR_CNTL" },
+	{ 0, 2, "R200_RE_SCISSOR_TL_0" },
+	{ 0, 2, "R200_RE_SCISSOR_TL_1" },
+	{ 0, 2, "R200_RE_SCISSOR_TL_2" },
+	{ 0, 1, "R200_SE_VAP_CNTL_STATUS" },
+	{ 0, 1, "R200_SE_VTX_STATE_CNTL" },
+	{ 0, 1, "R200_RE_POINTSIZE" },
+	{ 0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
+	{ 0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
+	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
+	{ 0, 1, "R200_PP_CUBIC_FACES_1" },
+	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
+	{ 0, 1, "R200_PP_CUBIC_FACES_2" },
+	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
+	{ 0, 1, "R200_PP_CUBIC_FACES_3" },
+	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
+	{ 0, 1, "R200_PP_CUBIC_FACES_4" },
+	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
+	{ 0, 1, "R200_PP_CUBIC_FACES_5" },
+	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
+   { RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
+   { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
+   { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },	/* label used to say ..._SIZE_1 */
+};
+
+struct reg_names {
+   int idx;		/* register offset or table index the name belongs to */
+   const char *name;	/* printable name */
+};
+
+static struct reg_names reg_names[] = {	/* names for regs[]; init_regs() copies idx from here */
+   { RADEON_PP_MISC, "RADEON_PP_MISC" },
+   { RADEON_PP_FOG_COLOR, "RADEON_PP_FOG_COLOR" },
+   { RADEON_RE_SOLID_COLOR, "RADEON_RE_SOLID_COLOR" },
+   { RADEON_RB3D_BLENDCNTL, "RADEON_RB3D_BLENDCNTL" },
+   { RADEON_RB3D_DEPTHOFFSET, "RADEON_RB3D_DEPTHOFFSET" },
+   { RADEON_RB3D_DEPTHPITCH, "RADEON_RB3D_DEPTHPITCH" },
+   { RADEON_RB3D_ZSTENCILCNTL, "RADEON_RB3D_ZSTENCILCNTL" },
+   { RADEON_PP_CNTL, "RADEON_PP_CNTL" },
+   { RADEON_RB3D_CNTL, "RADEON_RB3D_CNTL" },
+   { RADEON_RB3D_COLOROFFSET, "RADEON_RB3D_COLOROFFSET" },
+   { RADEON_RB3D_COLORPITCH, "RADEON_RB3D_COLORPITCH" },
+   { RADEON_SE_CNTL, "RADEON_SE_CNTL" },
+   { RADEON_SE_COORD_FMT, "RADEON_SE_COORDFMT" },
+   { RADEON_SE_CNTL_STATUS, "RADEON_SE_CNTL_STATUS" },
+   { RADEON_RE_LINE_PATTERN, "RADEON_RE_LINE_PATTERN" },
+   { RADEON_RE_LINE_STATE, "RADEON_RE_LINE_STATE" },
+   { RADEON_SE_LINE_WIDTH, "RADEON_SE_LINE_WIDTH" },
+   { RADEON_RB3D_STENCILREFMASK, "RADEON_RB3D_STENCILREFMASK" },
+   { RADEON_RB3D_ROPCNTL, "RADEON_RB3D_ROPCNTL" },
+   { RADEON_RB3D_PLANEMASK, "RADEON_RB3D_PLANEMASK" },
+   { RADEON_SE_VPORT_XSCALE, "RADEON_SE_VPORT_XSCALE" },
+   { RADEON_SE_VPORT_XOFFSET, "RADEON_SE_VPORT_XOFFSET" },
+   { RADEON_SE_VPORT_YSCALE, "RADEON_SE_VPORT_YSCALE" },
+   { RADEON_SE_VPORT_YOFFSET, "RADEON_SE_VPORT_YOFFSET" },
+   { RADEON_SE_VPORT_ZSCALE, "RADEON_SE_VPORT_ZSCALE" },
+   { RADEON_SE_VPORT_ZOFFSET, "RADEON_SE_VPORT_ZOFFSET" },
+   { RADEON_RE_MISC, "RADEON_RE_MISC" },
+   { RADEON_PP_TXFILTER_0, "RADEON_PP_TXFILTER_0" },
+   { RADEON_PP_TXFILTER_1, "RADEON_PP_TXFILTER_1" },
+   { RADEON_PP_TXFILTER_2, "RADEON_PP_TXFILTER_2" },
+   { RADEON_PP_TXFORMAT_0, "RADEON_PP_TXFORMAT_0" },
+   { RADEON_PP_TXFORMAT_1, "RADEON_PP_TXFORMAT_1" },
+   { RADEON_PP_TXFORMAT_2, "RADEON_PP_TXFORMAT_2" },
+   { RADEON_PP_TXOFFSET_0, "RADEON_PP_TXOFFSET_0" },
+   { RADEON_PP_TXOFFSET_1, "RADEON_PP_TXOFFSET_1" },
+   { RADEON_PP_TXOFFSET_2, "RADEON_PP_TXOFFSET_2" },
+   { RADEON_PP_TXCBLEND_0, "RADEON_PP_TXCBLEND_0" },
+   { RADEON_PP_TXCBLEND_1, "RADEON_PP_TXCBLEND_1" },
+   { RADEON_PP_TXCBLEND_2, "RADEON_PP_TXCBLEND_2" },
+   { RADEON_PP_TXABLEND_0, "RADEON_PP_TXABLEND_0" },
+   { RADEON_PP_TXABLEND_1, "RADEON_PP_TXABLEND_1" },
+   { RADEON_PP_TXABLEND_2, "RADEON_PP_TXABLEND_2" },
+   { RADEON_PP_TFACTOR_0, "RADEON_PP_TFACTOR_0" },
+   { RADEON_PP_TFACTOR_1, "RADEON_PP_TFACTOR_1" },
+   { RADEON_PP_TFACTOR_2, "RADEON_PP_TFACTOR_2" },
+   { RADEON_PP_BORDER_COLOR_0, "RADEON_PP_BORDER_COLOR_0" },
+   { RADEON_PP_BORDER_COLOR_1, "RADEON_PP_BORDER_COLOR_1" },
+   { RADEON_PP_BORDER_COLOR_2, "RADEON_PP_BORDER_COLOR_2" },
+   { RADEON_SE_ZBIAS_FACTOR, "RADEON_SE_ZBIAS_FACTOR" },
+   { RADEON_SE_ZBIAS_CONSTANT, "RADEON_SE_ZBIAS_CONSTANT" },
+   { RADEON_SE_TCL_OUTPUT_VTX_FMT, "RADEON_SE_TCL_OUTPUT_VTXFMT" },
+   { RADEON_SE_TCL_OUTPUT_VTX_SEL, "RADEON_SE_TCL_OUTPUT_VTXSEL" },
+   { RADEON_SE_TCL_MATRIX_SELECT_0, "RADEON_SE_TCL_MATRIX_SELECT_0" },
+   { RADEON_SE_TCL_MATRIX_SELECT_1, "RADEON_SE_TCL_MATRIX_SELECT_1" },
+   { RADEON_SE_TCL_UCP_VERT_BLEND_CTL, "RADEON_SE_TCL_UCP_VERT_BLEND_CTL" },
+   { RADEON_SE_TCL_TEXTURE_PROC_CTL, "RADEON_SE_TCL_TEXTURE_PROC_CTL" },
+   { RADEON_SE_TCL_LIGHT_MODEL_CTL, "RADEON_SE_TCL_LIGHT_MODEL_CTL" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_0, "RADEON_SE_TCL_PER_LIGHT_CTL_0" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_1, "RADEON_SE_TCL_PER_LIGHT_CTL_1" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_2, "RADEON_SE_TCL_PER_LIGHT_CTL_2" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_3, "RADEON_SE_TCL_PER_LIGHT_CTL_3" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, "RADEON_SE_TCL_EMMISSIVE_RED" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_GREEN, "RADEON_SE_TCL_EMMISSIVE_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_BLUE, "RADEON_SE_TCL_EMMISSIVE_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_ALPHA, "RADEON_SE_TCL_EMMISSIVE_ALPHA" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_RED, "RADEON_SE_TCL_AMBIENT_RED" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_GREEN, "RADEON_SE_TCL_AMBIENT_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_BLUE, "RADEON_SE_TCL_AMBIENT_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_ALPHA, "RADEON_SE_TCL_AMBIENT_ALPHA" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_RED, "RADEON_SE_TCL_DIFFUSE_RED" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_GREEN, "RADEON_SE_TCL_DIFFUSE_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_BLUE, "RADEON_SE_TCL_DIFFUSE_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_ALPHA, "RADEON_SE_TCL_DIFFUSE_ALPHA" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_RED, "RADEON_SE_TCL_SPECULAR_RED" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_GREEN, "RADEON_SE_TCL_SPECULAR_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_BLUE, "RADEON_SE_TCL_SPECULAR_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_ALPHA, "RADEON_SE_TCL_SPECULAR_ALPHA" },
+   { RADEON_SE_TCL_SHININESS, "RADEON_SE_TCL_SHININESS" },
+   { RADEON_SE_COORD_FMT, "RADEON_SE_COORD_FMT" },	/* second entry for this register; lookup_reg() returns the first */
+   { RADEON_PP_TEX_SIZE_0, "RADEON_PP_TEX_SIZE_0" },
+   { RADEON_PP_TEX_SIZE_1, "RADEON_PP_TEX_SIZE_1" },
+   { RADEON_PP_TEX_SIZE_2, "RADEON_PP_TEX_SIZE_2" },
+   { RADEON_PP_TEX_SIZE_0+4, "RADEON_PP_TEX_PITCH_0" },
+   { RADEON_PP_TEX_SIZE_1+4, "RADEON_PP_TEX_PITCH_1" },
+   { RADEON_PP_TEX_SIZE_2+4, "RADEON_PP_TEX_PITCH_2" },
+};
+
+static struct reg_names scalar_names[] = {	/* init_regs() walks this in order, in step with scalars[] */
+   { RADEON_SS_LIGHT_DCD_ADDR, "LIGHT_DCD" },
+   { RADEON_SS_LIGHT_SPOT_EXPONENT_ADDR, "LIGHT_SPOT_EXPONENT" },
+   { RADEON_SS_LIGHT_SPOT_CUTOFF_ADDR, "LIGHT_SPOT_CUTOFF" },
+   { RADEON_SS_LIGHT_SPECULAR_THRESH_ADDR, "LIGHT_SPECULAR_THRESH" },
+   { RADEON_SS_LIGHT_RANGE_CUTOFF_ADDR, "LIGHT_RANGE_CUTOFF" },
+   { RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, "VERT_GUARD_CLIP" },
+   { RADEON_SS_VERT_GUARD_DISCARD_ADJ_ADDR, "VERT_GUARD_DISCARD" },
+   { RADEON_SS_HORZ_GUARD_CLIP_ADJ_ADDR, "HORZ_GUARD_CLIP" },
+   { RADEON_SS_HORZ_GUARD_DISCARD_ADJ_ADDR, "HORZ_GUARD_DISCARD" },
+   { RADEON_SS_SHININESS, "SHININESS" },
+   { 1000, "" },	/* end marker; 1000 lies beyond the last scalars[] index */
+};
+
+/* Puff these out to make them look like normal (dword) registers:
+ * every address is multiplied by 4, one index per vector component. */
+static struct reg_names vector_names[] = {
+   { RADEON_VS_MATRIX_0_ADDR * 4, "MATRIX_0" },
+   { RADEON_VS_MATRIX_1_ADDR * 4, "MATRIX_1" },
+   { RADEON_VS_MATRIX_2_ADDR * 4, "MATRIX_2" },
+   { RADEON_VS_MATRIX_3_ADDR * 4, "MATRIX_3" },
+   { RADEON_VS_MATRIX_4_ADDR * 4, "MATRIX_4" },
+   { RADEON_VS_MATRIX_5_ADDR * 4, "MATRIX_5" },
+   { RADEON_VS_MATRIX_6_ADDR * 4, "MATRIX_6" },
+   { RADEON_VS_MATRIX_7_ADDR * 4, "MATRIX_7" },
+   { RADEON_VS_MATRIX_8_ADDR * 4, "MATRIX_8" },
+   { RADEON_VS_MATRIX_9_ADDR * 4, "MATRIX_9" },
+   { RADEON_VS_MATRIX_10_ADDR * 4, "MATRIX_10" },
+   { RADEON_VS_MATRIX_11_ADDR * 4, "MATRIX_11" },
+   { RADEON_VS_MATRIX_12_ADDR * 4, "MATRIX_12" },
+   { RADEON_VS_MATRIX_13_ADDR * 4, "MATRIX_13" },
+   { RADEON_VS_MATRIX_14_ADDR * 4, "MATRIX_14" },
+   { RADEON_VS_MATRIX_15_ADDR * 4, "MATRIX_15" },
+   { RADEON_VS_LIGHT_AMBIENT_ADDR * 4, "LIGHT_AMBIENT" },
+   { RADEON_VS_LIGHT_DIFFUSE_ADDR * 4, "LIGHT_DIFFUSE" },
+   { RADEON_VS_LIGHT_SPECULAR_ADDR * 4, "LIGHT_SPECULAR" },
+   { RADEON_VS_LIGHT_DIRPOS_ADDR * 4, "LIGHT_DIRPOS" },
+   { RADEON_VS_LIGHT_HWVSPOT_ADDR * 4, "LIGHT_HWVSPOT" },
+   { RADEON_VS_LIGHT_ATTENUATION_ADDR * 4, "LIGHT_ATTENUATION" },
+   { RADEON_VS_MATRIX_EYE2CLIP_ADDR * 4, "MATRIX_EYE2CLIP" },
+   { RADEON_VS_UCP_ADDR * 4, "UCP" },
+   { RADEON_VS_GLOBAL_AMBIENT_ADDR * 4, "GLOBAL_AMBIENT" },
+   { RADEON_VS_FOG_PARAM_ADDR * 4, "FOG_PARAM" },
+   { RADEON_VS_EYE_VECTOR_ADDR * 4, "EYE_VECTOR" },
+   { 1000, "" },	/* end marker */
+};
+
+union fi { float f; int i; };	/* one register value, viewed as float or as int */
+
+#define ISVEC   1	/* slot of vectors[]: named as name[component] */
+#define ISFLOAT 2	/* value is printed and compared as a float */
+#define TOUCHED 4	/* assigned at least once; print_reg() skips the rest */
+
+struct reg {
+   int idx; 
+   struct reg_names *closest;	/* named entry this slot is reported relative to */
+   int flags;			/* ISVEC | ISFLOAT | TOUCHED */
+   union fi current;		/* value of the most recent assignment */
+   union fi *values;		/* distinct int values seen, see find_or_add_value() */
+   int nvalues;
+   int nalloc;
+   float vmin, vmax;		/* extremes reported for float registers */
+};
+/* Each table has one extra slot, set to idx == -1 by init_regs() as end marker. */
+
+static struct reg regs[Elements(reg_names)+1];
+static struct reg scalars[512+1];
+static struct reg vectors[512*4+1];
+
+static int total, total_changed, bufs;
+
+static void init_regs( void )
+{
+   /* Fill the lookup tables.  Each slot keeps the nearest named entry at or
+    * below it ("closest"); the last slot of a table is its idx == -1 end. */
+   struct reg_names *tmp, *end;
+   int i;
+   /* regs[] is one slot longer than reg_names[]: copy the named slots only. */
+   for (i = 0 ; i < Elements(reg_names) ; i++) {
+      regs[i].idx = reg_names[i].idx;
+      regs[i].closest = &reg_names[i];
+      regs[i].flags = 0;
+   }
+
+   /* 'end' is the name table's own terminator entry; never step onto it. */
+   end = &scalar_names[Elements(scalar_names)-1];
+   for (i = 0, tmp = scalar_names ; i < Elements(scalars) ; i++) {
+      if (tmp+1 < end && tmp[1].idx == i) tmp++;
+      scalars[i].idx = i;
+      scalars[i].closest = tmp;
+      scalars[i].flags = ISFLOAT;
+   }
+
+   /* vector_names[] is already scaled to dword indices, just like 'i'. */
+   end = &vector_names[Elements(vector_names)-1];
+   for (i = 0, tmp = vector_names ; i < Elements(vectors) ; i++) {
+      if (tmp+1 < end && tmp[1].idx == i) tmp++;
+      vectors[i].idx = i;
+      vectors[i].closest = tmp;
+      vectors[i].flags = ISFLOAT|ISVEC;
+   }
+
+   regs[Elements(regs)-1].idx = -1;
+   scalars[Elements(scalars)-1].idx = -1;
+   vectors[Elements(vectors)-1].idx = -1;
+}
+
+/* Remember 'val' as a value seen for 'reg'.  Returns 1 if it had been seen
+ * before, 0 if it is new. */
+static int find_or_add_value( struct reg *reg, int val )
+{
+   int j;
+   for ( j = 0 ; j < reg->nvalues ; j++)
+      if ( val == reg->values[j].i )
+         return 1;
+   if (reg->nvalues == reg->nalloc) {
+      int wanted = (reg->nalloc + 5) * 2;
+      union fi *grown = (union fi *) realloc( reg->values, wanted * sizeof(union fi) );
+      if (!grown)
+         return 0;   /* out of memory: the value is simply not recorded */
+      reg->values = grown;
+      reg->nalloc = wanted;
+   }
+   reg->values[reg->nvalues++].i = val;
+   return 0;
+}
+
+/* Linear search of 'tab' up to its idx == -1 terminator.  Complains on
+ * stderr and returns 0 when no slot matches 'reg'. */
+static struct reg *lookup_reg( struct reg *tab, int reg )
+{
+   int i;
+   for (i = 0 ; tab[i].idx != -1 ; i++)
+      if (tab[i].idx == reg)
+         return &tab[i];
+
+   fprintf(stderr, "*** unknown reg 0x%x\n", reg);
+   return 0;
+}
+
+/* Name of 'reg', or closest name plus offset.  The result may live in a
+ * static buffer that the next call overwrites. */
+static const char *get_reg_name( struct reg *reg )
+{
+   static char tmp[80];
+   int row;
+   if (reg->idx == reg->closest->idx)
+      return reg->closest->name;
+   if (!(reg->flags & ISVEC)) {
+      sprintf(tmp, "%s+%d", reg->closest->name, reg->idx - reg->closest->idx);
+      return tmp;
+   }
+   /* Both indices count dwords: /4 is the vector row, %4 the component. */
+   row = reg->idx/4 - reg->closest->idx/4;
+   if (row != 0)
+      sprintf(tmp, "%s+%d[%d]", reg->closest->name, row, reg->idx%4);
+   else
+      sprintf(tmp, "%s[%d]", reg->closest->name, reg->idx%4);
+   return tmp;
+}
+
+static int print_int_reg_assignment( struct reg *reg, int data )
+{
+   /* Compare against the previous value before it is overwritten below. */
+   const int changed = (data != reg->current.i);
+   const int is_new = !find_or_add_value( reg, data );
+   const int noteworthy = NORMAL && (changed || is_new);
+   const int show = VERBOSE || noteworthy;
+
+   if (show)
+      fprintf(stderr, "   %s <-- 0x%x", get_reg_name(reg), data);
+
+   /* Tag the line; a never-seen value takes precedence over a mere change. */
+   if (NORMAL && is_new)
+      fprintf(stderr, " *** BRAND NEW VALUE");
+   else if (NORMAL && changed)
+      fprintf(stderr, " *** CHANGED");
+
+   reg->current.i = data;
+   if (show)
+      fprintf(stderr, "\n");
+   return changed;
+}
+
+
+static int print_float_reg_assignment( struct reg *reg, float data )
+{
+   /* All three tests use the state from before this assignment. */
+   const int changed = (data != reg->current.f);
+   const int below = (data < reg->vmin);
+   const int above = (data > reg->vmax);
+   const int show = VERBOSE || (NORMAL && (below || above || changed));
+
+   if (show)
+      fprintf(stderr, "   %s <-- %.3f", get_reg_name(reg), data);
+
+   /* One tag at most: new minimum, else new maximum, else plain change.
+    * The recorded range is only widened when NORMAL output is enabled.
+    */
+   if (NORMAL && below) {
+      fprintf(stderr, " *** NEW MIN (prev %.3f)", reg->vmin);
+      reg->vmin = data;
+   }
+   else if (NORMAL && above) {
+      fprintf(stderr, " *** NEW MAX (prev %.3f)", reg->vmax);
+      reg->vmax = data;
+   }
+   else if (NORMAL && changed)
+      fprintf(stderr, " *** CHANGED");
+
+   reg->current.f = data;
+   if (show)
+      fprintf(stderr, "\n");
+   return changed;
+}
+
+static int print_reg_assignment( struct reg *reg, int data )
+{
+   union fi bits;	/* raw dword, readable as a float without an aliasing cast */
+   if (!reg) return 0;	/* unknown register: lookup_reg() has already complained */
+   bits.i = data;  reg->flags |= TOUCHED;
+   if (reg->flags & ISFLOAT) return print_float_reg_assignment( reg, bits.f );
+   return print_int_reg_assignment( reg, data );
+}
+
+static void print_reg( struct reg *reg )
+{
+   /* Registers that were never assigned are left out of the dump. */
+   if (!(reg->flags & TOUCHED))
+      return;
+   if (reg->flags & ISFLOAT)
+      fprintf(stderr, "   %s == %f\n", get_reg_name(reg), reg->current.f);
+   else
+      fprintf(stderr, "   %s == 0x%x\n", get_reg_name(reg), reg->current.i);
+}
+
+
+static void dump_state( void )
+{
+   /* Print every touched slot, table by table; the terminator slots are
+    * never touched, so they print nothing. */
+   struct reg *r;
+
+   for (r = regs ; r < regs + Elements(regs) ; r++)
+      print_reg( r );
+   for (r = scalars ; r < scalars + Elements(scalars) ; r++)
+      print_reg( r );
+   for (r = vectors ; r < vectors + Elements(vectors) ; r++)
+      print_reg( r );
+}
+
+
+
+static int radeon_emit_packets( 
+   drmRadeonCmdHeader header,
+   drmRadeonCmdBuffer *cmdbuf )
+{
+   /* Decode one state packet: 'len' dwords written to consecutive registers. */
+   int id = (int)header.packet.packet_id;
+   int *data = (int *)cmdbuf->buf;
+   int sz, i;
+   /* Check the id before it indexes packet[]; slots the initializer does
+    * not reach have a NULL name. */
+   if (id < 0 || id >= RADEON_MAX_STATE_PACKETS || !packet[id].name) {
+      fprintf(stderr, "*** Unknown packet 0 nr %d\n", id );
+      return -EINVAL;
+   }
+   sz = packet[id].len;
+   if (sz * sizeof(int) > cmdbuf->bufsz) {
+      fprintf(stderr, "Packet overflows cmdbuf\n");
+      return -EINVAL;
+   }
+
+   if (VERBOSE)
+      fprintf(stderr, "Packet 0 reg %s nr %d\n", packet[id].name, sz );
+
+   for ( i = 0 ; i < sz ; i++) {
+      struct reg *reg = lookup_reg( regs, packet[id].start + i*4 );
+      if (print_reg_assignment( reg, data[i] ))
+         total_changed++;
+      total++;
+   }
+
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+
+static int radeon_emit_scalars( 
+   drmRadeonCmdHeader header,
+   drmRadeonCmdBuffer *cmdbuf )
+{
+   /* Decode 'count' dwords aimed at scalar slots offset, offset+stride, ...
+    * and consume them from the command buffer. */
+   const int count = header.scalars.count;
+   const int stride = header.scalars.stride;
+   const int first = header.scalars.offset;
+   const int *dword = (int *)cmdbuf->buf;
+   int n;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit scalars, start %d stride %d nr %d (end %d)\n",
+              first, stride, count, first + stride * count);
+
+   for (n = 0 ; n < count ; n++) {
+      if (print_reg_assignment( lookup_reg( scalars, first + n * stride ), dword[n] ))
+         total_changed++;
+      total++;
+   }
+
+   cmdbuf->buf += count * sizeof(int);
+   cmdbuf->bufsz -= count * sizeof(int);
+   return 0;
+}
+
+
+static int radeon_emit_scalars2( 
+   drmRadeonCmdHeader header,
+   drmRadeonCmdBuffer *cmdbuf )
+{
+   /* As radeon_emit_scalars(), but the target offset is biased by 0x100. */
+   const int count = header.scalars.count;
+   const int stride = header.scalars.stride;
+   const int first = header.scalars.offset + 0x100;
+   const int *dword = (int *)cmdbuf->buf;
+   int n;
+   if (VERBOSE)
+      fprintf(stderr, "emit scalars2, start %d stride %d nr %d (end %d)\n",
+              first, stride, count, first + stride * count);
+
+   /* NOTE(review): 'first' already includes the 0x100 bias, so a limit of
+    * 257 looks too small -- confirm the intended bound. */
+   if (first + stride * count > 257) {
+      fprintf(stderr, "emit scalars OVERFLOW %d/%d/%d\n", first, stride, count);
+      return -1;
+   }
+
+   for (n = 0 ; n < count ; n++) {
+      if (print_reg_assignment( lookup_reg( scalars, first + n * stride ), dword[n] ))
+         total_changed++;
+      total++;
+   }
+   cmdbuf->buf += count * sizeof(int);
+   cmdbuf->bufsz -= count * sizeof(int);
+   return 0;
+}
+
+/* Check: inf/nan/extreme-size?
+ * Check: table start, end, nr, etc.
+ */
+static int radeon_emit_vectors( 
+   drmRadeonCmdHeader header,
+   drmRadeonCmdBuffer *cmdbuf )
+{
+   /* Decode header.vectors.count dwords as rows of four components; row r
+    * goes to vector address offset + r * stride, i.e. vectors[] slots
+    * 4 * address + 0..3.  Statistics are kept per row (four dwords).
+    */
+   int sz = header.vectors.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.vectors.offset;
+   int stride = header.vectors.stride;
+   int i,j;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit vectors, start %d stride %d nr %d (end %d) (0x%x)\n",
+              start, stride, sz, start + stride * sz, header.i);
+
+   for (i = 0 ; i < sz ;  start += stride) {
+      int changed = 0;
+      /* Stop at 'sz' as well, so a count that is not a multiple of four
+       * cannot make the last row read beyond the dwords it was given. */
+      for (j = 0 ; j < 4 && i < sz ; i++,j++) {
+         struct reg *reg = lookup_reg( vectors, start*4+j );
+         if (print_reg_assignment( reg, data[i] ))
+            changed = 1;
+      }
+      if (changed)
+         total_changed += 4;
+      total += 4;
+   }
+
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+
+static int print_vertex_format( int vfmt )	/* names the RADEON_CP_VC_FRMT_* bits set in vfmt; always returns 0 */
+{
+   if (NORMAL) {
+      fprintf(stderr, "   %s(%x): %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+	      "vertex format",
+	      vfmt,
+	      "xy,",
+	      (vfmt & RADEON_CP_VC_FRMT_Z) ? "z," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_W0) ? "w0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPCOLOR) ? "fpcolor," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPALPHA) ? "fpalpha," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_PKCOLOR) ? "pkcolor," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPSPEC) ? "fpspec," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPFOG) ? "fpfog," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_PKSPEC) ? "pkspec," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST0) ? "st0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST1) ? "st1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q1) ? "q1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST2) ? "st2," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q2) ? "q2," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST3) ? "st3," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q3) ? "q3," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q0) ? "q0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_N0) ? "n0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_XY1) ? "xy1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Z1) ? "z1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_W1) ? "w1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_N1) ? "n1," : "");
+      /* "xy," is printed unconditionally; every other name only when its bit is set. */
+   
+/*       if (!find_or_add_value( &others[V_VTXFMT], vfmt )) */
+/* 	 fprintf(stderr, " *** NEW VALUE"); */
+
+      fprintf(stderr, "\n");
+   }
+
+   return 0;
+}
+/* Primitive names, indexed by the low four bits of a prim word. */
+static char *primname[0xf] = {
+   "NONE",
+   "POINTS",
+   "LINES",
+   "LINE_STRIP",
+   "TRIANGLES",
+   "TRIANGLE_FAN",
+   "TRIANGLE_STRIP",
+   "TRI_TYPE_2",
+   "RECT_LIST",
+   "3VRT_POINTS",
+   "3VRT_LINES",		/* slots without an initializer stay NULL */
+};
+/* Print and validate a draw packet's prim word; 0 if sane, else -1. */
+static int print_prim_and_flags( int prim )
+{
+   int numverts;
+   
+   if (NORMAL)
+      fprintf(stderr, "   %s(%x): %s%s%s%s%s%s%s\n",
+	      "prim flags",
+	      prim,
+	      ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_IND) ? "IND," : "",
+	      ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_LIST) ? "LIST," : "",
+	      ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_RING) ? "RING," : "",
+	      (prim & RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA) ? "RGBA," : "BGRA, ",
+	      (prim & RADEON_CP_VC_CNTL_MAOS_ENABLE) ? "MAOS," : "",
+	      (prim & RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE) ? "RADEON," : "",
+	      (prim & RADEON_CP_VC_CNTL_TCL_ENABLE) ? "TCL," : "");
+   /* Range-check the type before it is used to index primname[]. */
+   if ((prim & 0xf) > RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST) {
+      fprintf(stderr, "   *** Bad primitive: %x\n", prim & 0xf);
+      return -1;
+   }
+   /* The vertex count sits in the upper 16 bits of the prim word. */
+   numverts = prim>>16;
+   
+   if (NORMAL)
+      fprintf(stderr, "   prim: %s numverts %d\n", primname[prim&0xf], numverts);
+   /* Each primitive type restricts the vertex counts it accepts. */
+   switch (prim & 0xf) {
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_NONE:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_POINT:
+      if (numverts < 1) {
+	 fprintf(stderr, "Bad nr verts for point %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE:
+      if ((numverts & 1) || numverts == 0) {
+	 fprintf(stderr, "Bad nr verts for line %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP:
+      if (numverts < 2) {
+	 fprintf(stderr, "Bad nr verts for line_strip %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_POINT_LIST:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST:
+      if (numverts % 3 || numverts == 0) {
+	 fprintf(stderr, "Bad nr verts for tri %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP:
+      if (numverts < 3) {
+	 fprintf(stderr, "Bad nr verts for strip/fan %d\n", numverts);
+	 return -1;
+      }
+      break;
+   default:
+      fprintf(stderr, "Bad primitive\n");
+      return -1;
+   }	
+   return 0;
+}
+
+/* Decode one type-3 packet at cmdbuf->buf (built-in knowledge about each
+ * packet type lives here) and step past it; -EINVAL if it is malformed. */
+static int radeon_emit_packet3( drmRadeonCmdBuffer *cmdbuf )
+{
+   int cmdsz;
+   int *cmd = (int *)cmdbuf->buf;
+   int *tmp;
+   int i, stride, size, start;
+   /* Size in dwords: the header's count field plus two. */
+   cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
+   /* Must be a PACKET3 that fits both the buffer and the packet limit. */
+   if ((cmd[0] & RADEON_CP_PACKET_MASK) != RADEON_CP_PACKET3 ||
+       cmdsz * 4 > cmdbuf->bufsz ||
+       cmdsz > RADEON_CP_PACKET_MAX_DWORDS) {
+      fprintf(stderr, "Bad packet\n");
+      return -EINVAL;
+   }
+   /* Select on the header with its count bits masked off. */
+   switch( cmd[0] & ~RADEON_CP_PACKET_COUNT_MASK ) {
+   case RADEON_CP_PACKET3_NOP:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_NOP, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_NEXT_CHAR:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_NEXT_CHAR, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_PLY_NEXTSCAN:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_PLY_NEXTSCAN, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_SET_SCISSORS:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_SET_SCISSORS, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_RNDR_GEN_INDX_PRIM, %d dwords\n",
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_LOAD_MICROCODE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_LOAD_MICROCODE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_WAIT_FOR_IDLE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_WAIT_FOR_IDLE, %d dwords\n", cmdsz);
+      break;
+   /* cmd[1] is printed as the vertex format, cmd[2] as the prim word. */
+   case RADEON_CP_PACKET3_3D_DRAW_VBUF:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_DRAW_VBUF, %d dwords\n", cmdsz);
+      print_vertex_format(cmd[1]);
+      print_prim_and_flags(cmd[2]);
+      break;
+
+   case RADEON_CP_PACKET3_3D_DRAW_IMMD:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_DRAW_IMMD, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_DRAW_INDX: {
+      int neltdwords;
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_DRAW_INDX, %d dwords\n", cmdsz);
+      print_vertex_format(cmd[1]);
+      print_prim_and_flags(cmd[2]);
+      neltdwords = cmd[2]>>16;
+      neltdwords += neltdwords & 1;
+      neltdwords /= 2;		/* two elements per dword, rounded up */
+      if (neltdwords + 3 != cmdsz)
+	 fprintf(stderr, "Mismatch in DRAW_INDX, %d vs cmdsz %d\n",
+		 neltdwords, cmdsz);
+      break;
+   }
+   case RADEON_CP_PACKET3_LOAD_PALETTE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_LOAD_PALETTE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_LOAD_VBPNTR:
+      if (NORMAL) {
+	 fprintf(stderr, "PACKET3_3D_LOAD_VBPNTR, %d dwords\n", cmdsz);
+	 fprintf(stderr, "   nr arrays: %d\n", cmd[1]);
+      }
+      /* NOTE(review): the loop below uses 3 dwords per array pair - confirm. */
+      if (cmd[1]/2 + cmd[1]%2 != cmdsz - 3) {
+	 fprintf(stderr, "  ****** MISMATCH %d/%d *******\n",
+		 cmd[1]/2 + cmd[1]%2 + 3, cmdsz);
+	 return -EINVAL;
+      }
+      /* Arrays come in pairs sharing tmp[0]: low half even, high half odd. */
+      if (NORMAL) {
+	 tmp = cmd+2;
+	 for (i = 0 ; i < cmd[1] ; i++) {
+	    if (i & 1) {
+	       stride = (tmp[0]>>24) & 0xff;
+	       size = (tmp[0]>>16) & 0xff;
+	       start = tmp[2];
+	       tmp += 3;
+	    }
+	    else {
+	       stride = (tmp[0]>>8) & 0xff;
+	       size = (tmp[0]) & 0xff;
+	       start = tmp[1];
+	    }
+	    fprintf(stderr, "   array %d: start 0x%x vsize %d vstride %d\n",
+		    i, start, size, stride );
+	 }
+      }
+      break;
+   case RADEON_CP_PACKET3_CNTL_PAINT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_PAINT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_BITBLT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_BITBLT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_SMALLTEXT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_SMALLTEXT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_HOSTDATA_BLT, %d dwords\n", 
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_POLYLINE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_POLYLINE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_POLYSCANLINES:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_POLYSCANLINES, %d dwords\n", 
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_PAINT_MULTI:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_PAINT_MULTI, %d dwords\n", 
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_BITBLT_MULTI:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_BITBLT_MULTI, %d dwords\n", 
+	      cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_TRANS_BITBLT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_TRANS_BITBLT, %d dwords\n", 
+	      cmdsz);
+      break;
+   default:
+      fprintf(stderr, "UNKNOWN PACKET, %d dwords\n", cmdsz);
+      break;
+   }
+   /* The packet is skipped in full whether or not it was recognised. */
+   cmdbuf->buf += cmdsz * 4;
+   cmdbuf->bufsz -= cmdsz * 4;
+   return 0;
+}
+
+
+/* Dump the pending state and the cliprects attached to the command
+ * buffer, then decode the packet itself with radeon_emit_packet3().
+ * NOTE(review): "total_changed zero" also prints whenever VERBOSE is off.
+ */
+static int radeon_emit_packet3_cliprect( drmRadeonCmdBuffer *cmdbuf )
+{
+   const XF86DRIClipRectRec *rect = (XF86DRIClipRectRec *)cmdbuf->boxes;
+   int n;
+
+   if (!(VERBOSE) || !total_changed)
+      fprintf(stderr, "total_changed zero\n");
+   else {
+      dump_state();
+      total_changed = 0;
+   }
+
+   if (NORMAL) {
+      for (n = 0 ; n < cmdbuf->nbox ; n++, rect++)
+	 fprintf(stderr, "Emit box %d/%d %d,%d %d,%d\n",
+		 n, cmdbuf->nbox, rect->x1, rect->y1, rect->x2, rect->y2);
+   }
+
+   /* A single cliprect is dropped from the count before decoding. */
+   if (cmdbuf->nbox == 1)
+      cmdbuf->nbox = 0;
+
+   return radeon_emit_packet3( cmdbuf );
+}
+/* Walk the command buffer stored in 'rmesa', decoding and checking each
+ * command in turn.  Returns 0 if everything parses, -EINVAL otherwise. */
+int radeonSanityCmdBuffer( radeonContextPtr rmesa,
+			   int nbox,
+			   XF86DRIClipRectRec *boxes )
+{
+   int idx;
+   drmRadeonCmdBuffer cmdbuf;
+   drmRadeonCmdHeader header;
+   static int inited = 0;
+
+   if (!inited) {
+      init_regs();
+      inited = 1;
+   }
+
+   cmdbuf.buf = rmesa->store.cmd_buf;
+   cmdbuf.bufsz = rmesa->store.cmd_used;
+   cmdbuf.boxes = (drmClipRect *)boxes;
+   cmdbuf.nbox = nbox;
+   /* Compare as int: the emit helpers subtract sizes unchecked, and a
+    * negative bufsz must end the walk instead of converting to unsigned. */
+   while ( cmdbuf.bufsz >= (int)sizeof(header) ) {
+      header.i = *(int *)cmdbuf.buf;
+      cmdbuf.buf += sizeof(header);
+      cmdbuf.bufsz -= sizeof(header);
+
+      switch (header.header.cmd_type) {
+      case RADEON_CMD_PACKET: 
+	 if (radeon_emit_packets( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_packets failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_SCALARS:
+	 if (radeon_emit_scalars( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_scalars failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_SCALARS2:
+	 if (radeon_emit_scalars2( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_scalars2 failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_VECTORS:
+	 if (radeon_emit_vectors( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_vectors failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_DMA_DISCARD:
+	 idx = header.dma.buf_idx;
+	 if (NORMAL)
+	    fprintf(stderr, "RADEON_CMD_DMA_DISCARD buf %d\n", idx);
+	 bufs++;
+	 break;
+
+      case RADEON_CMD_PACKET3:
+	 if (radeon_emit_packet3( &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_packet3 failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_PACKET3_CLIP:
+	 if (radeon_emit_packet3_cliprect( &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_packet3_clip failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_WAIT:
+	 break;
+
+      default:
+	 fprintf(stderr,"bad cmd_type %d at %p\n", 
+		   header.header.cmd_type,
+		   (void *)(cmdbuf.buf - sizeof(header)));
+	 return -EINVAL;
+      }
+   }
+   /* Emission statistics for every tenth call; switched off by if (0). */
+   if (0)
+   {
+      static int n = 0;
+      n++;
+      if (n == 10) {
+	 fprintf(stderr, "Bufs %d Total emitted %d real changes %d (%.2f%%)\n",
+		 bufs,
+		 total, total_changed, 
+		 ((float)total_changed/(float)total*100.0));
+	 fprintf(stderr, "Total emitted per buf: %.2f\n",
+		 (float)total/(float)bufs);
+	 fprintf(stderr, "Real changes per buf: %.2f\n",
+		 (float)total_changed/(float)bufs);
+
+	 bufs = n = total = total_changed = 0;
+      }
+   }
+
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.h b/src/mesa/drivers/dri/radeon/radeon_sanity.h
new file mode 100644
index 0000000000..58e8335dd6
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_sanity.h
@@ -0,0 +1,8 @@
+#ifndef RADEON_SANITY_H
+#define RADEON_SANITY_H
+/* Debug walk over rmesa's stored command buffer; 0 if it parses, else -EINVAL. */
+extern int radeonSanityCmdBuffer( radeonContextPtr rmesa,
+				  int nbox,
+				  XF86DRIClipRectRec *boxes );
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
new file mode 100644
index 0000000000..ec8ed42d64
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -0,0 +1,421 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_screen.c,v 1.6 2002/12/16 16:18:58 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#include "glheader.h"
+#include "imports.h"
+
+#include "radeon_context.h"
+#include "radeon_screen.h"
+#include "radeon_macros.h"
+
+#include "utils.h"
+#include "context.h"
+#include "vblank.h"
+
+#ifndef _SOLO
+#include "glxextensions.h"
+#endif 
+
+#if 1
+/* Including xf86PciInfo.h introduces a bunch of errors...
+ */
+#define PCI_CHIP_RADEON_QD	0x5144
+#define PCI_CHIP_RADEON_QE	0x5145
+#define PCI_CHIP_RADEON_QF	0x5146
+#define PCI_CHIP_RADEON_QG	0x5147
+
+#define PCI_CHIP_RADEON_QY	0x5159
+#define PCI_CHIP_RADEON_QZ	0x515A
+
+#define PCI_CHIP_RADEON_LW	0x4C57 /* mobility 7 - has tcl */
+
+#define PCI_CHIP_RADEON_LY	0x4C59
+#define PCI_CHIP_RADEON_LZ	0x4C5A
+
+#define PCI_CHIP_RV200_QW	0x5157 /* Radeon 7500 - not an R200 at all */
+#endif
+
+static int getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo );
+
+/* Create the device specific screen private data struct.  Returns NULL,
+ * with the allocation and any mappings released, if a step fails. */
+radeonScreenPtr radeonCreateScreen( __DRIscreenPrivate *sPriv )
+{
+   radeonScreenPtr screen;
+   RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
+
+   if ( ! driCheckDriDdxDrmVersions( sPriv, "Radeon", 4, 0, 4, 0, 1, 3 ) )
+      return NULL;
+
+   /* Allocate the private area */
+   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
+   if ( !screen ) {
+      __driUtilMessage("%s: Could not allocate memory for screen structure",
+		       __FUNCTION__);
+      return NULL;
+   }
+
+   if ( sPriv->drmMinor < 3 ||
+        getenv("RADEON_COMPAT")) {
+	   fprintf( stderr, "Radeon DRI driver:\n\t"
+		    "Compatibility mode for DRM driver version %d.%d.%d\n\t"
+		    "TCL will be disabled, expect reduced performance\n\t"
+		    "(prefer DRM radeon.o 1.3.x or newer)\n\t", 
+		    sPriv->drmMajor, sPriv->drmMinor, sPriv->drmPatch ); 
+   }
+
+   /* This is first since which regions we map depends on whether or
+    * not we are using a PCI card.
+    */
+   screen->IsPCI = dri_priv->IsPCI;
+   /* GETPARAM is only issued on DRM minor >= 3, the IRQ query on >= 6. */
+   if (sPriv->drmMinor >= 3) {
+      int ret;
+      drmRadeonGetParam gp;
+
+      gp.param = RADEON_PARAM_AGP_BUFFER_OFFSET;
+      gp.value = &screen->agp_buffer_offset;
+
+      ret = drmCommandWriteRead( sPriv->fd, DRM_RADEON_GETPARAM,
+				 &gp, sizeof(gp));
+      if (ret) {
+	 FREE( screen );
+	 fprintf(stderr, "drmRadeonGetParam (RADEON_PARAM_AGP_BUFFER_OFFSET): %d\n", ret);
+	 return NULL;
+      }
+
+      if (sPriv->drmMinor >= 6) {
+	 gp.param = RADEON_PARAM_IRQ_NR;
+	 gp.value = &screen->irq;
+
+	 ret = drmCommandWriteRead( sPriv->fd, DRM_RADEON_GETPARAM,
+				    &gp, sizeof(gp));
+	 if (ret) {
+	    FREE( screen );
+	    fprintf(stderr, "drmRadeonGetParam (RADEON_PARAM_IRQ_NR): %d\n", ret);
+	    return NULL;
+	 }
+      }
+   }
+   /* Map the register region described by the DDX private data. */
+   screen->mmio.handle = dri_priv->registerHandle;
+   screen->mmio.size   = dri_priv->registerSize;
+   if ( drmMap( sPriv->fd,
+		screen->mmio.handle,
+		screen->mmio.size,
+		&screen->mmio.map ) ) {
+      FREE( screen );
+      __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
+      return NULL;
+   }
+   /* Map the status region; the scratch pointer is an offset into it. */
+   screen->status.handle = dri_priv->statusHandle;
+   screen->status.size   = dri_priv->statusSize;
+   if ( drmMap( sPriv->fd,
+		screen->status.handle,
+		screen->status.size,
+		&screen->status.map ) ) {
+      drmUnmap( screen->mmio.map, screen->mmio.size );
+      FREE( screen );
+      __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
+      return NULL;
+   }
+   screen->scratch = (__volatile__ CARD32 *)
+      ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
+   /* From here on each failure must also undo the earlier mappings. */
+   screen->buffers = drmMapBufs( sPriv->fd );
+   if ( !screen->buffers ) {
+      drmUnmap( screen->status.map, screen->status.size );
+      drmUnmap( screen->mmio.map, screen->mmio.size );
+      FREE( screen );
+      __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
+      return NULL;
+   }
+   /* The AGP texture region is only mapped for non-PCI cards. */
+   if ( !screen->IsPCI ) {
+      screen->agpTextures.handle = dri_priv->agpTexHandle;
+      screen->agpTextures.size   = dri_priv->agpTexMapSize;
+      if ( drmMap( sPriv->fd,
+		   screen->agpTextures.handle,
+		   screen->agpTextures.size,
+		   (drmAddressPtr)&screen->agpTextures.map ) ) {
+	 drmUnmapBufs( screen->buffers );
+	 drmUnmap( screen->status.map, screen->status.size );
+	 drmUnmap( screen->mmio.map, screen->mmio.size );
+	 FREE( screen );
+         __driUtilMessage("%s: IsPCI failed\n", __FUNCTION__);
+	 return NULL;
+      }
+   }
+   /* Unknown and listed TCL-capable IDs get the TCL flag; QY/QZ/LY/LZ do not. */
+   screen->chipset = 0;
+   switch ( dri_priv->deviceID ) {
+   default:
+      fprintf(stderr, "unknown chip id, assuming full radeon support\n"); /* fallthrough */
+   case PCI_CHIP_RADEON_QD:
+   case PCI_CHIP_RADEON_QE:
+   case PCI_CHIP_RADEON_QF:
+   case PCI_CHIP_RADEON_QG:
+   case PCI_CHIP_RV200_QW:
+   case PCI_CHIP_RADEON_LW:
+      screen->chipset |= RADEON_CHIPSET_TCL;	/* fallthrough */
+   case PCI_CHIP_RADEON_QY:
+   case PCI_CHIP_RADEON_QZ:
+   case PCI_CHIP_RADEON_LY:
+   case PCI_CHIP_RADEON_LZ:
+      break;
+   }
+
+   screen->cpp = dri_priv->bpp / 8;
+   screen->AGPMode = dri_priv->AGPMode;
+
+   screen->frontOffset	= dri_priv->frontOffset;
+   screen->frontPitch	= dri_priv->frontPitch;
+   screen->backOffset	= dri_priv->backOffset;
+   screen->backPitch	= dri_priv->backPitch;
+   screen->depthOffset	= dri_priv->depthOffset;
+   screen->depthPitch	= dri_priv->depthPitch;
+
+   screen->texOffset[RADEON_CARD_HEAP] = dri_priv->textureOffset;
+   screen->texSize[RADEON_CARD_HEAP] = dri_priv->textureSize;
+   screen->logTexGranularity[RADEON_CARD_HEAP] =
+      dri_priv->log2TexGran;
+   /* PCI cards, or the override variable, leave the AGP heap empty. */
+   if ( screen->IsPCI 
+	|| getenv( "RADEON_AGPTEXTURING_FORCE_DISABLE" ) ) {
+      screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1;
+      screen->texOffset[RADEON_AGP_HEAP] = 0;
+      screen->texSize[RADEON_AGP_HEAP] = 0;
+      screen->logTexGranularity[RADEON_AGP_HEAP] = 0;
+   } else {
+      screen->numTexHeaps = RADEON_NR_TEX_HEAPS;
+      screen->texOffset[RADEON_AGP_HEAP] =
+	 dri_priv->agpTexOffset + RADEON_AGP_TEX_OFFSET;
+      screen->texSize[RADEON_AGP_HEAP] = dri_priv->agpTexMapSize;
+      screen->logTexGranularity[RADEON_AGP_HEAP] =
+	 dri_priv->log2AGPTexGran;
+   }
+
+   screen->driScreen = sPriv;
+   screen->sarea_priv_offset = dri_priv->sarea_priv_offset;
+   return screen;
+}
+
+/* Release the device specific screen private data: unmap its regions,
+ * free the struct and clear sPriv->private. */
+void radeonDestroyScreen( __DRIscreenPrivate *sPriv )
+{
+   radeonScreenPtr scr = (radeonScreenPtr)sPriv->private;
+
+   /* A NULL private pointer means there is nothing to release. */
+   if (scr != NULL) {
+      /* The AGP texture region is only unmapped for non-PCI cards. */
+      if ( scr->IsPCI == 0 )
+	 drmUnmap( scr->agpTextures.map, scr->agpTextures.size );
+
+      drmUnmapBufs( scr->buffers );
+      drmUnmap( scr->status.map, scr->status.size );
+      drmUnmap( scr->mmio.map, scr->mmio.size );
+
+      FREE( scr );
+      sPriv->private = NULL;
+   }
+}
+
+
+/* Initialize the driver specific screen private data.  GL_FALSE is
+ * returned when the screen private could not be created. */
+static GLboolean
+radeonInitDriver( __DRIscreenPrivate *sPriv )
+{
+   radeonScreenPtr created = radeonCreateScreen( sPriv );
+
+   sPriv->private = (void *) created;
+   if ( created != NULL )
+      return GL_TRUE;
+   radeonDestroyScreen( sPriv );	/* returns early on a NULL private */
+   return GL_FALSE;
+}
+
+
+
+/* Create and initialize the Mesa and driver specific pixmap buffer
+ * data.  Pixmap drawables are not implemented and yield GL_FALSE.
+ */
+static GLboolean
+radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
+                    __DRIdrawablePrivate *driDrawPriv,
+                    const __GLcontextModes *mesaVis,
+                    GLboolean isPixmap )
+{
+   GLboolean softAccum, softStencil;
+   void *fb;
+
+   if (isPixmap)
+      return GL_FALSE; /* not implemented */
+
+   /* Flags for _mesa_create_framebuffer(): accum whenever accum bits are
+    * requested, stencil when requested without a 24-bit depth buffer;
+    * the depth and alpha flags stay GL_FALSE. */
+   softAccum = mesaVis->accumRedBits > 0;
+   softStencil = mesaVis->stencilBits > 0 && mesaVis->depthBits != 24;
+
+   fb = (void *) _mesa_create_framebuffer( mesaVis,
+                                           GL_FALSE, softStencil,
+                                           softAccum, GL_FALSE );
+   driDrawPriv->driverPrivate = fb;
+   return (fb != NULL);
+}
+
+/* Destroy the Mesa framebuffer stored in the drawable's driverPrivate. */
+static void
+radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
+{
+   GLframebuffer *fb = (GLframebuffer *) driDrawPriv->driverPrivate;
+   _mesa_destroy_framebuffer( fb );
+}
+
+
+
+/* Shared OpenFullScreen/CloseFullScreen hook: does nothing and reports
+ * success.  Fullscreen mode isn't used for much -- it could be a way to
+ * shrink front/back buffers & get more texture memory if the client has
+ * changed the video resolution.
+ * Pageflipping is now done automatically whenever there is a single
+ * 3d client.
+ */
+static GLboolean
+radeonOpenCloseFullScreen( __DRIcontextPrivate *driContextPriv )
+{
+   return GL_TRUE;
+}
+/* Driver entry points; passed to __driUtilCreateScreen() below. */
+static struct __DriverAPIRec radeonAPI = {
+   .InitDriver      = radeonInitDriver,
+   .DestroyScreen   = radeonDestroyScreen,
+   .CreateContext   = radeonCreateContext,
+   .DestroyContext  = radeonDestroyContext,
+   .CreateBuffer    = radeonCreateBuffer,
+   .DestroyBuffer   = radeonDestroyBuffer,
+   .SwapBuffers     = radeonSwapBuffers,
+   .MakeCurrent     = radeonMakeCurrent,
+   .UnbindContext   = radeonUnbindContext,
+   .OpenFullScreen  = radeonOpenCloseFullScreen,	/* same no-op hook ... */
+   .CloseFullScreen = radeonOpenCloseFullScreen,	/* ... for both */
+   .GetSwapInfo     = getSwapInfo,
+   .GetMSC          = driGetMSC32,
+   .WaitForMSC      = driWaitForMSC32,
+   .WaitForSBC      = NULL,			/* no hook supplied */
+   .SwapBuffersMSC  = NULL			/* no hook supplied */
+};
+
+
+
+/*
+ * This is the bootstrap function for the driver.
+ * The __driCreateScreen name is the symbol that libGL.so fetches.
+ * Both variants hand radeonAPI to __driUtilCreateScreen() and return
+ * the resulting __DRIscreenPrivate as an untyped pointer.
+ */
+#ifndef _SOLO
+void *__driCreateScreen(Display *dpy, int scrn, __DRIscreen *psc,
+                        int numConfigs, __GLXvisualConfig *config)
+{
+   return (void *) __driUtilCreateScreen(dpy, scrn, psc, numConfigs, config,
+                                         &radeonAPI);
+}
+#else
+/* _SOLO variant: takes the driver and driver-context records instead. */
+void *__driCreateScreen(struct DRIDriverRec *driver,
+                        struct DRIDriverContextRec *driverContext)
+{
+   return (void *) __driUtilCreateScreen(driver, driverContext,
+                                         &radeonAPI);
+}
+#endif
+
+
+#ifndef _SOLO
+/* This function is called by libGL.so as soon as libGL.so is loaded.
+ * It enables the swap-control, video-sync and swap-frame-usage GLX
+ * extensions when __glXEnableExtension can be looked up. */
+void
+__driRegisterExtensions( void )
+{
+   PFNGLXENABLEEXTENSIONPROC enable_ext;
+
+   /* Proceed only when driCompareGLXAPIVersion() accepts 20030317. */
+   if ( driCompareGLXAPIVersion( 20030317 ) < 0 )
+      return;
+
+   enable_ext = (PFNGLXENABLEEXTENSIONPROC)
+      glXGetProcAddress( "__glXEnableExtension" );
+   if ( enable_ext == NULL )
+      return;
+   enable_ext( "GLX_SGI_swap_control", GL_FALSE );
+   enable_ext( "GLX_SGI_video_sync", GL_FALSE );
+   enable_ext( "GLX_MESA_swap_control", GL_FALSE );
+   enable_ext( "GLX_MESA_swap_frame_usage", GL_FALSE );
+}
+#endif
+
+
+/**
+ * Copy the context's swap statistics into \p sInfo.
+ * \return 0 on success, -1 if a required pointer is NULL.
+ */
+static int
+getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
+{
+   radeonContextPtr  ctx;
+
+   if ( !dPriv || !dPriv->driContextPriv || !sInfo )
+      return -1;
+
+   ctx = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+   if ( !ctx )
+      return -1;
+   sInfo->swap_count = ctx->swap_count;
+   sInfo->swap_ust = ctx->swap_ust;
+   sInfo->swap_missed_count = ctx->swap_missed_count;
+   /* Usage is only calculated once at least one swap was missed. */
+   if ( sInfo->swap_missed_count != 0 )
+      sInfo->swap_missed_usage = driCalculateSwapUsage( dPriv, 0, ctx->swap_missed_ust );
+   else
+      sInfo->swap_missed_usage = 0.0;
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
new file mode 100644
index 0000000000..2c69d8657a
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -0,0 +1,101 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_screen.h,v 1.5 2002/12/16 16:18:58 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifndef __RADEON_SCREEN_H__
+#define __RADEON_SCREEN_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+/*
+ * IMPORTS: these headers contain all the DRI, X and kernel-related
+ * definitions that we need.
+ */
+#include "dri_util.h"
+#include "radeon_common.h"
+#include "radeon_dri.h"
+#include "radeon_reg.h"
+#include "radeon_sarea.h"
+
+/* Handle, size and mapping of one DRM region used by the screen. */
+typedef struct {
+   drmHandle handle;			/* Handle to the DRM region */
+   drmSize size;			/* Size of the DRM region */
+   drmAddress map;			/* Mapping of the DRM region */
+} radeonRegionRec, *radeonRegionPtr;
+
+/* chipset features */
+#define RADEON_CHIPSET_TCL	(1 << 0)
+
+typedef struct {
+   /* Per-screen driver state; filled in by radeonCreateScreen(). */
+   int chipset;				/* RADEON_CHIPSET_* feature flags */
+   int cpp;				/* set from the DDX bpp / 8 */
+   int IsPCI;				/* Current card is a PCI card */
+   int AGPMode;
+   unsigned int irq;			/* IRQ number (0 means none) */
+
+   unsigned int frontOffset;
+   unsigned int frontPitch;
+   unsigned int backOffset;
+   unsigned int backPitch;
+
+   unsigned int depthOffset;
+   unsigned int depthPitch;
+
+    /* Shared texture data */
+   int numTexHeaps;			/* one less when the AGP heap is unused */
+   int texOffset[RADEON_NR_TEX_HEAPS];
+   int texSize[RADEON_NR_TEX_HEAPS];
+   int logTexGranularity[RADEON_NR_TEX_HEAPS];
+
+   radeonRegionRec mmio;		/* register region */
+   radeonRegionRec status;		/* status region; 'scratch' points into it */
+   radeonRegionRec agpTextures;	/* only mapped when !IsPCI */
+
+   drmBufMapPtr buffers;		/* result of drmMapBufs() */
+
+   __volatile__ CARD32 *scratch;
+
+   __DRIscreenPrivate *driScreen;	/* the sPriv this screen was created for */
+   unsigned int sarea_priv_offset;
+   unsigned int agp_buffer_offset;	/* offset in card memory space */
+} radeonScreenRec, *radeonScreenPtr;
+
+extern radeonScreenPtr radeonCreateScreen( __DRIscreenPrivate *sPriv );
+extern void radeonDestroyScreen( __DRIscreenPrivate *sPriv );
+
+#endif
+#endif /* __RADEON_SCREEN_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
new file mode 100644
index 0000000000..029d7cd8ee
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -0,0 +1,415 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_span.c,v 1.6 2002/10/30 12:51:56 alanh Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "glheader.h"
+#include "swrast/swrast.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_span.h"
+#include "radeon_tex.h"
+
+#define DBG 0
+/* Locals for the color span templates; buf/read_buf address the drawable's origin in the draw/read buffers. */
+#define LOCAL_VARS							\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
+   radeonScreenPtr radeonScreen = rmesa->radeonScreen;			\
+   __DRIscreenPrivate *sPriv = rmesa->dri.screen;			\
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;			\
+   GLuint pitch = radeonScreen->frontPitch * radeonScreen->cpp;		\
+   GLuint height = dPriv->h;						\
+   char *buf = (char *)(sPriv->pFB +					\
+			rmesa->state.color.drawOffset +			\
+			(dPriv->x * radeonScreen->cpp) +		\
+			(dPriv->y * pitch));				\
+   char *read_buf = (char *)(sPriv->pFB +				\
+			     rmesa->state.pixel.readOffset +		\
+			     (dPriv->x * radeonScreen->cpp) +		\
+			     (dPriv->y * pitch));			\
+   GLuint p;								\
+   (void) read_buf; (void) buf; (void) p
+/* Locals for the depth/stencil templates: buf is the start of the depth buffer, xo/yo the drawable origin. */
+#define LOCAL_DEPTH_VARS						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
+   radeonScreenPtr radeonScreen = rmesa->radeonScreen;			\
+   __DRIscreenPrivate *sPriv = rmesa->dri.screen;			\
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;			\
+   GLuint height = dPriv->h;						\
+   GLuint xo = dPriv->x;						\
+   GLuint yo = dPriv->y;						\
+   char *buf = (char *)(sPriv->pFB + radeonScreen->depthOffset);	\
+   (void) buf
+
+#define LOCAL_STENCIL_VARS	LOCAL_DEPTH_VARS
+
+/* True when (_x,_y) lies inside the current cliprect [minx,maxx) x [miny,maxy); arguments are parenthesized. */
+#define CLIPPIXEL( _x, _y )						\
+   (((_x) >= minx) && ((_x) < maxx) && ((_y) >= miny) && ((_y) < maxy))
+
+/* Clip span (_x,_y,_n) to the cliprect: result starts at _x1 with length _n1; _i advances past clipped leading pixels. */
+#define CLIPSPAN( _x, _y, _n, _x1, _n1, _i )				\
+   if ( (_y) < miny || (_y) >= maxy ) {					\
+      _n1 = 0, _x1 = (_x);						\
+   } else {								\
+      _n1 = (_n);							\
+      _x1 = (_x);							\
+      if ( _x1 < minx ) _i += (minx-_x1), _n1 -= (minx-_x1), _x1 = minx; \
+      if ( _x1 + _n1 >= maxx ) _n1 -= (_x1 + _n1 - maxx);		\
+   }
+
+#define Y_FLIP( _y )		(height - (_y) - 1)	/* mirror row _y within 'height' rows; _y parenthesized */
+
+/* HW_LOCK/HW_UNLOCK expand to nothing here; HW_CLIPLOOP walks the cliprects last-to-first in drawable-relative coords. */
+#define HW_LOCK() 
+
+#define HW_CLIPLOOP()							\
+   do {									\
+      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;		\
+      int _nc = dPriv->numClipRects;					\
+									\
+      while ( _nc-- ) {							\
+	 int minx = dPriv->pClipRects[_nc].x1 - dPriv->x;		\
+	 int miny = dPriv->pClipRects[_nc].y1 - dPriv->y;		\
+	 int maxx = dPriv->pClipRects[_nc].x2 - dPriv->x;		\
+	 int maxy = dPriv->pClipRects[_nc].y2 - dPriv->y;
+
+#define HW_ENDCLIPLOOP()						\
+      }									\
+   } while (0)
+
+#define HW_UNLOCK()							
+
+
+
+/* ================================================================
+ * Color buffer
+ */
+
+/* 16 bit, RGB565 color spanline and pixel functions: spantmp.h turns the
+ * macros below into the radeon*_RGB565 functions named through TAG(). */
+#define INIT_MONO_PIXEL(p, color) \
+  p = PACK_COLOR_565( color[0], color[1], color[2] )
+
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   *(GLushort *)(buf + _x*2 + _y*pitch) = ((((int)r & 0xf8) << 8) |	\
+					   (((int)g & 0xfc) << 3) |	\
+					   (((int)b & 0xf8) >> 3))
+
+#define WRITE_PIXEL( _x, _y, p )					\
+   *(GLushort *)(buf + _x*2 + _y*pitch) = p
+
+#define READ_RGBA( rgba, _x, _y )					\
+   do {									\
+      GLushort p = *(GLushort *)(read_buf + _x*2 + _y*pitch);		\
+      rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;				\
+      rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;				\
+      rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8;				\
+      rgba[3] = 0xff;							\
+   } while (0)
+
+#define TAG(x) radeon##x##_RGB565
+#include "spantmp.h"
+
+/* 32 bit, ARGB8888 color spanline and pixel functions (radeon*_ARGB8888).
+ * NOTE(review): only INIT_MONO_PIXEL is #undef'd here; the rest are presumably reset by spantmp.h. */
+#undef INIT_MONO_PIXEL
+#define INIT_MONO_PIXEL(p, color) \
+  p = PACK_COLOR_8888( color[3], color[0], color[1], color[2] )
+
+#define WRITE_RGBA( _x, _y, r, g, b, a )			\
+do {								\
+   *(GLuint *)(buf + _x*4 + _y*pitch) = ((b <<  0) |		\
+					 (g <<  8) |		\
+					 (r << 16) |		\
+					 (a << 24) );		\
+} while (0)
+
+#define WRITE_PIXEL( _x, _y, p ) 			\
+do {							\
+   *(GLuint *)(buf + _x*4 + _y*pitch) = p;		\
+} while (0)
+
+#define READ_RGBA( rgba, _x, _y )				\
+do {								\
+   volatile GLuint *ptr = (volatile GLuint *)(read_buf + _x*4 + _y*pitch); \
+   GLuint p = *ptr;					\
+   rgba[0] = (p >> 16) & 0xff;					\
+   rgba[1] = (p >>  8) & 0xff;					\
+   rgba[2] = (p >>  0) & 0xff;					\
+   rgba[3] = (p >> 24) & 0xff;					\
+} while (0)
+
+#define TAG(x) radeon##x##_ARGB8888
+#include "spantmp.h"
+
+
+
+/* ================================================================
+ * Depth buffer
+ */
+
+/* The Radeon family has depth tiling on all the time, so we have to convert
+ * the x,y coordinates into the memory bus address (mba) in the same
+ * manner as the engine.  In each case, the linear block address (ba)
+ * is calculated, and then wired with x and y to produce the final
+ * memory address.
+ */
+
+static GLuint radeon_mba_z32( radeonContextPtr rmesa,
+				       GLint x, GLint y )
+{
+   GLuint pitch = rmesa->radeonScreen->frontPitch;	/* NOTE(review): front pitch, not depthPitch -- confirm equal */
+   GLuint ba, address = 0;			/* a[0..1] = 0           */
+   /* Byte offset of pixel (x,y) in the tiled 32-bit depth buffer; ba indexes its 16x16 block. */
+   ba = (y / 16) * (pitch / 16) + (x / 16);
+
+   address |= (x & 0x7) << 2;			/* a[2..4] = x[0..2]     */
+   address |= (y & 0x3) << 5;			/* a[5..6] = y[0..1]     */
+   address |=
+      (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
+   address |= (ba & 0x3) << 8;			/* a[8..9] = ba[0..1]    */
+
+   address |= (y & 0x8) << 7;			/* a[10]   = y[3]        */
+   address |=
+      (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
+   address |= (ba & ~0x3) << 10;		/* a[12..] = ba[2..]     */
+
+   return address;
+}
+
+static __inline GLuint radeon_mba_z16( radeonContextPtr rmesa, GLint x, GLint y )
+{
+   GLuint pitch = rmesa->radeonScreen->frontPitch;	/* NOTE(review): front pitch, not depthPitch -- confirm equal */
+   GLuint ba, address = 0;			/* a[0]    = 0           */
+   /* Byte offset of pixel (x,y) in the tiled 16-bit depth buffer; ba indexes its 32x16 block. */
+   ba = (y / 16) * (pitch / 32) + (x / 32);
+
+   address |= (x & 0x7) << 1;			/* a[1..3] = x[0..2]     */
+   address |= (y & 0x7) << 4;			/* a[4..6] = y[0..2]     */
+   address |= (x & 0x8) << 4;			/* a[7]    = x[3]        */
+   address |= (ba & 0x3) << 8;			/* a[8..9] = ba[0..1]    */
+   address |= (y & 0x8) << 7;			/* a[10]   = y[3]        */
+   address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
+   address |= (ba & ~0x3) << 10;		/* a[12..] = ba[2..]     */
+
+   return address;
+}
+
+
+/* 16-bit depth buffer functions (radeon*_16, generated by depthtmp.h).
+ * NOTE(review): WRITE_DEPTH/READ_DEPTH end in ';' -- unsafe in an unbraced if/else. */
+#define WRITE_DEPTH( _x, _y, d )					\
+   *(GLushort *)(buf + radeon_mba_z16( rmesa, _x + xo, _y + yo )) = d;
+
+#define READ_DEPTH( d, _x, _y )						\
+   d = *(GLushort *)(buf + radeon_mba_z16( rmesa, _x + xo, _y + yo ));
+
+#define TAG(x) radeon##x##_16
+#include "depthtmp.h"
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions (radeon*_24_8):
+ * depth is the low 24 bits of each word; writes preserve the top byte. */
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint offset = radeon_mba_z32( rmesa, _x + xo, _y + yo );		\
+   GLuint tmp = *(GLuint *)(buf + offset);				\
+   tmp &= 0xff000000;							\
+   tmp |= ((d) & 0x00ffffff);						\
+   *(GLuint *)(buf + offset) = tmp;					\
+} while (0)
+
+#define READ_DEPTH( d, _x, _y )						\
+   d = *(GLuint *)(buf + radeon_mba_z32( rmesa, _x + xo,		\
+					 _y + yo )) & 0x00ffffff;
+
+#define TAG(x) radeon##x##_24_8
+#include "depthtmp.h"
+
+
+/* ================================================================
+ * Stencil buffer
+ */
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions: stencil is the top
+ * byte of each 32-bit depth word; writes preserve the low 24 depth bits. */
+#define WRITE_STENCIL( _x, _y, d )					\
+do {									\
+   GLuint offset = radeon_mba_z32( rmesa, _x + xo, _y + yo );		\
+   GLuint tmp = *(GLuint *)(buf + offset);				\
+   tmp &= 0x00ffffff;							\
+   tmp |= (((d) & 0xff) << 24);						\
+   *(GLuint *)(buf + offset) = tmp;					\
+} while (0)
+
+#define READ_STENCIL( d, _x, _y )					\
+do {									\
+   GLuint offset = radeon_mba_z32( rmesa, _x + xo, _y + yo );		\
+   GLuint tmp = *(GLuint *)(buf + offset);				\
+   tmp &= 0xff000000;							\
+   d = tmp >> 24;							\
+} while (0)
+
+#define TAG(x) radeon##x##_24_8
+#include "stenciltmp.h"
+
+
+/*
+ * This function is called to specify which buffer to read and write
+ * for software rasterization (swrast) fallbacks.  This doesn't necessarily
+ * correspond to glDrawBuffer() or glReadBuffer() calls.
+ */
+static void radeonSetBuffer( GLcontext *ctx,
+                             GLframebuffer *colorBuffer,
+                             GLuint bufferBit )
+{
+   /* Point both the swrast read and draw state at one color buffer.
+    * When sarea->pfCurrentPage is 1 the front/back roles are exchanged.
+    * colorBuffer is not used.
+    */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLboolean wantFront, flipped;
+   GLuint offset, pitch;
+
+   if ( bufferBit == FRONT_LEFT_BIT ) {
+      wantFront = GL_TRUE;
+   } else if ( bufferBit == BACK_LEFT_BIT ) {
+      wantFront = GL_FALSE;
+   } else {
+      /* Unknown buffer bit: leave the current state untouched. */
+      assert(0);
+      return;
+   }
+
+   flipped = ( rmesa->sarea->pfCurrentPage == 1 );
+
+   /* Front requested and not flipped, or back requested and flipped. */
+   if ( wantFront != flipped ) {
+      offset = rmesa->radeonScreen->frontOffset;
+      pitch  = rmesa->radeonScreen->frontPitch;
+   } else {
+      offset = rmesa->radeonScreen->backOffset;
+      pitch  = rmesa->radeonScreen->backPitch;
+   }
+
+   rmesa->state.pixel.readOffset = offset;
+   rmesa->state.pixel.readPitch  = pitch;
+   rmesa->state.color.drawOffset = offset;
+   rmesa->state.color.drawPitch  = pitch;
+}
+
+/* Move locking out to get reasonable span performance (10x better
+ * than doing this in HW_LOCK above).  WaitForIdle() is the main
+ * culprit.
+ */
+
+static void radeonSpanRenderStart( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   /* Fire pending vertices, take the hardware lock, then wait for idle; the lock stays held on return. */
+   RADEON_FIREVERTICES( rmesa );
+   LOCK_HARDWARE( rmesa );
+   radeonWaitForIdleLocked( rmesa );
+}
+
+static void radeonSpanRenderFinish( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   _swrast_flush( ctx );		/* flush swrast while the lock is still held */
+   UNLOCK_HARDWARE( rmesa );	/* pairs with LOCK_HARDWARE in radeonSpanRenderStart */
+}
+
+void radeonInitSpanFuncs( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
+   /* Install the swrast span callbacks matching the screen's bytes-per-pixel and the visual's depth bits. */
+   swdd->SetBuffer = radeonSetBuffer;
+
+   switch ( rmesa->radeonScreen->cpp ) {
+   case 2:
+      swdd->WriteRGBASpan	= radeonWriteRGBASpan_RGB565;
+      swdd->WriteRGBSpan	= radeonWriteRGBSpan_RGB565;
+      swdd->WriteMonoRGBASpan	= radeonWriteMonoRGBASpan_RGB565;
+      swdd->WriteRGBAPixels	= radeonWriteRGBAPixels_RGB565;
+      swdd->WriteMonoRGBAPixels	= radeonWriteMonoRGBAPixels_RGB565;
+      swdd->ReadRGBASpan	= radeonReadRGBASpan_RGB565;
+      swdd->ReadRGBAPixels      = radeonReadRGBAPixels_RGB565;
+      break;
+
+   case 4:
+      swdd->WriteRGBASpan	= radeonWriteRGBASpan_ARGB8888;
+      swdd->WriteRGBSpan	= radeonWriteRGBSpan_ARGB8888;
+      swdd->WriteMonoRGBASpan   = radeonWriteMonoRGBASpan_ARGB8888;
+      swdd->WriteRGBAPixels     = radeonWriteRGBAPixels_ARGB8888;
+      swdd->WriteMonoRGBAPixels = radeonWriteMonoRGBAPixels_ARGB8888;
+      swdd->ReadRGBASpan	= radeonReadRGBASpan_ARGB8888;
+      swdd->ReadRGBAPixels      = radeonReadRGBAPixels_ARGB8888;
+      break;
+
+   default:			/* any other cpp: color span callbacks are not set here */
+      break;
+   }
+
+   switch ( rmesa->glCtx->Visual.depthBits ) {
+   case 16:
+      swdd->ReadDepthSpan	= radeonReadDepthSpan_16;
+      swdd->WriteDepthSpan	= radeonWriteDepthSpan_16;
+      swdd->ReadDepthPixels	= radeonReadDepthPixels_16;
+      swdd->WriteDepthPixels	= radeonWriteDepthPixels_16;
+      break;
+
+   case 24:
+      swdd->ReadDepthSpan	= radeonReadDepthSpan_24_8;
+      swdd->WriteDepthSpan	= radeonWriteDepthSpan_24_8;
+      swdd->ReadDepthPixels	= radeonReadDepthPixels_24_8;
+      swdd->WriteDepthPixels	= radeonWriteDepthPixels_24_8;
+
+      swdd->ReadStencilSpan	= radeonReadStencilSpan_24_8;
+      swdd->WriteStencilSpan	= radeonWriteStencilSpan_24_8;
+      swdd->ReadStencilPixels	= radeonReadStencilPixels_24_8;
+      swdd->WriteStencilPixels	= radeonWriteStencilPixels_24_8;
+      break;
+
+   default:			/* any other depth size: depth/stencil callbacks are not set here */
+      break;
+   }
+
+   swdd->SpanRenderStart          = radeonSpanRenderStart;
+   swdd->SpanRenderFinish         = radeonSpanRenderFinish; 
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h
new file mode 100644
index 0000000000..011e8eff57
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_span.h
@@ -0,0 +1,45 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_span.h,v 1.2 2002/02/22 21:45:01 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifndef __RADEON_SPAN_H__
+#define __RADEON_SPAN_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+extern void radeonInitSpanFuncs( GLcontext *ctx );
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
new file mode 100644
index 0000000000..7b1bbe75fd
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -0,0 +1,2211 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state.c,v 1.8 2002/12/16 16:18:58 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "api_arrayelt.h"
+#include "enums.h"
+#include "colormac.h"
+#include "state.h"
+
+#include "swrast/swrast.h"
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "main/light.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_tcl.h"
+#include "radeon_tex.h"
+#include "radeon_swtcl.h"
+#include "radeon_vtxfmt.h"
+
+/* =============================================================
+ * Alpha blending
+ */
+
+static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+{
+   /* Program the alpha test in PP_MISC: comparison op plus reference byte. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
+   int test_op = 0;			/* stays 0 for an unrecognized func */
+   GLubyte refByte;
+
+   CLAMPED_FLOAT_TO_UBYTE(refByte, ref);
+
+   RADEON_STATECHANGE( rmesa, ctx );
+
+   switch ( func ) {
+   case GL_NEVER:
+      test_op = RADEON_ALPHA_TEST_FAIL;
+      break;
+   case GL_LESS:
+      test_op = RADEON_ALPHA_TEST_LESS;
+      break;
+   case GL_EQUAL:
+      test_op = RADEON_ALPHA_TEST_EQUAL;
+      break;
+   case GL_LEQUAL:
+      test_op = RADEON_ALPHA_TEST_LEQUAL;
+      break;
+   case GL_GREATER:
+      test_op = RADEON_ALPHA_TEST_GREATER;
+      break;
+   case GL_NOTEQUAL:
+      test_op = RADEON_ALPHA_TEST_NEQUAL;
+      break;
+   case GL_GEQUAL:
+      test_op = RADEON_ALPHA_TEST_GEQUAL;
+      break;
+   case GL_ALWAYS:
+      test_op = RADEON_ALPHA_TEST_PASS;
+      break;
+   }
+   pp_misc &= ~(RADEON_ALPHA_TEST_OP_MASK | RADEON_REF_ALPHA_MASK);
+   pp_misc |= test_op | (refByte & RADEON_REF_ALPHA_MASK);
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = pp_misc;
+}
+
+static void radeonBlendEquation( GLcontext *ctx, GLenum mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK;
+   GLboolean fallback = GL_FALSE;
+   /* Only add and subtract map to hardware; other modes fall back to software while blending is enabled. */
+   switch ( mode ) {
+   case GL_FUNC_ADD:
+   case GL_LOGIC_OP:
+      b |= RADEON_COMB_FCN_ADD_CLAMP;
+      break;
+
+   case GL_FUNC_SUBTRACT:
+      b |= RADEON_COMB_FCN_SUB_CLAMP;
+      break;
+
+   default:
+      if (ctx->Color.BlendEnabled)
+	 fallback = GL_TRUE;
+      else
+	 b |= RADEON_COMB_FCN_ADD_CLAMP;
+      break;
+   }
+
+   FALLBACK( rmesa, RADEON_FALLBACK_BLEND_EQ, fallback );
+   if ( !fallback ) {
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
+      if ( ctx->Color.ColorLogicOpEnabled ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
+      }
+   }
+}
+
+static void radeonBlendFunc( GLcontext *ctx, GLenum sfactor, GLenum dfactor )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & 
+      ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK);
+   GLboolean fallback = GL_FALSE;
+   /* Factors come from ctx->Color (RGB pair); sfactor/dfactor are not read. Constant-color factors force a fallback. */
+   switch ( ctx->Color.BlendSrcRGB ) {
+   case GL_ZERO:
+      b |= RADEON_SRC_BLEND_GL_ZERO;
+      break;
+   case GL_ONE:
+      b |= RADEON_SRC_BLEND_GL_ONE;
+      break;
+   case GL_DST_COLOR:
+      b |= RADEON_SRC_BLEND_GL_DST_COLOR;
+      break;
+   case GL_ONE_MINUS_DST_COLOR:
+      b |= RADEON_SRC_BLEND_GL_ONE_MINUS_DST_COLOR;
+      break;
+   case GL_SRC_COLOR:
+      b |= RADEON_SRC_BLEND_GL_SRC_COLOR;
+      break;
+   case GL_ONE_MINUS_SRC_COLOR:
+      b |= RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR;
+      break;
+   case GL_SRC_ALPHA:
+      b |= RADEON_SRC_BLEND_GL_SRC_ALPHA;
+      break;
+   case GL_ONE_MINUS_SRC_ALPHA:
+      b |= RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+      break;
+   case GL_DST_ALPHA:
+      b |= RADEON_SRC_BLEND_GL_DST_ALPHA;
+      break;
+   case GL_ONE_MINUS_DST_ALPHA:
+      b |= RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA;
+      break;
+   case GL_SRC_ALPHA_SATURATE:
+      b |= RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE;
+      break;
+   case GL_CONSTANT_COLOR:
+   case GL_ONE_MINUS_CONSTANT_COLOR:
+   case GL_CONSTANT_ALPHA:
+   case GL_ONE_MINUS_CONSTANT_ALPHA:
+      if (ctx->Color.BlendEnabled)
+	 fallback = GL_TRUE;
+      else
+	 b |= RADEON_SRC_BLEND_GL_ONE;
+      break;
+   default:
+      break;
+   }
+   /* Same mapping for the destination factor. */
+   switch ( ctx->Color.BlendDstRGB ) {
+   case GL_ZERO:
+      b |= RADEON_DST_BLEND_GL_ZERO;
+      break;
+   case GL_ONE:
+      b |= RADEON_DST_BLEND_GL_ONE;
+      break;
+   case GL_SRC_COLOR:
+      b |= RADEON_DST_BLEND_GL_SRC_COLOR;
+      break;
+   case GL_ONE_MINUS_SRC_COLOR:
+      b |= RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
+      break;
+   case GL_SRC_ALPHA:
+      b |= RADEON_DST_BLEND_GL_SRC_ALPHA;
+      break;
+   case GL_ONE_MINUS_SRC_ALPHA:
+      b |= RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+      break;
+   case GL_DST_COLOR:
+      b |= RADEON_DST_BLEND_GL_DST_COLOR;
+      break;
+   case GL_ONE_MINUS_DST_COLOR:
+      b |= RADEON_DST_BLEND_GL_ONE_MINUS_DST_COLOR;
+      break;
+   case GL_DST_ALPHA:
+      b |= RADEON_DST_BLEND_GL_DST_ALPHA;
+      break;
+   case GL_ONE_MINUS_DST_ALPHA:
+      b |= RADEON_DST_BLEND_GL_ONE_MINUS_DST_ALPHA;
+      break;
+   case GL_CONSTANT_COLOR:
+   case GL_ONE_MINUS_CONSTANT_COLOR:
+   case GL_CONSTANT_ALPHA:
+   case GL_ONE_MINUS_CONSTANT_ALPHA:
+      if (ctx->Color.BlendEnabled)
+	 fallback = GL_TRUE;
+      else
+	 b |= RADEON_DST_BLEND_GL_ZERO;
+      break;
+   default:
+      break;
+   }
+
+   FALLBACK( rmesa, RADEON_FALLBACK_BLEND_FUNC, fallback );
+   if ( !fallback ) {
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
+   }
+}
+
+static void radeonBlendFuncSeparate( GLcontext *ctx,
+				     GLenum sfactorRGB, GLenum dfactorRGB,
+				     GLenum sfactorA, GLenum dfactorA )
+{
+   radeonBlendFunc( ctx, sfactorRGB, dfactorRGB );	/* sfactorA/dfactorA are not used */
+}
+
+
+/* =============================================================
+ * Depth testing
+ */
+
+static void radeonDepthFunc( GLcontext *ctx, GLenum func )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   /* Set the Z test op from ctx->Depth.Func (the func argument is not read). */
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK;
+
+   switch ( ctx->Depth.Func ) {
+   case GL_NEVER:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_NEVER;
+      break;
+   case GL_LESS:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_LESS;
+      break;
+   case GL_EQUAL:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_EQUAL;
+      break;
+   case GL_LEQUAL:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_LEQUAL;
+      break;
+   case GL_GREATER:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_GREATER;
+      break;
+   case GL_NOTEQUAL:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_NEQUAL;
+      break;
+   case GL_GEQUAL:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_GEQUAL;
+      break;
+   case GL_ALWAYS:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_ALWAYS;
+      break;
+   }
+}
+
+
+static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
+{
+   /* Mirror ctx->Depth.Mask into the Z write-enable bit; 'flag' is not read. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   RADEON_STATECHANGE( rmesa, ctx );
+   if ( !ctx->Depth.Mask )
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_WRITE_ENABLE;
+   else
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |=  RADEON_Z_WRITE_ENABLE;
+}
+
+static void radeonClearDepth( GLcontext *ctx, GLclampd d )
+{
+   /* Scale the clear depth d to the integer range of the current Z format;
+    * any other format leaves state.depth.clear unchanged.
+    */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLuint zfmt = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
+			RADEON_DEPTH_FORMAT_MASK);
+
+   if ( zfmt == RADEON_DEPTH_FORMAT_16BIT_INT_Z ) {
+      rmesa->state.depth.clear = d * 0x0000ffff;
+   } else if ( zfmt == RADEON_DEPTH_FORMAT_24BIT_INT_Z ) {
+      rmesa->state.depth.clear = d * 0x00ffffff;
+   }
+}
+
+
+/* =============================================================
+ * Fog
+ */
+
+
+static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   union { int i; float f; } c, d;	/* float value <-> raw register word */
+   GLchan col[4];
+   /* Update fog state for the changed pname; values are read from ctx->Fog, param itself is not read. */
+   c.i = rmesa->hw.fog.cmd[FOG_C];
+   d.i = rmesa->hw.fog.cmd[FOG_D];
+   /* c/d start as the current FOG_C/FOG_D words; the fog atom is only touched below if they change. */
+   switch (pname) {
+   case GL_FOG_MODE:
+      if (!ctx->Fog.Enabled)
+	 return;
+      RADEON_STATECHANGE(rmesa, tcl);
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK;
+      switch (ctx->Fog.Mode) {
+      case GL_LINEAR:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_LINEAR;
+	 if (ctx->Fog.Start == ctx->Fog.End) {
+	    c.f = 1.0F;
+	    d.f = 1.0F;
+	 }
+	 else {
+	    c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+	    d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start);
+	 }
+	 break;
+      case GL_EXP:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP;
+	 c.f = 0.0;
+	 d.f = ctx->Fog.Density;
+	 break;
+      case GL_EXP2:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP2;
+	 c.f = 0.0;
+	 d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+	 break;
+      default:
+	 return;
+      }
+      break;
+   case GL_FOG_DENSITY:
+      switch (ctx->Fog.Mode) {
+      case GL_EXP:
+	 c.f = 0.0;
+	 d.f = ctx->Fog.Density;
+	 break;
+      case GL_EXP2:
+	 c.f = 0.0;
+	 d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+	 break;
+      default:
+	 break;
+      }
+      break;
+   case GL_FOG_START:
+   case GL_FOG_END:
+      if (ctx->Fog.Mode == GL_LINEAR) {
+	 if (ctx->Fog.Start == ctx->Fog.End) {
+	    c.f = 1.0F;
+	    d.f = 1.0F;
+	 } else {
+	    c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+	    d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start);
+	 }
+      }
+      break;
+   case GL_FOG_COLOR: 
+      RADEON_STATECHANGE( rmesa, ctx );
+      UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] =
+	 radeonPackColor( 4, col[0], col[1], col[2], 0 );
+      break;
+   case GL_FOG_COORDINATE_SOURCE_EXT: 
+      /* What to do?
+       */
+      break;
+   default:
+      return;
+   }
+
+   if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) {
+      RADEON_STATECHANGE( rmesa, fog );
+      rmesa->hw.fog.cmd[FOG_C] = c.i;
+      rmesa->hw.fog.cmd[FOG_D] = d.i;
+   }
+}
+
+
+/* =============================================================
+ * Scissoring
+ */
+
+
+static GLboolean intersect_rect( XF86DRIClipRectPtr out,
+				 XF86DRIClipRectPtr a,
+				 XF86DRIClipRectPtr b )
+{
+   /* out = a clipped against b; GL_FALSE when the overlap is empty. */
+   *out = *a;
+   out->x1 = ( b->x1 > out->x1 ) ? b->x1 : out->x1;
+   out->y1 = ( b->y1 > out->y1 ) ? b->y1 : out->y1;
+   out->x2 = ( b->x2 < out->x2 ) ? b->x2 : out->x2;
+   out->y2 = ( b->y2 < out->y2 ) ? b->y2 : out->y2;
+
+   return ( out->x1 < out->x2 && out->y1 < out->y2 ) ? GL_TRUE : GL_FALSE;
+}
+
+
+void radeonRecalcScissorRects( radeonContextPtr rmesa )
+{
+   /* Rebuild the scissored cliprect list: each drawable cliprect clipped to the scissor rect. */
+   XF86DRIClipRectPtr out;
+   int i;
+
+   /* Reset first so a failed allocation below leaves an empty list
+    * instead of a stale count paired with a NULL array. */
+   rmesa->state.scissor.numClipRects = 0;
+
+   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+	 rmesa->state.scissor.numAllocedClipRects += 1;	/* zero case */
+	 rmesa->state.scissor.numAllocedClipRects *= 2;
+      }
+
+      if (rmesa->state.scissor.pClipRects)
+	 FREE(rmesa->state.scissor.pClipRects);
+
+      rmesa->state.scissor.pClipRects = 
+	 MALLOC( rmesa->state.scissor.numAllocedClipRects * 
+		 sizeof(XF86DRIClipRectRec) );
+
+      if ( rmesa->state.scissor.pClipRects == NULL ) {
+	 rmesa->state.scissor.numAllocedClipRects = 0;
+	 return;
+      }
+   }
+
+   out = rmesa->state.scissor.pClipRects;
+   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
+      if ( intersect_rect( out, &rmesa->pClipRects[i],
+			   &rmesa->state.scissor.rect ) ) {
+	 rmesa->state.scissor.numClipRects++;
+	 out++;
+      }
+   }
+}
+
+
+static void radeonUpdateScissor( GLcontext *ctx )
+{
+   /* Turn ctx->Scissor into a screen-space rect (y flipped within the
+    * drawable height) and rebuild the scissored cliprects. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+
+   if ( !dPriv )
+      return;
+
+   rmesa->state.scissor.rect.x1 = ctx->Scissor.X + dPriv->x;
+   rmesa->state.scissor.rect.y1 =
+      (dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height) + dPriv->y;
+   rmesa->state.scissor.rect.x2 =
+      (ctx->Scissor.X + ctx->Scissor.Width - 1) + dPriv->x + 1;
+   rmesa->state.scissor.rect.y2 =
+      (dPriv->h - ctx->Scissor.Y - 1) + dPriv->y + 1;
+
+   radeonRecalcScissorRects( rmesa );
+}
+
+
+static void radeonScissor( GLcontext *ctx,
+			   GLint x, GLint y, GLsizei w, GLsizei h )
+{
+   /* The box itself is read from ctx->Scissor; x/y/w/h are not used. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if ( !ctx->Scissor.Enabled )
+      return;
+   RADEON_FIREVERTICES( rmesa );	/* don't pipeline cliprect changes */
+   radeonUpdateScissor( ctx );
+}
+
+
+/* =============================================================
+ * Culling
+ */
+
+static void radeonCullFace( GLcontext *ctx, GLenum unused )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
+   GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
+   /* Start from "both faces solid, nothing culled", then apply ctx->Polygon cull state if culling is on. */
+   s |= RADEON_FFACE_SOLID | RADEON_BFACE_SOLID;
+   t &= ~(RADEON_CULL_FRONT | RADEON_CULL_BACK);
+
+   if ( ctx->Polygon.CullFlag ) {
+      switch ( ctx->Polygon.CullFaceMode ) {
+      case GL_FRONT:
+	 s &= ~RADEON_FFACE_SOLID;
+	 t |= RADEON_CULL_FRONT;
+	 break;
+      case GL_BACK:
+	 s &= ~RADEON_BFACE_SOLID;
+	 t |= RADEON_CULL_BACK;
+	 break;
+      case GL_FRONT_AND_BACK:
+	 s &= ~(RADEON_FFACE_SOLID | RADEON_BFACE_SOLID);
+	 t |= (RADEON_CULL_FRONT | RADEON_CULL_BACK);
+	 break;
+      }
+   }
+
+   if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) {
+      RADEON_STATECHANGE(rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = s;
+   }
+
+   if ( rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] != t ) {
+      RADEON_STATECHANGE(rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = t;
+   }
+}
+
+static void radeonFrontFace( GLcontext *ctx, GLenum mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   /* Set the front-face winding in both SE_CNTL and the TCL control word; GL_CW leaves the TCL CCW bit clear. */
+   RADEON_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK;
+
+   RADEON_STATECHANGE( rmesa, tcl );
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_CULL_FRONT_IS_CCW;
+
+   switch ( mode ) {
+   case GL_CW:
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_FFACE_CULL_CW;
+      break;
+   case GL_CCW:
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_FFACE_CULL_CCW;
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_CULL_FRONT_IS_CCW;
+      break;
+   }
+}
+
+
+/* =============================================================
+ * Line state
+ */
+/* Line-width hook.  Stores the width scaled by 16 (U6.4 fixed point) and
+ * switches the wide-line path on only for widths greater than 1.0.
+ */
+static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const int wide = ( widthf > 1.0 );
+
+   RADEON_STATECHANGE( rmesa, lin );
+   RADEON_STATECHANGE( rmesa, set );
+
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (GLuint)(widthf * 16.0);
+
+   if ( wide )
+      rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_WIDELINE_ENABLE;
+   else
+      rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_WIDELINE_ENABLE;
+}
+
+/* Line-stipple hook.  Packs the low 8 bits of the repeat factor into
+ * bits 16..23 and the 16-bit pattern into the low half of the register.
+ */
+static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLuint repeat_bits  = ((GLuint)factor & 0xff) << 16;
+   const GLuint pattern_bits = (GLuint)pattern;
+
+   RADEON_STATECHANGE( rmesa, lin );
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = repeat_bits | pattern_bits;
+}
+
+
+/* =============================================================
+ * Masks
+ */
+/* Color-mask hook.  The r/g/b/a arguments are not read; the plane mask
+ * is packed from ctx->Color.ColorMask for the screen's pixel size and
+ * the atom is only dirtied when the packed value differs.
+ */
+static void radeonColorMask( GLcontext *ctx,
+                             GLboolean r, GLboolean g,
+                             GLboolean b, GLboolean a )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLuint planemask =
+      radeonPackColor( rmesa->radeonScreen->cpp,
+                       ctx->Color.ColorMask[RCOMP],
+                       ctx->Color.ColorMask[GCOMP],
+                       ctx->Color.ColorMask[BCOMP],
+                       ctx->Color.ColorMask[ACOMP] );
+
+   if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] == planemask )
+      return;
+
+   RADEON_STATECHANGE( rmesa, msk );
+   rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = planemask;
+}
+
+
+/* =============================================================
+ * Polygon state
+ */
+
+/* Polygon-offset hook.  Writes the raw IEEE bit patterns of 'factor' and
+ * of 'units' scaled by the depth-buffer scale into the z-bias registers.
+ *
+ * The float -> GLuint reinterpretation goes through a union rather than
+ * a pointer cast, which would violate C's aliasing rules.
+ */
+static void radeonPolygonOffset( GLcontext *ctx,
+                                 GLfloat factor, GLfloat units )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   union { GLfloat f; GLuint ui32; } zfactor, zconstant;
+
+   zfactor.f   = factor;
+   zconstant.f = units * rmesa->state.depth.scale;
+
+   RADEON_STATECHANGE( rmesa, zbs );
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = zfactor.ui32;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = zconstant.ui32;
+}
+
+/* Polygon-stipple hook.  Copies the 32 pattern rows into driver state in
+ * reverse row order, then hands the pattern to the kernel through the
+ * DRM_RADEON_STIPPLE command while holding the hardware lock.
+ */
+static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLuint *rows = (const GLuint *) mask;
+   drmRadeonStipple stipple;
+   GLuint dst;
+
+   /* The pattern must be flipped upside down: row 0 lands in slot 31. */
+   for ( dst = 32 ; dst-- > 0 ; ) {
+      rmesa->state.stipple.mask[dst] = rows[31 - dst];
+   }
+
+   stipple.mask = rmesa->state.stipple.mask;
+
+   /* TODO: push this into cmd mechanism
+    * FIXME: Use window x,y offsets into stipple RAM.
+    */
+   RADEON_FIREVERTICES( rmesa );
+   LOCK_HARDWARE( rmesa );
+   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE,
+                    &stipple, sizeof(drmRadeonStipple) );
+   UNLOCK_HARDWARE( rmesa );
+}
+
+/* Polygon-mode hook.  'face' and 'mode' are not read; the decision is
+ * taken from the DD_TRI_UNFILLED bit of ctx->_TriangleCaps.
+ */
+static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLboolean unfilled = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0;
+
+   /* Unfilled polygons generally cannot go through TCL, although some
+    * useful special cases do work.
+    */
+   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, unfilled);
+
+   if (!rmesa->TclFallback)
+      return;
+
+   radeonChooseRenderState( ctx );
+   radeonChooseVertexState( ctx );
+}
+
+
+/* =============================================================
+ * Rendering attributes
+ *
+ * We really don't want to recalculate all this every time we bind a
+ * texture.  These things shouldn't change all that often, so it makes
+ * sense to break them out of the core texture state update routines.
+ */
+
+/* Examine lighting, color-sum and fog state to determine whether
+ * separate specular should be enabled.
+ *
+ * Rebuilds, from scratch, the TCL compute/pack bits for the diffuse and
+ * specular colors, the TCL lighting-enable bit, the diffuse/specular
+ * combine bit, and the RADEON_SPECULAR_ENABLE bit of PP_CNTL.  The tcl
+ * atom is always marked dirty; the ctx atom only when PP_CNTL changes.
+ */
+static void radeonUpdateSpecular( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   /* Working copy of PP_CNTL; written back at the end if it changed. */
+   CARD32 p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
+
+   RADEON_STATECHANGE( rmesa, tcl );
+
+   /* Reset everything this function owns before re-deriving it. */
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR;
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE;
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC;
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_DIFFUSE;
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE;
+
+   p &= ~RADEON_SPECULAR_ENABLE;
+
+   /* Combining is the default; only the separate-specular case below
+    * turns it off again.
+    */
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_DIFFUSE_SPECULAR_COMBINE;
+
+
+   if (ctx->Light.Enabled &&
+       ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) {
+      /* Lighting with separate specular: compute and pack both colors. */
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+      p |=  RADEON_SPECULAR_ENABLE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= 
+	 ~RADEON_DIFFUSE_SPECULAR_COMBINE;
+   }
+   else if (ctx->Light.Enabled) {
+      /* Lighting with a single combined color: diffuse only. */
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+   } else if (ctx->Fog.ColorSumEnabled ) {
+      /* No lighting, but color sum: pack both colors without computing. */
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      p |= RADEON_SPECULAR_ENABLE;
+   } else {
+      /* Neither lighting nor color sum: pack the diffuse color only. */
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+   }
+
+   if (ctx->Fog.Enabled) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+
+      /* Bizarre: have to leave lighting enabled to get fog.
+       */
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+   }
+
+   /* Sanity check: the specular-enable bit derived above must agree with
+    * the DD_SEPARATE_SPECULAR flag in ctx->_TriangleCaps.
+    */
+   if ( ctx->_TriangleCaps & DD_SEPARATE_SPECULAR ) {
+      assert( (p & RADEON_SPECULAR_ENABLE) != 0 );
+   } else {
+      assert( (p & RADEON_SPECULAR_ENABLE) == 0 );
+   }
+
+   if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) {
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p;
+   }
+
+   /* Update vertex/render formats
+    */
+   if (rmesa->TclFallback) { 
+      radeonChooseRenderState( ctx );
+      radeonChooseVertexState( ctx );
+   }
+}
+
+
+/* =============================================================
+ * Materials
+ */
+
+
+/* Recompute the global-ambient (glt) state.  Must be re-run whenever
+ * color material, the material emissive/ambient terms, or the light
+ * model's global ambient color change.
+ */
+static void update_global_ambient( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   float *fcmd = (float *)RADEON_DB_STATE( glt );
+   const GLuint source_bits = (3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
+                              (3 << RADEON_AMBIENT_SOURCE_SHIFT);
+
+   if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] & source_bits) != 0) {
+      /* At least one of the two sources is not PREMULT: upload the
+       * plain scene ambient color.
+       */
+      COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient );
+   }
+   else {
+      /* Both emissive and ambient are PREMULT, so fold the material in
+       * here: emission + scene ambient * material ambient.
+       */
+      COPY_3V( &fcmd[GLT_RED],
+               ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]);
+      ACC_SCALE_3V( &fcmd[GLT_RED],
+                    ctx->Light.Model.Ambient,
+                    ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]);
+   }
+
+   RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
+}
+
+/* Refresh the ambient/diffuse/specular colors uploaded for light 'p'.
+ * Does nothing when that light is disabled.  Needs to run on a change to
+ *    - light[p].colors
+ *    - light[p].enabled
+ *    - material,
+ *    - colormaterial enabled
+ *    - colormaterial bitmask
+ */
+static void update_light_colors( GLcontext *ctx, GLuint p )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct gl_light *light = &ctx->Light.Light[p];
+   GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
+   GLuint tracked;
+   float *fcmd;
+
+   if (!light->Enabled)
+      return;
+
+   fcmd = (float *)RADEON_DB_STATE( lit[p] );
+
+   /* Material terms that follow the vertex color via color material. */
+   tracked = ctx->Light.ColorMaterialBitmask;
+   if (!ctx->Light.ColorMaterialEnabled)
+      tracked = 0;
+
+   COPY_4V( &fcmd[LIT_AMBIENT_RED], light->Ambient );
+   COPY_4V( &fcmd[LIT_DIFFUSE_RED], light->Diffuse );
+   COPY_4V( &fcmd[LIT_SPECULAR_RED], light->Specular );
+
+   /* Terms that are not tracked get premultiplied by the front material. */
+   if (!(tracked & MAT_BIT_FRONT_AMBIENT)) {
+      SELF_SCALE_3V( &fcmd[LIT_AMBIENT_RED], mat[MAT_ATTRIB_FRONT_AMBIENT] );
+   }
+   if (!(tracked & MAT_BIT_FRONT_DIFFUSE)) {
+      SELF_SCALE_3V( &fcmd[LIT_DIFFUSE_RED], mat[MAT_ATTRIB_FRONT_DIFFUSE] );
+   }
+   if (!(tracked & MAT_BIT_FRONT_SPECULAR)) {
+      SELF_SCALE_3V( &fcmd[LIT_SPECULAR_RED], mat[MAT_ATTRIB_FRONT_SPECULAR] );
+   }
+
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+}
+
+/* Decide whether two-sided lighting needs the TCL fallback.  With
+ * lighting and two-side both on, the fallback is requested when color
+ * material tracks different front and back terms, or otherwise when any
+ * front material attribute differs from its back counterpart.
+ */
+static void check_twoside_fallback( GLcontext *ctx )
+{
+   GLboolean fallback = GL_FALSE;
+
+   if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+      const GLuint cm_bits = ctx->Light.ColorMaterialBitmask;
+
+      if (ctx->Light.ColorMaterialEnabled &&
+          (cm_bits & BACK_MATERIAL_BITS) !=
+          ((cm_bits & FRONT_MATERIAL_BITS)<<1)) {
+         /* Asymmetric color material mode. */
+         fallback = GL_TRUE;
+      }
+      else {
+         /* Attributes are visited as (front, back) pairs. */
+         GLint attr = MAT_ATTRIB_FRONT_AMBIENT;
+
+         while (!fallback && attr < MAT_ATTRIB_FRONT_INDEXES) {
+            if (memcmp( ctx->Light.Material.Attrib[attr],
+                        ctx->Light.Material.Attrib[attr + 1],
+                        sizeof(GLfloat)*4) != 0)
+               fallback = GL_TRUE;
+            attr += 2;
+         }
+      }
+   }
+
+   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_LIGHT_TWOSIDE, fallback );
+}
+
+
+/* Color-material hook.  'face' and 'mode' are not read.  While color
+ * material is enabled, the four material source selectors in the light
+ * model control word are rebuilt from ctx->Light.ColorMaterialBitmask;
+ * if the word changes, all per-light colors and the global ambient are
+ * refreshed.  The two-side fallback is re-evaluated in every case.
+ */
+static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   if (ctx->Light.ColorMaterialEnabled) {
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      const GLuint tracked = ctx->Light.ColorMaterialBitmask;
+      GLuint ctl = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
+
+      /* Clear every selector, i.e. default to PREMULT.
+       * NOTE(review): each field is cleared with 0xf while the values
+       * written below are 3 -- confirm the selector field width.
+       */
+      ctl &= ~((0xf << RADEON_EMISSIVE_SOURCE_SHIFT) |
+               (0xf << RADEON_AMBIENT_SOURCE_SHIFT) |
+               (0xf << RADEON_DIFFUSE_SOURCE_SHIFT) |
+               (0xf << RADEON_SPECULAR_SOURCE_SHIFT));
+
+      if (tracked & MAT_BIT_FRONT_EMISSION)
+         ctl |= (3 << RADEON_EMISSIVE_SOURCE_SHIFT);
+      if (tracked & MAT_BIT_FRONT_AMBIENT)
+         ctl |= (3 << RADEON_AMBIENT_SOURCE_SHIFT);
+      if (tracked & MAT_BIT_FRONT_DIFFUSE)
+         ctl |= (3 << RADEON_DIFFUSE_SOURCE_SHIFT);
+      if (tracked & MAT_BIT_FRONT_SPECULAR)
+         ctl |= (3 << RADEON_SPECULAR_SOURCE_SHIFT);
+
+      if (ctl != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) {
+         GLuint light = 0;
+
+         RADEON_STATECHANGE( rmesa, tcl );
+         rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = ctl;
+
+         while (light < MAX_LIGHTS) {
+            update_light_colors( ctx, light );
+            light++;
+         }
+         update_global_ambient( ctx );
+      }
+   }
+
+   check_twoside_fallback( ctx );
+}
+
+/* Upload the front material into the mtl state.  Attributes currently
+ * tracked by color material are left untouched.  If the resulting state
+ * differs from what was there before, the dependent per-light colors,
+ * the two-side fallback and the global ambient are refreshed as well.
+ */
+void radeonUpdateMaterial( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
+   GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl );
+   GLuint light;
+   GLuint wanted = ~0;
+
+   /* Drop the attributes that are driven by the vertex color. */
+   if (ctx->Light.ColorMaterialEnabled)
+      wanted &= ~ctx->Light.ColorMaterialBitmask;
+
+   if (RADEON_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (wanted & MAT_BIT_FRONT_EMISSION) {
+      const GLfloat *emission = mat[MAT_ATTRIB_FRONT_EMISSION];
+
+      fcmd[MTL_EMMISSIVE_RED]   = emission[0];
+      fcmd[MTL_EMMISSIVE_GREEN] = emission[1];
+      fcmd[MTL_EMMISSIVE_BLUE]  = emission[2];
+      fcmd[MTL_EMMISSIVE_ALPHA] = emission[3];
+   }
+   if (wanted & MAT_BIT_FRONT_AMBIENT) {
+      const GLfloat *ambient = mat[MAT_ATTRIB_FRONT_AMBIENT];
+
+      fcmd[MTL_AMBIENT_RED]   = ambient[0];
+      fcmd[MTL_AMBIENT_GREEN] = ambient[1];
+      fcmd[MTL_AMBIENT_BLUE]  = ambient[2];
+      fcmd[MTL_AMBIENT_ALPHA] = ambient[3];
+   }
+   if (wanted & MAT_BIT_FRONT_DIFFUSE) {
+      const GLfloat *diffuse = mat[MAT_ATTRIB_FRONT_DIFFUSE];
+
+      fcmd[MTL_DIFFUSE_RED]   = diffuse[0];
+      fcmd[MTL_DIFFUSE_GREEN] = diffuse[1];
+      fcmd[MTL_DIFFUSE_BLUE]  = diffuse[2];
+      fcmd[MTL_DIFFUSE_ALPHA] = diffuse[3];
+   }
+   if (wanted & MAT_BIT_FRONT_SPECULAR) {
+      const GLfloat *specular = mat[MAT_ATTRIB_FRONT_SPECULAR];
+
+      fcmd[MTL_SPECULAR_RED]   = specular[0];
+      fcmd[MTL_SPECULAR_GREEN] = specular[1];
+      fcmd[MTL_SPECULAR_BLUE]  = specular[2];
+      fcmd[MTL_SPECULAR_ALPHA] = specular[3];
+   }
+   if (wanted & MAT_BIT_FRONT_SHININESS) {
+      fcmd[MTL_SHININESS] = mat[MAT_ATTRIB_FRONT_SHININESS][0];
+   }
+
+   if (RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mtl )) {
+      light = 0;
+      while (light < MAX_LIGHTS) {
+         update_light_colors( ctx, light );
+         light++;
+      }
+
+      check_twoside_fallback( ctx );
+      update_global_ambient( ctx );
+   }
+   else if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_STATE)) {
+      fprintf(stderr, "%s: Elided noop material call\n", __FUNCTION__);
+   }
+}
+
+/* _NEW_LIGHT
+ * _NEW_MODELVIEW
+ * _MESA_NEW_NEED_EYE_COORDS
+ *
+ * Uses derived state from mesa:
+ *       _VP_inf_norm
+ *       _h_inf_norm
+ *       _Position
+ *       _NormDirection
+ *       _ModelViewInvScale
+ *       _NeedEyeCoords
+ *       _EyeZDir
+ *
+ * which are calculated in light.c and are correct for the current
+ * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
+ * and _MESA_NEW_NEED_EYE_COORDS.  
+ *
+ * Three things are refreshed here: the model-space/eye-space lighting
+ * bit, the eye vector plus rescale factor, and the position/direction
+ * of every enabled light (only when lighting itself is enabled).
+ */
+static void update_light( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   /* Have to check these, or have an automatic shortcircuit mechanism
+    * to remove noop statechanges. (Or just do a better job on the
+    * front end).
+    */
+   {
+      GLuint tmp = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
+
+      /* Light in model space unless mesa needs eye coordinates. */
+      if (ctx->_NeedEyeCoords)
+	 tmp &= ~RADEON_LIGHT_IN_MODELSPACE;
+      else
+	 tmp |= RADEON_LIGHT_IN_MODELSPACE;
+      
+
+      /* Leave this test disabled: (unexplained q3 lockup) (even with
+         new packets)
+         NOTE(review): the change test below is in fact active, so the
+         remark above looks stale -- confirm before relying on it.
+      */
+      if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) 
+      {
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp;
+      }
+   }
+
+   {
+      /* Eye vector (note the negated Z) and the modelview rescale factor. */
+      GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( eye );
+      fcmd[EYE_X] = ctx->_EyeZDir[0];
+      fcmd[EYE_Y] = ctx->_EyeZDir[1];
+      fcmd[EYE_Z] = - ctx->_EyeZDir[2];
+      fcmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale;
+      RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.eye );
+   }
+
+
+
+   if (ctx->Light.Enabled) {
+      GLint p;
+      for (p = 0 ; p < MAX_LIGHTS; p++) {
+	 if (ctx->Light.Light[p].Enabled) {
+	    struct gl_light *l = &ctx->Light.Light[p];
+	    GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] );
+	    
+	    if (l->EyePosition[3] == 0.0) {
+	       /* w == 0: upload the derived _VP_inf_norm and _h_inf_norm
+	        * vectors with W forced to 0.
+	        */
+	       COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); 
+	       COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); 
+	       fcmd[LIT_POSITION_W] = 0;
+	       fcmd[LIT_DIRECTION_W] = 0;
+	    } else {
+	       /* w != 0: upload the derived position and the negated
+	        * normalized direction.
+	        */
+	       COPY_4V( &fcmd[LIT_POSITION_X], l->_Position );
+	       fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0];
+	       fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1];
+	       fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2];
+	       fcmd[LIT_DIRECTION_W] = 0;
+	    }
+
+	    RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+	 }
+      }
+   }
+}
+
+/* Per-light parameter hook.  Color parameters are forwarded to
+ * update_light_colors(); position and spot direction values are picked
+ * up later by update_light(), so only the per-light control flags are
+ * adjusted here; the remaining scalars are written straight into the
+ * light's command buffer.  Unknown pnames are ignored.
+ *
+ * Two lights share one per-light control word: even-numbered lights use
+ * the LIGHT_0 flags, odd-numbered lights the LIGHT_1 flags.
+ */
+static void radeonLightfv( GLcontext *ctx, GLenum light,
+                           GLenum pname, const GLfloat *params )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLint p = light - GL_LIGHT0;
+   struct gl_light *l = &ctx->Light.Light[p];
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
+   const GLuint ctl_idx = TCL_PER_LIGHT_CTL_0 + p/2;
+   const GLboolean odd = (p & 1) != 0;
+
+   switch (pname) {
+   case GL_AMBIENT:
+   case GL_DIFFUSE:
+   case GL_SPECULAR:
+      update_light_colors( ctx, p );
+      break;
+
+   case GL_SPOT_DIRECTION:
+      /* picked up in update_light */
+      break;
+
+   case GL_POSITION: {
+      /* The position itself is picked up in update_light; only the
+       * local-light flag is maintained here.
+       */
+      const GLuint local_flag = odd ? RADEON_LIGHT_1_IS_LOCAL
+                                    : RADEON_LIGHT_0_IS_LOCAL;
+
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (l->EyePosition[3] != 0.0F)
+         rmesa->hw.tcl.cmd[ctl_idx] |= local_flag;
+      else
+         rmesa->hw.tcl.cmd[ctl_idx] &= ~local_flag;
+      break;
+   }
+
+   case GL_SPOT_EXPONENT:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_EXPONENT] = params[0];
+      break;
+
+   case GL_SPOT_CUTOFF: {
+      const GLuint spot_flag = odd ? RADEON_LIGHT_1_IS_SPOT
+                                   : RADEON_LIGHT_0_IS_SPOT;
+
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff;
+
+      /* A cutoff of exactly 180 degrees means "not a spotlight". */
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (l->SpotCutoff != 180.0F)
+         rmesa->hw.tcl.cmd[ctl_idx] |= spot_flag;
+      else
+         rmesa->hw.tcl.cmd[ctl_idx] &= ~spot_flag;
+      break;
+   }
+
+   case GL_CONSTANT_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_CONST] = params[0];
+      break;
+
+   case GL_LINEAR_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_LINEAR] = params[0];
+      break;
+
+   case GL_QUADRATIC_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_QUADRATIC] = params[0];
+      break;
+
+   default:
+      return;
+   }
+
+   /* Range attenuation is only switched on when needed: for a light with
+    * a non-zero position W and a non-zero linear or quadratic term.
+    */
+   if (pname == GL_POSITION ||
+       pname == GL_LINEAR_ATTENUATION ||
+       pname == GL_QUADRATIC_ATTENUATION) {
+      const GLuint atten_flag = odd ? RADEON_LIGHT_1_ENABLE_RANGE_ATTEN
+                                    : RADEON_LIGHT_0_ENABLE_RANGE_ATTEN;
+
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (l->EyePosition[3] != 0.0F &&
+          (l->LinearAttenuation != 0.0F || l->QuadraticAttenuation != 0.0F))
+         rmesa->hw.tcl.cmd[ctl_idx] |= atten_flag;
+      else
+         rmesa->hw.tcl.cmd[ctl_idx] &= ~atten_flag;
+   }
+}
+
+		  
+
+
+/* Light-model hook.  'param' is not read; every value is taken from
+ * ctx->Light.Model.  Unknown pnames are ignored.
+ */
+static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
+                                const GLfloat *param )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (pname == GL_LIGHT_MODEL_AMBIENT) {
+      update_global_ambient( ctx );
+   }
+   else if (pname == GL_LIGHT_MODEL_LOCAL_VIEWER) {
+      RADEON_STATECHANGE( rmesa, tcl );
+      if (ctx->Light.Model.LocalViewer)
+         rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LOCAL_VIEWER;
+      else
+         rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LOCAL_VIEWER;
+   }
+   else if (pname == GL_LIGHT_MODEL_TWO_SIDE) {
+      RADEON_STATECHANGE( rmesa, tcl );
+      if (ctx->Light.Model.TwoSide)
+         rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_LIGHT_TWOSIDE;
+      else
+         rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_LIGHT_TWOSIDE;
+
+      /* Two-side lighting may force (or release) the TCL fallback, in
+       * which case the render and vertex paths must be re-chosen.
+       */
+      check_twoside_fallback( ctx );
+
+      if (rmesa->TclFallback) {
+         radeonChooseRenderState( ctx );
+         radeonChooseVertexState( ctx );
+      }
+   }
+   else if (pname == GL_LIGHT_MODEL_COLOR_CONTROL) {
+      radeonUpdateSpecular(ctx);
+   }
+}
+
+/* Shade-model hook.  Selects flat or Gouraud interpolation for the
+ * diffuse, alpha, specular and fog components together.  Any other mode
+ * is ignored, and the atom is only dirtied when the word changes.
+ */
+static void radeonShadeModel( GLcontext *ctx, GLenum mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL] &
+                    ~(RADEON_DIFFUSE_SHADE_MASK |
+                      RADEON_ALPHA_SHADE_MASK |
+                      RADEON_SPECULAR_SHADE_MASK |
+                      RADEON_FOG_SHADE_MASK);
+
+   if ( mode == GL_FLAT ) {
+      se_cntl |= (RADEON_DIFFUSE_SHADE_FLAT |
+                  RADEON_ALPHA_SHADE_FLAT |
+                  RADEON_SPECULAR_SHADE_FLAT |
+                  RADEON_FOG_SHADE_FLAT);
+   }
+   else if ( mode == GL_SMOOTH ) {
+      se_cntl |= (RADEON_DIFFUSE_SHADE_GOURAUD |
+                  RADEON_ALPHA_SHADE_GOURAUD |
+                  RADEON_SPECULAR_SHADE_GOURAUD |
+                  RADEON_FOG_SHADE_GOURAUD);
+   }
+   else {
+      return;
+   }
+
+   if ( se_cntl == rmesa->hw.set.cmd[SET_SE_CNTL] )
+      return;
+
+   RADEON_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
+}
+
+
+/* =============================================================
+ * User clip planes
+ */
+
+/* User clip plane hook.  'eq' is not read: the plane uploaded is the
+ * derived ctx->Transform._ClipUserPlane[] entry for 'plane'.
+ *
+ * The four float coefficients are stored as raw bit patterns.  The
+ * reinterpretation goes through a union rather than a GLint pointer
+ * cast, which would violate C's aliasing rules.
+ */
+static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+   GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLfloat *coef = ctx->Transform._ClipUserPlane[p];
+   union { GLfloat f; GLint i; } x, y, z, w;
+
+   x.f = coef[0];
+   y.f = coef[1];
+   z.f = coef[2];
+   w.f = coef[3];
+
+   RADEON_STATECHANGE( rmesa, ucp[p] );
+   rmesa->hw.ucp[p].cmd[UCP_X] = x.i;
+   rmesa->hw.ucp[p].cmd[UCP_Y] = y.i;
+   rmesa->hw.ucp[p].cmd[UCP_Z] = z.i;
+   rmesa->hw.ucp[p].cmd[UCP_W] = w.i;
+}
+
+/* Re-upload every enabled user clip plane from the derived
+ * ctx->Transform._ClipUserPlane[] array.
+ *
+ * The float coefficients are stored as raw bit patterns.  The
+ * reinterpretation goes through a union rather than a GLint pointer
+ * cast, which would violate C's aliasing rules.
+ */
+static void radeonUpdateClipPlanes( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint p;
+
+   for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
+      if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
+         const GLfloat *coef = ctx->Transform._ClipUserPlane[p];
+         union { GLfloat f; GLint i; } x, y, z, w;
+
+         x.f = coef[0];
+         y.f = coef[1];
+         z.f = coef[2];
+         w.f = coef[3];
+
+         RADEON_STATECHANGE( rmesa, ucp[p] );
+         rmesa->hw.ucp[p].cmd[UCP_X] = x.i;
+         rmesa->hw.ucp[p].cmd[UCP_Y] = y.i;
+         rmesa->hw.ucp[p].cmd[UCP_Z] = z.i;
+         rmesa->hw.ucp[p].cmd[UCP_W] = w.i;
+      }
+   }
+}
+
+
+/* =============================================================
+ * Stencil
+ */
+
+/* Stencil-function hook.  The arguments are not read: the compare
+ * function, reference value and value mask all come from entry 0 of
+ * ctx->Stencil.  A function outside the eight GL compare modes leaves
+ * the test field cleared.
+ */
+static void radeonStencilFunc( GLcontext *ctx, GLenum func,
+                               GLint ref, GLuint mask )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint refmask = ((ctx->Stencil.Ref[0] << RADEON_STENCIL_REF_SHIFT) |
+                     (ctx->Stencil.ValueMask[0] << RADEON_STENCIL_MASK_SHIFT));
+   GLuint test = 0;
+
+   RADEON_STATECHANGE( rmesa, ctx );
+   RADEON_STATECHANGE( rmesa, msk );
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_STENCIL_TEST_MASK;
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~(RADEON_STENCIL_REF_MASK|
+                                                   RADEON_STENCIL_VALUE_MASK);
+
+   switch ( ctx->Stencil.Function[0] ) {
+   case GL_NEVER:    test = RADEON_STENCIL_TEST_NEVER;   break;
+   case GL_LESS:     test = RADEON_STENCIL_TEST_LESS;    break;
+   case GL_EQUAL:    test = RADEON_STENCIL_TEST_EQUAL;   break;
+   case GL_LEQUAL:   test = RADEON_STENCIL_TEST_LEQUAL;  break;
+   case GL_GREATER:  test = RADEON_STENCIL_TEST_GREATER; break;
+   case GL_NOTEQUAL: test = RADEON_STENCIL_TEST_NEQUAL;  break;
+   case GL_GEQUAL:   test = RADEON_STENCIL_TEST_GEQUAL;  break;
+   case GL_ALWAYS:   test = RADEON_STENCIL_TEST_ALWAYS;  break;
+   }
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= test;
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= refmask;
+}
+
+/* Stencil write-mask hook.  'mask' is not read; the value comes from
+ * ctx->Stencil.WriteMask[0] and replaces the write-mask field.
+ */
+static void radeonStencilMask( GLcontext *ctx, GLuint mask )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   RADEON_STATECHANGE( rmesa, msk );
+
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK;
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |=
+      (ctx->Stencil.WriteMask[0] << RADEON_STENCIL_WRITEMASK_SHIFT);
+}
+
+/* Stencil-operation hook.  The arguments are not read; the fail, zfail
+ * and zpass operations come from entry 0 of ctx->Stencil.  An operation
+ * outside the six handled values leaves its field cleared.
+ */
+static void radeonStencilOp( GLcontext *ctx, GLenum fail,
+                             GLenum zfail, GLenum zpass )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint ops = 0;
+
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(RADEON_STENCIL_FAIL_MASK |
+                                                RADEON_STENCIL_ZFAIL_MASK |
+                                                RADEON_STENCIL_ZPASS_MASK);
+
+   /* Stencil test fails. */
+   switch ( ctx->Stencil.FailFunc[0] ) {
+   case GL_KEEP:    ops |= RADEON_STENCIL_FAIL_KEEP;    break;
+   case GL_ZERO:    ops |= RADEON_STENCIL_FAIL_ZERO;    break;
+   case GL_REPLACE: ops |= RADEON_STENCIL_FAIL_REPLACE; break;
+   case GL_INCR:    ops |= RADEON_STENCIL_FAIL_INC;     break;
+   case GL_DECR:    ops |= RADEON_STENCIL_FAIL_DEC;     break;
+   case GL_INVERT:  ops |= RADEON_STENCIL_FAIL_INVERT;  break;
+   }
+
+   /* Stencil test passes, depth test fails. */
+   switch ( ctx->Stencil.ZFailFunc[0] ) {
+   case GL_KEEP:    ops |= RADEON_STENCIL_ZFAIL_KEEP;    break;
+   case GL_ZERO:    ops |= RADEON_STENCIL_ZFAIL_ZERO;    break;
+   case GL_REPLACE: ops |= RADEON_STENCIL_ZFAIL_REPLACE; break;
+   case GL_INCR:    ops |= RADEON_STENCIL_ZFAIL_INC;     break;
+   case GL_DECR:    ops |= RADEON_STENCIL_ZFAIL_DEC;     break;
+   case GL_INVERT:  ops |= RADEON_STENCIL_ZFAIL_INVERT;  break;
+   }
+
+   /* Both tests pass. */
+   switch ( ctx->Stencil.ZPassFunc[0] ) {
+   case GL_KEEP:    ops |= RADEON_STENCIL_ZPASS_KEEP;    break;
+   case GL_ZERO:    ops |= RADEON_STENCIL_ZPASS_ZERO;    break;
+   case GL_REPLACE: ops |= RADEON_STENCIL_ZPASS_REPLACE; break;
+   case GL_INCR:    ops |= RADEON_STENCIL_ZPASS_INC;     break;
+   case GL_DECR:    ops |= RADEON_STENCIL_ZPASS_DEC;     break;
+   case GL_INVERT:  ops |= RADEON_STENCIL_ZPASS_INVERT;  break;
+   }
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= ops;
+}
+
+/* Stencil clear-value hook.  's' is not read.  Caches a packed word made
+ * of ctx->Stencil.Clear, an all-ones 8-bit value mask and the current
+ * stencil write mask in the driver state.
+ */
+static void radeonClearStencil( GLcontext *ctx, GLint s )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint packed = (GLuint) ctx->Stencil.Clear;
+
+   packed |= (0xff << RADEON_STENCIL_MASK_SHIFT);
+   packed |= (ctx->Stencil.WriteMask[0] << RADEON_STENCIL_WRITEMASK_SHIFT);
+
+   rmesa->state.stencil.clear = packed;
+}
+
+
+/* =============================================================
+ * Window position and viewport transformation
+ */
+
+/*
+ * To correctly position primitives:
+ */
+#define SUBPIXEL_X 0.125
+#define SUBPIXEL_Y 0.125
+
+/* Program the hardware viewport transform from mesa's window map.
+ *
+ * X is offset by the drawable's x position.  Y is flipped: the scale is
+ * negated and the offset is measured from the drawable's bottom edge
+ * (dPriv->y + dPriv->h).  Both offsets get the sub-pixel bias, and Z
+ * scale/offset are multiplied by the depth-buffer scale.
+ *
+ * The registers take raw IEEE bit patterns.  The reinterpretation goes
+ * through a union rather than a pointer cast, which would violate C's
+ * aliasing rules.
+ */
+void radeonUpdateWindow( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   GLfloat xoffset = (GLfloat)dPriv->x;
+   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   union { GLfloat f; GLuint ui32; } sx, tx, sy, ty, sz, tz;
+
+   sx.f = v[MAT_SX];
+   tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X;
+   sy.f = - v[MAT_SY];
+   ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+   sz.f = v[MAT_SZ] * rmesa->state.depth.scale;
+   tz.f = v[MAT_TZ] * rmesa->state.depth.scale;
+
+   /* Flush primitives queued under the old transform first. */
+   RADEON_FIREVERTICES( rmesa );
+   RADEON_STATECHANGE( rmesa, vpt );
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = sy.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = sz.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui32;
+}
+
+
+
+/* Viewport hook.  The x/y/width/height arguments are not read; the
+ * transform is rebuilt by radeonUpdateWindow().
+ */
+static void radeonViewport( GLcontext *ctx, GLint x, GLint y,
+                            GLsizei width, GLsizei height )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   /* Viewport changes are not pipelined: they would conflict with the
+    * window offset that is folded into the same registers.  Applying
+    * deltas to rescue pipelined values, or keeping the originals
+    * around, would be possible alternatives.
+    */
+   RADEON_FIREVERTICES( rmesa );
+   radeonUpdateWindow( ctx );
+}
+
+/* Depth-range hook.  Neither argument is read here; the whole viewport
+ * transform is simply rebuilt by radeonUpdateWindow().
+ */
+static void radeonDepthRange( GLcontext *ctx, GLclampd nearval,
+                              GLclampd farval )
+{
+   (void) nearval;
+   (void) farval;
+
+   radeonUpdateWindow( ctx );
+}
+
+/* Re-derive the position-dependent state from the drawable's current
+ * position: the viewport X/Y offsets, the polygon-stipple origin, and
+ * (always) the scissor rectangle.
+ *
+ * The offsets are computed with exactly the same expression that
+ * radeonUpdateWindow() uses, including the sub-pixel bias.  Without the
+ * bias the values would never match what radeonUpdateWindow() stored,
+ * and the bias would be lost as soon as this function ran.
+ */
+void radeonUpdateViewportOffset( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   GLfloat xoffset = (GLfloat)dPriv->x;
+   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   /* Union punning instead of pointer casts: reading a float through a
+    * GLuint pointer would violate C's aliasing rules.
+    */
+   union { GLfloat f; GLuint ui32; } tx, ty;
+
+   tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X;
+   ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+
+   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 ||
+        rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32 )
+   {
+      /* Note: this should also modify whatever data the context reset
+       * code uses...
+       */
+      /* Mark the atom dirty so the new offsets are actually emitted. */
+      RADEON_STATECHANGE( rmesa, vpt );
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
+
+      /* update polygon stipple x/y screen offset */
+      {
+         GLuint stx, sty;
+         GLuint m = rmesa->hw.msc.cmd[MSC_RE_MISC];
+
+         m &= ~(RADEON_STIPPLE_X_OFFSET_MASK |
+                RADEON_STIPPLE_Y_OFFSET_MASK);
+
+         /* add magic offsets, then invert */
+         stx = 31 - ((rmesa->dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
+         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
+                     & RADEON_STIPPLE_COORD_MASK);
+
+         m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
+               (sty << RADEON_STIPPLE_Y_OFFSET_SHIFT));
+
+         if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != m ) {
+            RADEON_STATECHANGE( rmesa, msc );
+            rmesa->hw.msc.cmd[MSC_RE_MISC] = m;
+         }
+      }
+   }
+
+   radeonUpdateScissor( ctx );
+}
+
+
+
+/* =============================================================
+ * Miscellaneous
+ */
+
+/* Clear-color hook.  Converts the four float components to bytes and
+ * caches the value packed for the screen's pixel size.
+ */
+static void radeonClearColor( GLcontext *ctx, const GLfloat color[4] )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLubyte c[4];
+   GLuint comp;
+
+   for ( comp = 0 ; comp < 4 ; comp++ ) {
+      CLAMPED_FLOAT_TO_UBYTE(c[comp], color[comp]);
+   }
+
+   rmesa->state.color.clear = radeonPackColor( rmesa->radeonScreen->cpp,
+                                               c[0], c[1], c[2], c[3] );
+}
+
+
+/* Render-mode hook.  Every mode other than GL_RENDER raises the
+ * render-mode fallback; GL_RENDER clears it.
+ */
+static void radeonRenderMode( GLcontext *ctx, GLenum mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLboolean need_fallback = (mode != GL_RENDER);
+
+   FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, need_fallback );
+}
+
+
+/* Hardware raster-op codes, looked up by radeonLogicOpCode() with the
+ * index (opcode - GL_CLEAR).  The table holds 16 entries, matching the
+ * bound that radeonLogicOpCode() asserts on that index.
+ */
+static GLuint radeon_rop_tab[] = {
+   RADEON_ROP_CLEAR,
+   RADEON_ROP_AND,
+   RADEON_ROP_AND_REVERSE,
+   RADEON_ROP_COPY,
+   RADEON_ROP_AND_INVERTED,
+   RADEON_ROP_NOOP,
+   RADEON_ROP_XOR,
+   RADEON_ROP_OR,
+   RADEON_ROP_NOR,
+   RADEON_ROP_EQUIV,
+   RADEON_ROP_INVERT,
+   RADEON_ROP_OR_REVERSE,
+   RADEON_ROP_COPY_INVERTED,
+   RADEON_ROP_OR_INVERTED,
+   RADEON_ROP_NAND,
+   RADEON_ROP_SET,
+};
+
+/* Logic-op hook.  Translates the GL opcode through radeon_rop_tab,
+ * using its distance from GL_CLEAR as the table index.
+ */
+static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLuint slot = (GLuint)opcode - GL_CLEAR;
+
+   ASSERT( slot < 16 );
+
+   RADEON_STATECHANGE( rmesa, msk );
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = radeon_rop_tab[slot];
+}
+
+
+/* Select the cliprect list used for rendering to 'mode', which must be
+ * GL_FRONT_LEFT or GL_BACK_LEFT; anything else is reported on stderr
+ * and ignored.  The back-buffer list is only used when the drawable has
+ * one and page flipping is off.  The scissor rectangles are recomputed
+ * afterwards if scissoring is enabled.
+ */
+void radeonSetCliprects( radeonContextPtr rmesa, GLenum mode )
+{
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+
+   if ( mode != GL_FRONT_LEFT && mode != GL_BACK_LEFT ) {
+      fprintf(stderr, "bad mode in radeonSetCliprects\n");
+      return;
+   }
+
+   /* Can't ignore 2d windows if we are page flipping, so the front
+    * list is used for the back buffer in that case too.
+    */
+   if ( mode == GL_BACK_LEFT &&
+        dPriv->numBackClipRects != 0 &&
+        !rmesa->doPageFlip ) {
+      rmesa->numClipRects = dPriv->numBackClipRects;
+      rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pBackClipRects;
+   }
+   else {
+      rmesa->numClipRects = dPriv->numClipRects;
+      rmesa->pClipRects = (XF86DRIClipRectPtr)dPriv->pClipRects;
+   }
+
+   if (rmesa->state.scissor.enabled)
+      radeonRecalcScissorRects( rmesa );
+}
+
+
+/* Draw-buffer hook.  Only a single front-left or back-left destination
+ * is handled by the driver itself; every other combination raises the
+ * draw-buffer fallback and returns.  Otherwise the matching cliprects
+ * are selected, the software rasterizer is told about the change, and
+ * the color buffer offset and pitch are re-emitted.
+ */
+static void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLenum target;
+
+   if (RADEON_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s %s\n", __FUNCTION__,
+              _mesa_lookup_enum_by_nr( mode ));
+
+   RADEON_FIREVERTICES(rmesa);	/* don't pipeline cliprect changes */
+
+   /* _DrawDestMask is easier to cope with than <mode>. */
+   if ( ctx->Color._DrawDestMask == FRONT_LEFT_BIT ) {
+      target = GL_FRONT_LEFT;
+   }
+   else if ( ctx->Color._DrawDestMask == BACK_LEFT_BIT ) {
+      target = GL_BACK_LEFT;
+   }
+   else {
+      /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */
+      FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE );
+   radeonSetCliprects( rmesa, target );
+
+   /* Update the s/w rast state as well, so that its buffer selection
+    * follows the same change.
+    */
+   _swrast_DrawBuffer(ctx, mode);
+
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = (rmesa->state.color.drawOffset &
+                                            RADEON_COLOROFFSET_MASK);
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch;
+}
+
+static void radeonReadBuffer( GLcontext *ctx, GLenum mode )
+{
+   /* Driver.ReadBuffer hook: nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
+}
+
+
+/* =============================================================
+ * State enable/disable
+ */
+/* Driver.Enable hook: mirror enabling/disabling of <cap> into the hardware state atoms or fallbacks. */
+static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint p, flag;
+
+   if ( RADEON_DEBUG & DEBUG_STATE )
+      fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__,
+	       _mesa_lookup_enum_by_nr( cap ),
+	       state ? "GL_TRUE" : "GL_FALSE" );
+
+   switch ( cap ) {
+      /* Fast track these: texture target enables need no work here.
+       */
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+      break;
+
+   case GL_ALPHA_TEST:
+      RADEON_STATECHANGE( rmesa, ctx );
+      if (state) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_ALPHA_TEST_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ALPHA_TEST_ENABLE;
+      }
+      break;
+
+   case GL_BLEND:
+      RADEON_STATECHANGE( rmesa, ctx );
+      if (state) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ALPHA_BLEND_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ALPHA_BLEND_ENABLE;
+      }
+      if ( ctx->Color.ColorLogicOpEnabled ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
+      }
+
+      /* Catch a possible fallback: re-run the blend equation/func hooks
+       * when enabling; clear both blend fallbacks when disabling. */
+      if (state) {
+	 ctx->Driver.BlendEquation( ctx, ctx->Color.BlendEquation );
+	 ctx->Driver.BlendFunc( ctx, ctx->Color.BlendSrcRGB,
+				ctx->Color.BlendDstRGB );
+      }
+      else {
+	 FALLBACK( rmesa, RADEON_FALLBACK_BLEND_FUNC, GL_FALSE );
+	 FALLBACK( rmesa, RADEON_FALLBACK_BLEND_EQ, GL_FALSE );
+      }
+      break;
+
+   case GL_CLIP_PLANE0:
+   case GL_CLIP_PLANE1:
+   case GL_CLIP_PLANE2:
+   case GL_CLIP_PLANE3:
+   case GL_CLIP_PLANE4:
+   case GL_CLIP_PLANE5: 
+      p = cap-GL_CLIP_PLANE0;   /* plane index 0..5, used as the enable-bit shift */
+      RADEON_STATECHANGE( rmesa, tcl );
+      if (state) {
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (RADEON_UCP_ENABLE_0<<p);
+	 radeonClipPlane( ctx, cap, NULL );
+      }
+      else {
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(RADEON_UCP_ENABLE_0<<p);
+      }
+      break;
+
+   case GL_COLOR_MATERIAL:
+      radeonColorMaterial( ctx, 0, 0 );
+      if (!state) 
+	 radeonUpdateMaterial( ctx );
+      break;
+
+   case GL_CULL_FACE:
+      radeonCullFace( ctx, 0 );
+      break;
+
+   case GL_DEPTH_TEST:
+      RADEON_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_Z_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_Z_ENABLE;
+      }
+      break;
+
+   case GL_DITHER:
+      RADEON_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_DITHER_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE;
+      }
+      break;
+
+   case GL_FOG:
+      RADEON_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_FOG_ENABLE;
+	 radeonFogfv( ctx, GL_FOG_MODE, 0 );
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_FOG_ENABLE;
+	 RADEON_STATECHANGE(rmesa, tcl);
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK;
+      }
+      radeonUpdateSpecular( ctx ); /* for PK_SPEC */
+      if (rmesa->TclFallback) 
+	 radeonChooseVertexState( ctx );
+      _mesa_allow_light_in_model( ctx, !state );
+      break;
+
+   case GL_LIGHT0:
+   case GL_LIGHT1:
+   case GL_LIGHT2:
+   case GL_LIGHT3:
+   case GL_LIGHT4:
+   case GL_LIGHT5:
+   case GL_LIGHT6:
+   case GL_LIGHT7:
+      RADEON_STATECHANGE(rmesa, tcl);
+      p = cap - GL_LIGHT0;
+      if (p&1) 
+	 flag = (RADEON_LIGHT_1_ENABLE |
+		 RADEON_LIGHT_1_ENABLE_AMBIENT | 
+		 RADEON_LIGHT_1_ENABLE_SPECULAR);
+      else
+	 flag = (RADEON_LIGHT_0_ENABLE |
+		 RADEON_LIGHT_0_ENABLE_AMBIENT | 
+		 RADEON_LIGHT_0_ENABLE_SPECULAR);
+      /* Two lights share each control word: p/2 picks the word, p&1 the bit group. */
+      if (state)
+	 rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag;
+
+      /* Re-run the per-light colour update for light p.
+       */
+      update_light_colors( ctx, p );
+      break;
+
+   case GL_LIGHTING:
+      RADEON_STATECHANGE(rmesa, tcl);
+      radeonUpdateSpecular(ctx);
+      check_twoside_fallback( ctx );
+      break;
+
+   case GL_LINE_SMOOTH:
+      RADEON_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_ANTI_ALIAS_LINE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ANTI_ALIAS_LINE;
+      }
+      break;
+
+   case GL_LINE_STIPPLE:
+      RADEON_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_PATTERN_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_PATTERN_ENABLE;
+      }
+      break;
+
+   case GL_COLOR_LOGIC_OP:
+      RADEON_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
+      }
+      break;
+      
+   case GL_NORMALIZE:
+      RADEON_STATECHANGE( rmesa, tcl );
+      if ( state ) {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_NORMALIZE_NORMALS;
+      } else {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_NORMALIZE_NORMALS;
+      }
+      break;
+
+   case GL_POLYGON_OFFSET_POINT:
+      if (rmesa->dri.drmMinor == 1) {   /* no SE_CNTL zbias bit is touched on this path */
+	 radeonChooseRenderState( ctx );
+      } 
+      else {
+	 RADEON_STATECHANGE( rmesa, set );
+	 if ( state ) {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_POINT;
+	 } else {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_POINT;
+	 }
+      }
+      break;
+
+   case GL_POLYGON_OFFSET_LINE:
+      if (rmesa->dri.drmMinor == 1) {
+	 radeonChooseRenderState( ctx );
+      } 
+      else {
+	 RADEON_STATECHANGE( rmesa, set );
+	 if ( state ) {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_LINE;
+	 } else {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_LINE;
+	 }
+      }
+      break;
+
+   case GL_POLYGON_OFFSET_FILL:
+      if (rmesa->dri.drmMinor == 1) {
+	 radeonChooseRenderState( ctx );
+      } 
+      else {
+	 RADEON_STATECHANGE( rmesa, set );
+	 if ( state ) {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_TRI;
+	 } else {
+	    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_TRI;
+	 }
+      }
+      break;
+
+   case GL_POLYGON_SMOOTH:
+      RADEON_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_ANTI_ALIAS_POLY;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ANTI_ALIAS_POLY;
+      }
+      break;
+
+   case GL_POLYGON_STIPPLE:
+      RADEON_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_STIPPLE_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_STIPPLE_ENABLE;
+      }
+      break;
+
+   case GL_RESCALE_NORMAL_EXT: {
+      GLboolean tmp = ctx->_NeedEyeCoords ? state : !state;   /* inverted when !_NeedEyeCoords */
+      RADEON_STATECHANGE( rmesa, tcl );
+      if ( tmp ) {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_RESCALE_NORMALS;
+      } else {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
+      }
+      break;
+   }
+
+   case GL_SCISSOR_TEST:
+      RADEON_FIREVERTICES( rmesa );
+      rmesa->state.scissor.enabled = state;
+      radeonUpdateScissor( ctx );
+      break;
+
+   case GL_STENCIL_TEST:
+      if ( rmesa->state.stencil.hwBuffer ) {
+	 RADEON_STATECHANGE( rmesa, ctx );
+	 if ( state ) {
+	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_STENCIL_ENABLE;
+	 } else {
+	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE;
+	 }
+      } else {   /* no hardware stencil buffer: the enable state becomes the fallback flag */
+	 FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state );
+      }
+      break;
+
+   case GL_TEXTURE_GEN_Q:
+   case GL_TEXTURE_GEN_R:
+   case GL_TEXTURE_GEN_S:
+   case GL_TEXTURE_GEN_T:
+      /* Picked up in radeonUpdateTextureState.
+       */
+      rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; 
+      break;
+
+   case GL_COLOR_SUM_EXT:
+      radeonUpdateSpecular ( ctx );
+      break;
+
+   default:
+      return;   /* any other cap changes no state in this function */
+   }
+}
+
+/* Driver.LightingSpaceChange hook: recompute RADEON_RESCALE_NORMALS from ctx->_NeedEyeCoords. */
+static void radeonLightingSpaceChange( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLboolean tmp;
+   RADEON_STATECHANGE( rmesa, tcl );
+
+   if (RADEON_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
+	      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]);
+   /* The bit follows Transform.RescaleNormals with eye coords, and its inverse without. */
+   if (ctx->_NeedEyeCoords)
+      tmp = ctx->Transform.RescaleNormals;
+   else
+      tmp = !ctx->Transform.RescaleNormals;
+
+   if ( tmp ) {
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_RESCALE_NORMALS;
+   } else {
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
+   }
+
+   if (RADEON_DEBUG & DEBUG_STATE) 
+      fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
+	      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]);
+}
+
+/* =============================================================
+ * Deferred state management - matrices, textures, other?
+ */
+
+
+
+/* Store the transpose of the 4x4 matrix <src> into matrix atom <idx>, then signal the change. */
+static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
+{
+   float *out = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
+   int col;
+
+   /* Group <col> of the output takes src[col], src[col+4], src[col+8], src[col+12]. */
+   for (col = 0 ; col < 4 ; col++) {
+      out[4*col + 0] = src[col];
+      out[4*col + 1] = src[col + 4];
+      out[4*col + 2] = src[col + 8];
+      out[4*col + 3] = src[col + 12];
+   }
+
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+}
+
+static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
+{
+   float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
+   memcpy(dest, src, 16*sizeof(float));   /* straight 16-float copy: no transpose, unlike upload_matrix() */
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+}
+
+/* Upload texture/texgen matrices for units 0-1, then refresh TEXTURE_PROC_CTL and OUTPUT_VTXSEL. */
+static void update_texturematrix( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   GLuint tpc;			/* rebuilt from the enable masks below */
+   GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
+   int unit;
+
+   rmesa->TexMatEnabled = 0;
+
+   for (unit = 0 ; unit < 2; unit++) {
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+	 continue;		/* disabled unit: nothing to upload */
+      if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
+	 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+	 
+	 rmesa->TexMatEnabled |= (RADEON_TEXGEN_TEXMAT_0_ENABLE|
+				  RADEON_TEXMAT_0_ENABLE) << unit;
+
+	 if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
+	    /* Need to preconcatenate any active texgen 
+	     * obj/eyeplane matrices:
+	     */
+	    _math_matrix_mul_matrix( &rmesa->tmpmat, 
+				     &rmesa->TexGenMatrix[unit],
+				     ctx->TextureMatrixStack[unit].Top );
+	    upload_matrix( rmesa, rmesa->tmpmat.m, TEXMAT_0+unit );
+	 } 
+	 else {
+	    rmesa->TexMatEnabled |= 
+	       (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+	    upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m, 
+			   TEXMAT_0+unit );
+	 }
+      }
+      else if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
+	 upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m, 
+			TEXMAT_0+unit );
+      }
+   }
+
+   /* The combined texmat/texgen enables form the new TEXTURE_PROC_CTL value. */
+   tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled);
+   /* Clear both units' output selects, then choose computed vs. input texcoords. */
+   vs &= ~((0xf << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
+	   (0xf << RADEON_TCL_TEX_1_OUTPUT_SHIFT));
+
+   if (tpc & RADEON_TEXGEN_TEXMAT_0_ENABLE)
+      vs |= RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT;
+   else
+      vs |= RADEON_TCL_TEX_INPUT_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT;
+
+   if (tpc & RADEON_TEXGEN_TEXMAT_1_ENABLE)
+      vs |= RADEON_TCL_TEX_COMPUTED_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT;
+   else
+      vs |= RADEON_TCL_TEX_INPUT_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT;
+
+   if (tpc != rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] ||
+       vs != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL]) {
+      /* Only touch the tcl atom when one of the two words actually changed. */
+      RADEON_STATECHANGE(rmesa, tcl);
+      rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = tpc;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] = vs;
+   }
+}
+
+
+/* Process the GL state changes accumulated in rmesa->NewGLState, then reset that mask to 0. */
+void radeonValidateState( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint new_state = rmesa->NewGLState;
+
+   if (new_state & _NEW_TEXTURE) {
+      radeonUpdateTextureState( ctx );
+      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
+   }
+
+   /* Need an event driven matrix update?
+    */
+   if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) 
+      upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, MODEL_PROJ );
+
+   /* Need these for lighting (shouldn't upload otherwise)
+    */
+   if (new_state & (_NEW_MODELVIEW)) {
+      upload_matrix( rmesa, ctx->ModelviewMatrixStack.Top->m, MODEL );
+      upload_matrix_t( rmesa, ctx->ModelviewMatrixStack.Top->inv, MODEL_IT );
+   }
+
+   /* Does this need to be triggered on eg. modelview for
+    * texgen-derived objplane/eyeplane matrices?
+    */
+   if (new_state & _NEW_TEXTURE_MATRIX) {
+      update_texturematrix( ctx );
+   }      
+
+   if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
+      update_light( ctx );
+   }
+
+   /* emit all active clip planes if projection matrix changes.
+    */
+   if (new_state & (_NEW_PROJECTION)) {
+      if (ctx->Transform.ClipPlanesEnabled) 
+	 radeonUpdateClipPlanes( ctx );
+   }
+
+   /* Every flag captured above has been handled; start accumulating afresh. */
+   rmesa->NewGLState = 0;
+}
+
+/* Driver.UpdateState hook: forward <new_state> to the software modules and accumulate it in NewGLState. */
+static void radeonInvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _ac_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _ae_invalidate_state( ctx, new_state );
+   RADEON_CONTEXT(ctx)->NewGLState |= new_state;   /* consumed later by radeonValidateState() */
+   radeonVtxfmtInvalidate( ctx );
+}
+/* TNL RunPipeline hook: validate pending driver state, then run the TNL pipeline. */
+static void radeonWrapRunPipeline( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (0)
+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
+
+   /* Validate state:
+    */
+   if (rmesa->NewGLState)
+      radeonValidateState( ctx );
+   /* A non-NULL vb.Material turns the TCL material fallback on for the pipeline run. */
+   if (tnl->vb.Material) {
+      TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_TRUE );
+   }
+
+   /* Run the pipeline.
+    */ 
+   _tnl_run_pipeline( ctx );
+
+   if (tnl->vb.Material) {
+      TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_FALSE );
+      radeonUpdateMaterial( ctx ); /* not needed any more? */
+   }
+}
+
+
+/* Initialize the driver's state functions: install the radeon state hooks and the
+ * swrast pixel/imaging fallbacks in ctx->Driver, plus two TNL driver callbacks. */
+void radeonInitStateFuncs( GLcontext *ctx )
+{
+   ctx->Driver.UpdateState		= radeonInvalidateState;
+   ctx->Driver.LightingSpaceChange      = radeonLightingSpaceChange;
+
+   ctx->Driver.DrawBuffer		= radeonDrawBuffer;
+   ctx->Driver.ReadBuffer		= radeonReadBuffer;
+
+   ctx->Driver.AlphaFunc		= radeonAlphaFunc;
+   ctx->Driver.BlendEquation		= radeonBlendEquation;
+   ctx->Driver.BlendFunc		= radeonBlendFunc;
+   ctx->Driver.BlendFuncSeparate	= radeonBlendFuncSeparate;
+   ctx->Driver.ClearColor		= radeonClearColor;
+   ctx->Driver.ClearDepth		= radeonClearDepth;
+   ctx->Driver.ClearIndex		= NULL;
+   ctx->Driver.ClearStencil		= radeonClearStencil;
+   ctx->Driver.ClipPlane		= radeonClipPlane;
+   ctx->Driver.ColorMask		= radeonColorMask;
+   ctx->Driver.CullFace			= radeonCullFace;
+   ctx->Driver.DepthFunc		= radeonDepthFunc;
+   ctx->Driver.DepthMask		= radeonDepthMask;
+   ctx->Driver.DepthRange		= radeonDepthRange;
+   ctx->Driver.Enable			= radeonEnable;
+   ctx->Driver.Fogfv			= radeonFogfv;
+   ctx->Driver.FrontFace		= radeonFrontFace;
+   ctx->Driver.Hint			= NULL;
+   ctx->Driver.IndexMask		= NULL;
+   ctx->Driver.LightModelfv		= radeonLightModelfv;
+   ctx->Driver.Lightfv			= radeonLightfv;
+   ctx->Driver.LineStipple              = radeonLineStipple;
+   ctx->Driver.LineWidth                = radeonLineWidth;
+   ctx->Driver.LogicOpcode		= radeonLogicOpCode;
+   ctx->Driver.PolygonMode		= radeonPolygonMode;
+   /* PolygonOffset is hooked up only when dri.drmMinor is above 1. */
+   if (RADEON_CONTEXT(ctx)->dri.drmMinor > 1)
+      ctx->Driver.PolygonOffset		= radeonPolygonOffset;
+
+   ctx->Driver.PolygonStipple		= radeonPolygonStipple;
+   ctx->Driver.RenderMode		= radeonRenderMode;
+   ctx->Driver.Scissor			= radeonScissor;
+   ctx->Driver.ShadeModel		= radeonShadeModel;
+   ctx->Driver.StencilFunc		= radeonStencilFunc;
+   ctx->Driver.StencilMask		= radeonStencilMask;
+   ctx->Driver.StencilOp		= radeonStencilOp;
+   ctx->Driver.Viewport			= radeonViewport;
+
+   /* Pixel path fallbacks
+    */
+   ctx->Driver.Accum                    = _swrast_Accum;
+   ctx->Driver.Bitmap                   = _swrast_Bitmap;
+   ctx->Driver.CopyPixels               = _swrast_CopyPixels;
+   ctx->Driver.DrawPixels               = _swrast_DrawPixels;
+   ctx->Driver.ReadPixels               = _swrast_ReadPixels;
+
+   /* Swrast hooks for imaging extensions:
+    */
+   ctx->Driver.CopyColorTable		= _swrast_CopyColorTable;
+   ctx->Driver.CopyColorSubTable	= _swrast_CopyColorSubTable;
+   ctx->Driver.CopyConvolutionFilter1D	= _swrast_CopyConvolutionFilter1D;
+   ctx->Driver.CopyConvolutionFilter2D	= _swrast_CopyConvolutionFilter2D;
+   /* TNL module callbacks: material change notification and the pipeline wrapper. */
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = radeonUpdateMaterial;
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = radeonWrapRunPipeline;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.h b/src/mesa/drivers/dri/radeon/radeon_state.h
new file mode 100644
index 0000000000..a181d427e6
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_state.h
@@ -0,0 +1,77 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state.h,v 1.5 2002/11/05 17:46:09 tsi Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __RADEON_STATE_H__
+#define __RADEON_STATE_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "radeon_context.h"
+
+extern void radeonInitState( radeonContextPtr rmesa );
+extern void radeonInitStateFuncs( GLcontext *ctx );
+
+extern void radeonUpdateMaterial( GLcontext *ctx );
+
+extern void radeonSetCliprects( radeonContextPtr rmesa, GLenum mode );
+extern void radeonRecalcScissorRects( radeonContextPtr rmesa );
+extern void radeonUpdateViewportOffset( GLcontext *ctx );
+extern void radeonUpdateWindow( GLcontext *ctx );
+
+extern void radeonValidateState( GLcontext *ctx );
+
+extern void radeonPrintDirty( radeonContextPtr rmesa,
+			      const char *msg );
+
+/* FALLBACK(rmesa, bit, mode): call radeonFallback() on rmesa's GL context; arguments are parenthesized. */
+extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+#define FALLBACK( rmesa, bit, mode ) do {				\
+   if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",		\
+		     __FUNCTION__, (int)(bit), (int)(mode) );		\
+   radeonFallback( (rmesa)->glCtx, (bit), (mode) );			\
+} while (0)
+
+/* Indices into rmesa->hw.mat[], as passed to upload_matrix()/upload_matrix_t(). */
+#define MODEL_PROJ 0   /* combined modelview-projection matrix */
+#define MODEL      1   /* modelview matrix */
+#define MODEL_IT   2   /* modelview inverse, uploaded without transposing */
+#define TEXMAT_0   3   /* texture matrix, unit 0 */
+#define TEXMAT_1   4   /* texture matrix, unit 1 */
+#define TEXMAT_2   5   /* NOTE(review): radeonInitState sets up mat[0..4] only -- confirm this index is unused */
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
new file mode 100644
index 0000000000..971ea699ea
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
@@ -0,0 +1,531 @@
+/* $XFree86$ */
+/*
+ * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "api_arrayelt.h"
+
+#include "swrast/swrast.h"
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_tcl.h"
+#include "radeon_tex.h"
+#include "radeon_swtcl.h"
+#include "radeon_vtxfmt.h"
+
+/* =============================================================
+ * State initialization
+ */
+/* Debug helper: print "<msg>: " followed by the name of every atom on the dirty list to stderr. */
+void radeonPrintDirty( radeonContextPtr rmesa, const char *msg )
+{
+   struct radeon_state_atom *l;
+
+   /* msg is printed as data; it must never be interpreted as a format string. */
+   fprintf(stderr, "%s: ", msg);
+
+   foreach(l, &(rmesa->hw.dirty)) {
+      fprintf(stderr, "%s, ", l->name);
+   }
+
+   fprintf(stderr, "\n");
+}
+/* Build the raw command header word for a RADEON_CMD_PACKET carrying packet id <id>. */
+static int cmdpkt( int id ) 
+{
+   drmRadeonCmdHeader hdr;
+   hdr.i = 0;   /* zero the raw word before filling in the packet fields */
+   hdr.packet.packet_id = id;
+   hdr.packet.cmd_type = RADEON_CMD_PACKET;
+   return hdr.i;
+}
+/* Build the raw command header word for a RADEON_CMD_VECTORS upload (offset, stride, count). */
+static int cmdvec( int offset, int stride, int count ) 
+{
+   drmRadeonCmdHeader hdr;
+   hdr.i = 0;   /* zero the raw word before filling in the vector fields */
+   hdr.vectors.count = count;
+   hdr.vectors.stride = stride;
+   hdr.vectors.offset = offset;
+   hdr.vectors.cmd_type = RADEON_CMD_VECTORS;
+   return hdr.i;
+}
+/* Build the raw command header word for a RADEON_CMD_SCALARS upload (offset, stride, count). */
+static int cmdscl( int offset, int stride, int count ) 
+{
+   drmRadeonCmdHeader hdr;
+   hdr.i = 0;   /* zero the raw word before filling in the scalar fields */
+   hdr.scalars.count = count;
+   hdr.scalars.stride = stride;
+   hdr.scalars.offset = offset;
+   hdr.scalars.cmd_type = RADEON_CMD_SCALARS;
+   return hdr.i;
+}
+/* CHECK(NM, FLAG): define check_NM(ctx), returning FLAG normalized to 0/1 (no GLboolean narrowing). */
+#define CHECK( NM, FLAG )			\
+static GLboolean check_##NM( GLcontext *ctx )	\
+{						\
+   return (FLAG) != 0;				\
+}
+/* TCL_CHECK(NM, FLAG): define check_NM(ctx), true only when rmesa->TclFallback is zero and FLAG holds. */
+#define TCL_CHECK( NM, FLAG )				\
+static GLboolean check_##NM( GLcontext *ctx )		\
+{							\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);	\
+   return !rmesa->TclFallback && (FLAG);		\
+}
+
+/* check_<name>() predicates; ALLOC_STATE() stores one in each atom's .check field. */
+CHECK( always, GL_TRUE )
+CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled )
+CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled )
+CHECK( fog, ctx->Fog.Enabled )
+TCL_CHECK( tcl, GL_TRUE )
+TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled )
+TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled )
+TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
+TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled )
+TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled )
+TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled )
+TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled )
+TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled )
+TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled )
+TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled )
+TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled )
+TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled )
+TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1) )
+TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2) )
+TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4) )
+TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8) )
+TCL_CHECK( tcl_ucp4, (ctx->Transform.ClipPlanesEnabled & 0x10) )
+TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20) )
+TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled ) 
+/* txr atoms: true only when the unit's enabled-target mask includes TEXTURE_RECT_BIT. */
+CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT))
+CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT))
+
+
+
+/* Initialize the context's hardware state.
+ */
+void radeonInitState( radeonContextPtr rmesa )
+{
+   GLcontext *ctx = rmesa->glCtx;
+   GLuint color_fmt, depth_fmt, i;
+
+   switch ( rmesa->radeonScreen->cpp ) {
+   case 2:
+      color_fmt = RADEON_COLOR_FORMAT_RGB565;
+      break;
+   case 4:
+      color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
+      break;
+   default:
+      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
+      exit( -1 );
+   }
+
+   rmesa->state.color.clear = 0x00000000;
+
+   switch ( ctx->Visual.depthBits ) {
+   case 16:
+      rmesa->state.depth.clear = 0x0000ffff;
+      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
+      depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+      rmesa->state.stencil.clear = 0x00000000;
+      break;
+   case 24:
+      rmesa->state.depth.clear = 0x00ffffff;
+      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
+      depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+      rmesa->state.stencil.clear = 0xff000000;
+      break;
+   default:
+      fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
+	       ctx->Visual.depthBits );
+      exit( -1 );
+   }
+
+   /* Only have hw stencil when depth buffer is 24 bits deep */
+   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
+				     ctx->Visual.depthBits == 24 );
+
+   rmesa->Fallback = 0;
+
+   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
+      rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
+      rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
+   } else {
+      rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
+      rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
+   }
+   rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
+   rmesa->state.pixel.readPitch  = rmesa->state.color.drawPitch;
+
+   /* Initialize lists:
+    */
+   make_empty_list(&(rmesa->hw.dirty));
+   make_empty_list(&(rmesa->hw.clean));
+
+
+#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG )				\
+   do {								\
+      rmesa->hw.ATOM.cmd_size = SZ;				\
+      rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int));	\
+      rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int));	\
+      rmesa->hw.ATOM.name = NM;					\
+      rmesa->hw.ATOM.is_tcl = FLAG;					\
+      rmesa->hw.ATOM.check = check_##CHK;				\
+      insert_at_head(&(rmesa->hw.dirty), &(rmesa->hw.ATOM));	\
+   } while (0)
+      
+      
+   /* Allocate state buffers:
+    */
+   ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 );
+   ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
+   ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
+   ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
+   ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
+   ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
+   ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
+   ALLOC_STATE( tcl, always, TCL_STATE_SIZE, "TCL/tcl", 1 );
+   ALLOC_STATE( mtl, tcl_lighting, MTL_STATE_SIZE, "MTL/material", 1 );
+   ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 );
+   ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
+   ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
+   ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+   ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 );
+   ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 );
+   ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+   ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
+   ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
+   ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 );
+   ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+   ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
+   ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+   ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
+   ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
+   ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
+   ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
+   ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 );
+   ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 );
+   ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 );
+   ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 );
+   ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 );
+   ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
+   ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+   ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
+   ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 );
+   ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 );
+
+
+   /* Fill in the packet headers:
+    */
+   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
+   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
+   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
+   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
+   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
+   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
+   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
+   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
+   rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS);
+   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
+   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0);
+   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0);
+   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1);
+   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1);
+   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
+   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
+   rmesa->hw.mtl.cmd[MTL_CMD_0] = 
+      cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
+   rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0);
+   rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1);
+   rmesa->hw.grd.cmd[GRD_CMD_0] = 
+      cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
+   rmesa->hw.fog.cmd[FOG_CMD_0] = 
+      cmdvec( RADEON_VS_FOG_PARAM_ADDR, 1, 4 );
+   rmesa->hw.glt.cmd[GLT_CMD_0] = 
+      cmdvec( RADEON_VS_GLOBAL_AMBIENT_ADDR, 1, 4 );
+   rmesa->hw.eye.cmd[EYE_CMD_0] = 
+      cmdvec( RADEON_VS_EYE_VECTOR_ADDR, 1, 4 );
+
+   for (i = 0 ; i < 5; i++) {
+      rmesa->hw.mat[i].cmd[MAT_CMD_0] = 
+	 cmdvec( RADEON_VS_MATRIX_0_ADDR + i*4, 1, 16);
+   }
+
+   for (i = 0 ; i < 8; i++) {
+      rmesa->hw.lit[i].cmd[LIT_CMD_0] = 
+	 cmdvec( RADEON_VS_LIGHT_AMBIENT_ADDR + i, 8, 24 );
+      rmesa->hw.lit[i].cmd[LIT_CMD_1] = 
+	 cmdscl( RADEON_SS_LIGHT_DCD_ADDR + i, 8, 6 );
+   }
+
+   for (i = 0 ; i < 6; i++) {
+      rmesa->hw.ucp[i].cmd[UCP_CMD_0] = 
+	 cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 );
+   }
+
+   rmesa->last_ReallyEnabled = -1;
+
+   /* Initial Hardware state:
+    */
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = (RADEON_ALPHA_TEST_PASS |
+				     RADEON_CHROMA_FUNC_FAIL |
+				     RADEON_CHROMA_KEY_NEAREST |
+				     RADEON_SHADOW_FUNC_EQUAL |
+				     RADEON_SHADOW_PASS_1 |
+				     RADEON_RIGHT_HAND_CUBE_OGL);
+
+   rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (RADEON_FOG_VERTEX |
+					  RADEON_FOG_USE_DEPTH);
+
+   rmesa->hw.ctx.cmd[CTX_RE_SOLID_COLOR] = 0x00000000;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = (RADEON_COMB_FCN_ADD_CLAMP |
+					    RADEON_SRC_BLEND_GL_ONE |
+					    RADEON_DST_BLEND_GL_ZERO );
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
+      rmesa->radeonScreen->depthOffset;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = 
+      ((rmesa->radeonScreen->depthPitch &
+	RADEON_DEPTHPITCH_MASK) |
+       RADEON_DEPTH_ENDIAN_NO_SWAP);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
+					       RADEON_Z_TEST_LESS |
+					       RADEON_STENCIL_TEST_ALWAYS |
+					       RADEON_STENCIL_FAIL_KEEP |
+					       RADEON_STENCIL_ZPASS_KEEP |
+					       RADEON_STENCIL_ZFAIL_KEEP |
+					       RADEON_Z_WRITE_ENABLE);
+
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (RADEON_SCISSOR_ENABLE |
+				     RADEON_ANTI_ALIAS_NONE);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE |
+				       color_fmt |
+				       (1<<15));
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_DITHER_ENABLE;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = (rmesa->state.color.drawOffset &
+					      RADEON_COLOROFFSET_MASK);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->state.color.drawPitch &
+					      RADEON_COLORPITCH_MASK) |
+					     RADEON_COLOR_ENDIAN_NO_SWAP);
+
+   rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW |
+				     RADEON_BFACE_SOLID |
+				     RADEON_FFACE_SOLID |
+/*  			     RADEON_BADVTX_CULL_DISABLE | */
+				     RADEON_FLAT_SHADE_VTX_LAST |
+				     RADEON_DIFFUSE_SHADE_GOURAUD |
+				     RADEON_ALPHA_SHADE_GOURAUD |
+				     RADEON_SPECULAR_SHADE_GOURAUD |
+				     RADEON_FOG_SHADE_GOURAUD |
+				     RADEON_VPORT_XY_XFORM_ENABLE |
+				     RADEON_VPORT_Z_XFORM_ENABLE |
+				     RADEON_VTX_PIX_CENTER_OGL |
+				     RADEON_ROUND_MODE_TRUNC |
+				     RADEON_ROUND_PREC_8TH_PIX);
+
+   rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] =
+#ifdef MESA_BIG_ENDIAN
+					    RADEON_VC_32BIT_SWAP;
+#else
+  					    RADEON_VC_NO_SWAP;
+#endif
+
+   if (!(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)) {
+     rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS;
+   }
+
+   rmesa->hw.set.cmd[SET_SE_COORDFMT] = (
+      RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
+      RADEON_TEX1_W_ROUTING_USE_Q1);
+
+
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((1 << 16) | 0xffff);
+
+   rmesa->hw.lin.cmd[LIN_RE_LINE_STATE] = 
+      ((0 << RADEON_LINE_CURRENT_PTR_SHIFT) |
+       (1 << RADEON_LINE_CURRENT_COUNT_SHIFT));
+
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (1 << 4);
+
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] = 
+      ((0x00 << RADEON_STENCIL_REF_SHIFT) |
+       (0xff << RADEON_STENCIL_MASK_SHIFT) |
+       (0xff << RADEON_STENCIL_WRITEMASK_SHIFT));
+
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = RADEON_ROP_COPY;
+   rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = 0xffffffff;
+
+   rmesa->hw.msc.cmd[MSC_RE_MISC] = 
+      ((0 << RADEON_STIPPLE_X_OFFSET_SHIFT) |
+       (0 << RADEON_STIPPLE_Y_OFFSET_SHIFT) |
+       RADEON_STIPPLE_BIG_BIT_ORDER);
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = 0x00000000;
+
+   for ( i = 0 ; i < ctx->Const.MaxTextureUnits ; i++ ) {
+      rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] = RADEON_BORDER_MODE_OGL;
+      rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT] = 
+	  (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
+	   RADEON_TXFORMAT_PERSPECTIVE_ENABLE |
+	   (i << 24) | /* This is one of RADEON_TXFORMAT_ST_ROUTE_STQ[012] */
+	   (2 << RADEON_TXFORMAT_WIDTH_SHIFT) |
+	   (2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
+
+      /* FIXME: What is this magic value? */
+      rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] = 0x2000 << (2 * i);
+
+      rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
+      rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] =  
+	  (RADEON_COLOR_ARG_A_ZERO |
+	   RADEON_COLOR_ARG_B_ZERO |
+	   RADEON_COLOR_ARG_C_CURRENT_COLOR |
+	   RADEON_BLEND_CTL_ADD |
+	   RADEON_SCALE_1X |
+	   RADEON_CLAMP_TX);
+      rmesa->hw.tex[i].cmd[TEX_PP_TXABLEND] = 
+	  (RADEON_ALPHA_ARG_A_ZERO |
+	   RADEON_ALPHA_ARG_B_ZERO |
+	   RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+	   RADEON_BLEND_CTL_ADD |
+	   RADEON_SCALE_1X |
+	   RADEON_CLAMP_TX);
+      rmesa->hw.tex[i].cmd[TEX_PP_TFACTOR] = 0;
+   }
+
+   /* Can only add ST1 at the time of doing some multitex but can keep
+    * it after that.  Errors if DIFFUSE is missing.
+    */
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = 
+      (RADEON_TCL_VTX_Z0 |
+       RADEON_TCL_VTX_W0 |
+       RADEON_TCL_VTX_PK_DIFFUSE
+	 );	/* need to keep this uptodate */
+						   
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] =
+      ( RADEON_TCL_COMPUTE_XYZW 	|
+	(RADEON_TCL_TEX_INPUT_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
+	(RADEON_TCL_TEX_INPUT_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT) |
+	(RADEON_TCL_TEX_INPUT_TEX_2 << RADEON_TCL_TEX_2_OUTPUT_SHIFT));
+
+
+   /* XXX */
+   rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_0] = 
+      ((MODEL << RADEON_MODELVIEW_0_SHIFT) |
+       (MODEL_IT << RADEON_IT_MODELVIEW_0_SHIFT));
+
+   rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_1] = 
+      ((MODEL_PROJ << RADEON_MODELPROJECT_0_SHIFT) |
+       (TEXMAT_0 << RADEON_TEXMAT_0_SHIFT) |
+       (TEXMAT_1 << RADEON_TEXMAT_1_SHIFT));
+
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = 
+      (RADEON_UCP_IN_CLIP_SPACE |
+       RADEON_CULL_FRONT_IS_CCW);
+
+   rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = 0; 
+
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = 
+      (RADEON_SPECULAR_LIGHTS |
+       RADEON_DIFFUSE_SPECULAR_COMBINE |
+       RADEON_LOCAL_LIGHT_VEC_GL |
+       (RADEON_LM_SOURCE_STATE_PREMULT << RADEON_EMISSIVE_SOURCE_SHIFT) |
+       (RADEON_LM_SOURCE_STATE_PREMULT << RADEON_AMBIENT_SOURCE_SHIFT) |
+       (RADEON_LM_SOURCE_STATE_PREMULT << RADEON_DIFFUSE_SOURCE_SHIFT) |
+       (RADEON_LM_SOURCE_STATE_PREMULT << RADEON_SPECULAR_SOURCE_SHIFT)); 
+
+   for (i = 0 ; i < 8; i++) {
+      struct gl_light *l = &ctx->Light.Light[i];
+      GLenum p = GL_LIGHT0 + i;
+      *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX;
+
+      ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient );
+      ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse );
+      ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular );
+      ctx->Driver.Lightfv( ctx, p, GL_POSITION, 0 );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, 0 );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff );
+      ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION,
+			   &l->ConstantAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION, 
+			   &l->LinearAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION, 
+		     &l->QuadraticAttenuation );
+   }
+
+   ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT, 
+			     ctx->Light.Model.Ambient );
+
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx );
+
+   for (i = 0 ; i < 6; i++) {
+      ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, NULL );
+   }
+
+   ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 );
+   ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+   ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
+   ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, 0 );
+   
+   rmesa->hw.grd.cmd[GRD_VERT_GUARD_CLIP_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_VERT_GUARD_DISCARD_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_HORZ_GUARD_CLIP_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_HORZ_GUARD_DISCARD_ADJ] = IEEE_ONE;
+
+   rmesa->hw.eye.cmd[EYE_X] = 0;
+   rmesa->hw.eye.cmd[EYE_Y] = 0;
+   rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
+   rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_subset.h b/src/mesa/drivers/dri/radeon/radeon_subset.h
new file mode 100644
index 0000000000..1b6e7bd432
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_subset.h
@@ -0,0 +1,75 @@
+/**
+ * \file radeon_subset.h
+ * \brief Radeon subset driver declarations.
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/*
+ * Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+ *                      Tungsten Graphics Inc., Austin, Texas.
+ * 
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* $XFree86$ */
+
+#ifndef __RADEON_SUBSET_H__
+#define __RADEON_SUBSET_H__
+
+extern void radeonPointsBitmap( GLsizei width, GLsizei height,
+				GLfloat xorig, GLfloat yorig,
+				GLfloat xmove, GLfloat ymove,
+				const GLubyte *bitmap );
+
+extern void radeonReadPixels( GLint x, GLint y,
+			      GLsizei width, GLsizei height,
+			      GLenum format, GLenum type,
+			      GLvoid *pixels );
+
+extern void radeon_select_Install( GLcontext *ctx );
+
+extern void radeonInitSelect( GLcontext *ctx );
+
+extern void radeonVtxfmtDestroy( GLcontext *ctx );
+
+extern void radeonVtxfmtMakeCurrent( GLcontext *ctx );
+
+extern void radeonVtxfmtUnbindContext( GLcontext *ctx );
+
+extern void radeonVtxfmtInit( GLcontext *ctx );
+
+extern void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+
+extern void radeonVtxfmtInvalidate( GLcontext *ctx );
+
+extern void radeonSubsetVtxEnableTCL( radeonContextPtr rmesa, GLboolean flag );
+
+extern void radeonUpdateTextureState( GLcontext *ctx );
+
+extern void radeonInitTextureFuncs( GLcontext *ctx );
+
+extern void radeonAgeTextures( radeonContextPtr rmesa, int heap );
+
+extern void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t );
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_subset_bitmap.c b/src/mesa/drivers/dri/radeon/radeon_subset_bitmap.c
new file mode 100644
index 0000000000..cb4a514221
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_subset_bitmap.c
@@ -0,0 +1,197 @@
+/**
+ * \file radeon_subset_bitmap.c
+ * \brief Bitmap drawing.
+ * 
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/*
+ * Copyright 2003       ATI Technologies Inc., Ontario, Canada, and
+ *                      Tungsten Graphics Inc., Cedar Park, Texas.
+ * 
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ */
+
+/* $XFree86$ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "context.h"
+#include "enums.h"
+#include "imports.h"
+#include "image.h"
+/*#include "mmath.h"*/
+#include "macros.h"
+#include "state.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_subset.h"
+
+/**
+ * \brief Cope with depth operations by drawing individual pixels as points
+ *
+ * \param width bitmap width.
+ * \param height bitmap height.
+ * \param xorig x coordinate of the bitmap corner.
+ * \param yorig y coordinate of the bitmap corner.
+ * \param xmove increment to the final x coordinate.
+ * \param ymove increment to the final y coordinate.
+ * \param bitmap bitmap pointer.
+ * 
+ * Offsets the raster position into window coordinates (no clipping is done
+ * here) and emits one GL_POINTS vertex per set bit, with TCL and the hardware
+ * viewport transformation turned off so raw pixel coordinates can be used.
+ * Afterwards restores the current color and texcoord, SE_CNTL and TCL.
+ */
+void
+radeonPointsBitmap(  GLsizei width, GLsizei height,
+		     GLfloat xorig, GLfloat yorig, 
+		     GLfloat xmove, GLfloat ymove,
+		     const GLubyte *bitmap )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLsizei bmwidth = width, bmheight = height;   
+   GLint px, py;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLfloat saved_color[4], saved_tex0[2];
+   GLint row, col;
+   GLuint orig_se_cntl;
+   GLint h;
+   const struct gl_pixelstore_attrib *unpack = &ctx->Unpack;
+
+   ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
+
+   if (width < 0 || height < 0) {
+      _mesa_error( ctx, GL_INVALID_VALUE, "glBitmap(width or height < 0)" );
+      return;
+   }
+
+   if (!ctx->Current.RasterPosValid)
+      return;
+
+   if (ctx->NewState) 
+      _mesa_update_state(ctx);
+
+
+   if (ctx->_RotateMode) {
+      width = bmheight; height = bmwidth;
+
+      px = IFLOOR(ctx->Current.RasterPos[0] + yorig);
+      py = IFLOOR(ctx->Current.RasterPos[1] + xorig);
+
+      ctx->Current.RasterPos[0] += ymove;
+      ctx->Current.RasterPos[1] += xmove;
+   }
+   else {
+      px = IFLOOR(ctx->Current.RasterPos[0] - xorig);
+      py = IFLOOR(ctx->Current.RasterPos[1] - yorig);
+
+      ctx->Current.RasterPos[0] += xmove;
+      ctx->Current.RasterPos[1] += ymove;
+   }
+
+
+
+   /* Turn off tcl and the hw viewport transformation so that we can
+    * emit raw pixel coordinates:
+    */
+   radeonSubsetVtxEnableTCL( rmesa, GL_FALSE );
+   RADEON_STATECHANGE( rmesa, set );
+   orig_se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
+   rmesa->hw.set.cmd[SET_SE_CNTL] &= ~(RADEON_VPORT_XY_XFORM_ENABLE |
+				       RADEON_VPORT_Z_XFORM_ENABLE);
+
+
+   /* Adjust for window coordinates, flip y values.  h is kept signed so that
+    * rows below the window come out negative instead of wrapping around:
+    */
+   h = rmesa->dri.drawable->h + rmesa->dri.drawable->y - 1;
+   px += rmesa->dri.drawable->x;
+
+   /* Save current color, texcoord to restore later:
+    */
+   COPY_4V( saved_color, ctx->Current.Attrib[VERT_ATTRIB_COLOR0] );
+   COPY_2V( saved_tex0, ctx->Current.Attrib[VERT_ATTRIB_TEX0] );
+
+   /* Just use the GL entrypoints to talk to radeon_subset_vtx.c:
+    */
+   glBegin( GL_POINTS );
+   glColor4fv( ctx->Current.RasterColor );
+   glTexCoord2fv( ctx->Current.RasterTexCoords[0] );
+
+
+   if (ctx->_RotateMode) {
+      for (col=0; col<width; col++) {
+	 const GLubyte *src = (const GLubyte *) 
+	    _mesa_image_address( unpack, bitmap, height, width, 
+				 GL_COLOR_INDEX, GL_BITMAP, 0, col, 0 );
+	    
+	 /* Msb first */
+	 GLubyte mask = 128U >> (unpack->SkipPixels & 0x7);
+	 for (row=0; row<height; row++) {
+	    if (*src & mask) {
+	       glVertex2f( px-col, h - (py + row) );
+	    }
+	    src += mask & 1;
+	    mask = ((mask << 7) & 0xff) | (mask >> 1);
+	 }
+	 /* get ready for next row */
+	 if (mask != 128)
+	    src++;
+      }
+   }
+   else {
+      for (row=0; row<height; row++) {
+	 const GLubyte *src = (const GLubyte *) 
+	    _mesa_image_address( unpack, bitmap, width, height, 
+				 GL_COLOR_INDEX, GL_BITMAP, 0, row, 0 );
+	    
+	 /* Msb first */
+	 GLubyte mask = 128U >> (unpack->SkipPixels & 0x7);
+	 for (col=0; col<width; col++) {
+	    if (*src & mask) {
+	       glVertex2f( px+col, h - (py + row) );
+	    }
+	    src += mask & 1;
+	    mask = ((mask << 7) & 0xff) | (mask >> 1);
+	 }
+	 /* get ready for next row */
+	 if (mask != 128)
+	    src++;
+      }
+   }
+
+   glEnd();
+   glColor4fv( saved_color );
+   glTexCoord2fv( saved_tex0 );
+
+   /* Fire outstanding vertices, restore state
+    */
+   RADEON_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_SE_CNTL] = orig_se_cntl;
+   radeonSubsetVtxEnableTCL( rmesa, GL_TRUE );
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_subset_readpix.c b/src/mesa/drivers/dri/radeon/radeon_subset_readpix.c
new file mode 100644
index 0000000000..f72d3b8472
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_subset_readpix.c
@@ -0,0 +1,246 @@
+/**
+ * \file radeon_subset_readpix.c
+ * \brief Pixel reading.
+ * 
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Brian Paul <brian@tungstengraphics.com>
+ */
+
+/*
+ * Copyright 2003       ATI Technologies Inc., Ontario, Canada, and
+ *                      Tungsten Graphics Inc., Cedar Park, Texas.
+ * 
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* $XFree86$ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "context.h"
+#include "enums.h"
+#include "imports.h"
+/*#include "mmath.h" */
+#include "macros.h"
+#include "state.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_subset.h"
+
+/**
+ * \brief Expand one RGB565 frame buffer pixel into four 8-bit RGBA channels.
+ *
+ * \param rgba destination; elements [0..3] receive R, G, B and 0xff for alpha.
+ * \param ptr address of the 16-bit pixel; each channel is rescaled to 0..255.
+ */
+#define READ_RGBA_16( rgba, ptr )				\
+do {								\
+    GLushort pix565_ = *(GLushort *)(ptr);			\
+    (rgba)[0] = ((pix565_ >> 8) & 0xf8) * 255 / 0xf8;		\
+    (rgba)[1] = ((pix565_ >> 3) & 0xfc) * 255 / 0xfc;		\
+    (rgba)[2] = ((pix565_ << 3) & 0xf8) * 255 / 0xf8;		\
+    (rgba)[3] = 0xff;						\
+} while (0)
+
+/**
+ * \brief Unpack one ARGB8888 frame buffer pixel into four 8-bit RGBA channels.
+ *
+ * \param rgba destination; elements [0..3] receive R, G, B and A.
+ * \param ptr address of the 32-bit pixel (A in bits 31-24, B in bits 7-0).
+ */
+#define READ_RGBA_32( rgba, ptr )		\
+do {						\
+   GLuint pix8888_ = *(GLuint *)(ptr);		\
+   (rgba)[0] = (pix8888_ >> 16) & 0xff;		\
+   (rgba)[1] = (pix8888_ >>  8) & 0xff;		\
+   (rgba)[2] = (pix8888_ >>  0) & 0xff;		\
+   (rgba)[3] = (pix8888_ >> 24) & 0xff;		\
+} while (0)
+
+/**
+ * \brief Fetch a span of pixels from the frame buffer as RGBA.
+ * 
+ * \param ctx GL context.
+ * \param n number of pixels to fetch.
+ * \param x x position of the first pixel of the span.
+ * \param y y position of the span.
+ * \param rgba receives \p n RGBA quadruplets.
+ *
+ * Locates the first pixel relative to the drawable origin (flipping y) and
+ * converts each pixel with #READ_RGBA_32 when cpp is 4, else #READ_RGBA_16.
+ */
+static void ReadRGBASpan( const GLcontext *ctx,
+			       GLuint n, GLint x, GLint y,
+			       GLubyte rgba[][4])
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   __DRIdrawablePrivate *draw = rmesa->dri.drawable;
+   const GLuint bytesPerPixel = rmesa->radeonScreen->cpp;
+   const GLuint rowBytes = rmesa->radeonScreen->frontPitch * bytesPerPixel;
+   char *pixel;
+   GLint advance;	/* byte step from one pixel of the span to the next */
+   GLint k;
+
+   if (ctx->_RotateMode) {
+      /* Rotated: successive pixels lie one frame buffer row apart, and the
+       * span is walked towards lower addresses.
+       */
+      pixel = (char *)(rmesa->dri.screen->pFB +
+		       rmesa->state.pixel.readOffset +
+		       ((draw->x + (draw->w - y - 1)) * bytesPerPixel) +
+		       ((draw->y + (draw->h - x - 1)) * rowBytes));
+      advance = -(GLint) rowBytes;
+   }
+   else {
+      /* Unrotated: successive pixels are adjacent in memory. */
+      pixel = (char *)(rmesa->dri.screen->pFB +
+		       rmesa->state.pixel.readOffset +
+		       ((draw->x + x) * bytesPerPixel) +
+		       ((draw->y + (draw->h - y - 1)) * rowBytes));
+      advance = (GLint) bytesPerPixel;
+   }
+
+   for (k = 0; k < n; k++, pixel += advance) {
+      if (bytesPerPixel == 4)
+	 READ_RGBA_32( rgba[k], pixel );
+      else
+	 READ_RGBA_16( rgba[k], pixel );
+   }
+}
+
+
+/**
+ * \brief Optimized glReadPixels().
+ *
+ * Only handles GL_RGBA / GL_UNSIGNED_BYTE, with pixel scaling, biasing and
+ * mapping disabled and no fancy pixel packing (all checked with assert()).
+ *
+ * \param x x start position of the reading rectangle.
+ * \param y y start position of the reading rectangle.
+ * \param width width of the reading rectangle.
+ * \param height height of the reading rectangle.
+ * \param format pixel format. Must be GL_RGBA.
+ * \param type pixel type. Must be GL_UNSIGNED_BYTE.
+ * \param pixels pixel data.
+ * 
+ * Clips the rectangle to the read buffer and calls ReadRGBASpan() per row;
+ * destination rows are the pack row length (or \p width if unset) pixels apart.
+ */
+void radeonReadPixels( GLint x, GLint y,
+		       GLsizei width, GLsizei height,
+		       GLenum format, GLenum type,
+		       GLvoid *pixels )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLint srcX = x;
+   GLint srcY = y;
+   GLint readWidth = width;           /* actual width read */
+   GLint readHeight = height;         /* actual height read */
+   const struct gl_pixelstore_attrib *packing = &ctx->Pack;
+   GLint skipRows = packing->SkipRows;
+   GLint skipPixels = packing->SkipPixels;
+   GLint rowLength;
+   ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
+
+   /* x, y, width and height keep the caller's values: the source rectangle
+    * is tracked in srcX/srcY/readWidth/readHeight, and 'width' is the
+    * default destination row length below.  Exchanging width and height
+    * here would give non-square reads the wrong row stride.
+    */
+
+   if (width < 0 || height < 0) {
+      _mesa_error( ctx, GL_INVALID_VALUE,
+                   "glReadPixels(width=%d height=%d)", width, height );
+      return;
+   }
+
+   if (!pixels) {
+      _mesa_error( ctx, GL_INVALID_VALUE, "glReadPixels(pixels)" );
+      return;
+   }
+
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+
+   /* can't do scale, bias, mapping, etc */
+   assert(!ctx->_ImageTransferState);
+
+   /* can't do fancy pixel packing */
+   assert (packing->Alignment == 1 &&
+	   !packing->SwapBytes &&
+	   !packing->LsbFirst);
+
+
+   if (packing->RowLength > 0)
+      rowLength = packing->RowLength;
+   else
+      rowLength = width;
+
+   /* horizontal clipping */
+   if (srcX < 0) {
+      skipPixels -= srcX;
+      readWidth += srcX;
+      srcX = 0;
+   }
+   if (srcX + readWidth > (GLint) ctx->ReadBuffer->Width)
+      readWidth -= (srcX + readWidth - (GLint) ctx->ReadBuffer->Width);
+   if (readWidth <= 0)
+      return;
+
+   /* vertical clipping */
+   if (srcY < 0) {
+      skipRows -= srcY;
+      readHeight += srcY;
+      srcY = 0;
+   }
+   if (srcY + readHeight > (GLint) ctx->ReadBuffer->Height)
+      readHeight -= (srcY + readHeight - (GLint) ctx->ReadBuffer->Height);
+   if (readHeight <= 0)
+      return;
+
+   /*
+    * Ready to read!
+    * The window region at (srcX, srcY) of size (readWidth, readHeight)
+    * will be read back.
+    * We'll write pixel data to buffer pointed to by "pixels" but we'll
+    * skip "skipRows" rows and skip "skipPixels" pixels/row.
+    */
+   if (format == GL_RGBA && type == GL_UNSIGNED_BYTE) {
+      GLchan *dest = (GLchan *) pixels 
+	 + (skipRows * rowLength + skipPixels) * 4;
+      GLint row;
+
+      for (row=0; row<readHeight; row++) {
+	 ReadRGBASpan(ctx, readWidth, srcX, srcY, (GLchan (*)[4]) dest);
+	 dest += rowLength * 4;
+	 srcY++;
+      }
+   }
+   else {
+      /* can't do this format/type combination */
+      assert(0);
+   }
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_subset_select.c b/src/mesa/drivers/dri/radeon/radeon_subset_select.c
new file mode 100644
index 0000000000..bd5003ffe5
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_subset_select.c
@@ -0,0 +1,998 @@
+/**
+ * \file radeon_subset_select.c
+ * \brief Selection.
+ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  4.1
+ *
+ * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* $Id: radeon_subset_select.c,v 1.2 2003/08/22 20:11:45 brianp Exp $ */
+
+
+#include "glheader.h"
+#include "imports.h"
+#include "context.h"
+/*#include "mmath.h"*/
+#include "mtypes.h"
+#include "enums.h"
+#include "glapi.h"
+#include "feedback.h"
+
+#include "radeon_context.h"
+#include "radeon_subset.h"
+
+/**
+ * \brief Vertex: its position in each coordinate space plus color and texcoord.
+ */
+typedef struct {
+   struct { GLfloat x, y, z, w; } pos;      /**< \brief position as passed to the vertex call */
+   struct { GLfloat x, y, z, w; } eyePos;   /**< \brief position, eye coordinates */
+   struct { GLfloat x, y, z, w; } clipPos;  /**< \brief position, clip coordinates */
+   struct { GLfloat x, y, z, w; } winPos;   /**< \brief position, window coordinates */
+   struct { GLfloat s, t; } texCoord;       /**< \brief texture coordinates */
+   struct { GLfloat r, g, b, a; } color;    /**< \brief color */
+} vertex;
+
+
+/**
+ * \brief Vertex buffer holding the vertices of the primitive being assembled.
+ */
+static struct select_vb_t {
+   GLuint    vCount;		/**< \brief number of vertices currently kept in vBuffer */
+   vertex  vBuffer[4];		/**< \brief vertex storage (a quad needs all four) */
+   GLboolean lineReset;		/**< \brief set by radeon_select_Begin(); not read in this file */
+   GLboolean partialLineLoop;	/**< \brief GL_TRUE while a line loop has exactly two vertices */
+} vb;
+
+
+
+
+/**********************************************************************/
+/** \name        Vertex Transformation and Clipping                   */
+/**********************************************************************/
+/*@{*/
+
+/**
+ * \brief Transform a point (column vector) by a matrix:  Q = M * P.
+ *
+ * \param Q destination point.
+ * \param M transformation matrix (16 elements, indexed in column-major order).
+ * \param P source point.
+ */
+#define TRANSFORM_POINT( Q, M, P )				\
+   Q.x = M[0] * P.x + M[4] * P.y + M[8] *  P.z + M[12] * P.w;	\
+   Q.y = M[1] * P.x + M[5] * P.y + M[9] *  P.z + M[13] * P.w;	\
+   Q.z = M[2] * P.x + M[6] * P.y + M[10] * P.z + M[14] * P.w;	\
+   Q.w = M[3] * P.x + M[7] * P.y + M[11] * P.z + M[15] * P.w;
+
+/**
+ * \brief Clip coord to window coord mapping (divide by w, then viewport).
+ *
+ * \param Q destination point (window coordinates; Q.w is a copy of P.w).
+ * \param P source point (clip coordinates); P.w is used as a divisor.
+ * \param VP view port, providing X, Y, Width, Height, Near and Far.
+ */
+#define MAP_POINT( Q, P, VP )                                      \
+   Q.x = (GLfloat) (((P.x / P.w) + 1.0) * VP.Width / 2.0 + VP.X);  \
+   Q.y = (GLfloat) (((P.y / P.w) + 1.0) * VP.Height / 2.0 + VP.Y);  \
+   Q.z = (GLfloat) (((P.z / P.w) + 1.0) * (VP.Far - VP.Near) / 2.0 + VP.Near);\
+   Q.w = (GLfloat) P.w;
+
+
+/**
+ * \brief Linear interpolation:  A + (B - A) * T, i.e. (1 - T) * A + T * B.
+ *
+ * \param T interpolation factor.
+ * \param A first value (result for T = 0).
+ * \param B second value (result for T = 1).
+ * \result interpolated value.
+ */
+#define INTERPOLATE(T, A, B)   ((A) + ((B) - (A)) * (T))
+
+
+
+/**
+ * \brief Interpolate the eye/clip positions, color and texcoords of a vertex.
+ *
+ * \param t interpolation factor; 0 yields \p v0, 1 yields \p v1.
+ * \param v0 first vertex.
+ * \param v1 second vertex.
+ * \param vOut output vertex; its pos and winPos members are not written.
+ * 
+ * Every component is computed with the #INTERPOLATE macro.
+ */
+static void
+interpolate_vertex(GLfloat t, const vertex *v0, const vertex *v1, 
+		   vertex *vOut)
+{
+   vOut->eyePos.x = INTERPOLATE(t, v0->eyePos.x, v1->eyePos.x);
+   vOut->eyePos.y = INTERPOLATE(t, v0->eyePos.y, v1->eyePos.y);
+   vOut->eyePos.z = INTERPOLATE(t, v0->eyePos.z, v1->eyePos.z);
+   vOut->eyePos.w = INTERPOLATE(t, v0->eyePos.w, v1->eyePos.w);
+
+   vOut->clipPos.x = INTERPOLATE(t, v0->clipPos.x, v1->clipPos.x);
+   vOut->clipPos.y = INTERPOLATE(t, v0->clipPos.y, v1->clipPos.y);
+   vOut->clipPos.z = INTERPOLATE(t, v0->clipPos.z, v1->clipPos.z);
+   vOut->clipPos.w = INTERPOLATE(t, v0->clipPos.w, v1->clipPos.w);
+
+   vOut->color.r = INTERPOLATE(t, v0->color.r, v1->color.r);
+   vOut->color.g = INTERPOLATE(t, v0->color.g, v1->color.g);
+   vOut->color.b = INTERPOLATE(t, v0->color.b, v1->color.b);
+   vOut->color.a = INTERPOLATE(t, v0->color.a, v1->color.a);
+
+   vOut->texCoord.s = INTERPOLATE(t, v0->texCoord.s, v1->texCoord.s);
+   vOut->texCoord.t = INTERPOLATE(t, v0->texCoord.t, v1->texCoord.t);
+}
+
+
+
+
+/*
+ * Clip bit codes 
+ */
+#define CLIP_LEFT    1
+#define CLIP_RIGHT   2
+#define CLIP_BOTTOM  4
+#define CLIP_TOP     8
+#define CLIP_NEAR   16
+#define CLIP_FAR    32
+
+
+/**
+ * \brief Apply view volume clip testing to a point.
+ *
+ * \param v vertex whose clipPos is tested against -w <= x, y, z <= w.
+ * \return zero if visible, or the clip code mask, i.e., binary OR of a
+ * combination of the #CLIP_LEFT, #CLIP_RIGHT, #CLIP_BOTTOM, #CLIP_TOP, #CLIP_NEAR,
+ * #CLIP_FAR clip bit codes.
+ */
+static GLuint
+clip_point(const vertex *v)
+{
+   GLuint mask = 0;
+   if (v->clipPos.x > v->clipPos.w) mask |= CLIP_RIGHT;
+   if (v->clipPos.x < -v->clipPos.w) mask |= CLIP_LEFT;
+   if (v->clipPos.y > v->clipPos.w) mask |= CLIP_TOP;
+   if (v->clipPos.y < -v->clipPos.w) mask |= CLIP_BOTTOM;
+   if (v->clipPos.z > v->clipPos.w) mask |= CLIP_FAR;
+   if (v->clipPos.z < -v->clipPos.w) mask |= CLIP_NEAR;
+   return mask;
+}
+
+
+/**
+ * \def GENERAL_CLIP
+ * \brief Clipping utility macro.
+ * 
+ * We use 6 instances of this code in each of clip_line() and clip_polygon()
+ * to clip against the 6 planes.  For each plane, we define #OUTSIDE (in
+ * clip_line()) or #INSIDE (in clip_polygon()) and #COMPUTE_INTERSECTION.
+ */
+
+
+/**
+ * \brief Apply clipping to a line segment.
+ *
+ * \param v0in input start vertex
+ * \param v1in input end vertex
+ * \param v0new output start vertex (only meaningful when GL_TRUE is returned)
+ * \param v1new output end vertex (only meaningful when GL_TRUE is returned)
+ *
+ * \return GL_TRUE if the line segment is visible, or GL_FALSE if it is totally
+ * clipped.
+ *
+ * \sa #GENERAL_CLIP.
+ */
+static GLboolean
+clip_line(const vertex *v0in, const vertex *v1in,
+	  vertex *v0new, vertex *v1new)
+{
+   vertex v0, v1, vNew;
+   GLfloat dx, dy, dz, dw, t;
+   GLuint code0, code1;
+
+   code0 = clip_point(v0in);
+   code1 = clip_point(v1in);
+   if (code0 & code1)
+      return GL_FALSE;  /* both ends outside the same plane: totally clipped */
+   
+   *v0new = *v0in;
+   *v1new = *v1in;
+   if (code0 == 0 && code1 == 0)
+      return GL_TRUE;   /* both ends inside: no clipping needed */
+   
+   v0 = *v0in;
+   v1 = *v1in;
+
+
+#define GENERAL_CLIP                                                    \
+   if (OUTSIDE(v0)) {                                                   \
+      if (OUTSIDE(v1)) {                                                \
+         /* both verts are outside ==> return 0 */                      \
+         return 0;                                                      \
+      }                                                                 \
+      else {                                                            \
+         /* v0 is outside, v1 is inside ==> clip */                     \
+         COMPUTE_INTERSECTION( v1, v0, vNew )                           \
+         interpolate_vertex(t, &v1, &v0, &vNew);                        \
+         v0 = vNew;                                                     \
+      }                                                                 \
+   }                                                                    \
+   else {                                                               \
+      if (OUTSIDE(v1)) {                                                \
+         /* v0 is inside, v1 is outside ==> clip */                     \
+         COMPUTE_INTERSECTION( v0, v1, vNew )                           \
+         interpolate_vertex(t, &v0, &v1, &vNew);                        \
+         v1 = vNew;                                                     \
+      }                                                                 \
+      /* else both verts are inside ==> do nothing */                   \
+   }
+
+   /* Clip against +X side (x <= w) */
+#define OUTSIDE(V)      (V.clipPos.x > V.clipPos.w)
+#define COMPUTE_INTERSECTION( IN, OUT, NEW )                         \
+        dx = OUT.clipPos.x - IN.clipPos.x;                           \
+        dw = OUT.clipPos.w - IN.clipPos.w;                           \
+        t = (IN.clipPos.x - IN.clipPos.w) / (dw-dx);
+   GENERAL_CLIP
+#undef OUTSIDE
+#undef COMPUTE_INTERSECTION
+
+   /* Clip against -X side (x >= -w) */
+#define OUTSIDE(V)      (V.clipPos.x < -(V.clipPos.w))
+#define COMPUTE_INTERSECTION( IN, OUT, NEW )                         \
+        dx = OUT.clipPos.x - IN.clipPos.x;                           \
+        dw = OUT.clipPos.w - IN.clipPos.w;                           \
+        t = -(IN.clipPos.x + IN.clipPos.w) / (dw+dx);
+   GENERAL_CLIP
+#undef OUTSIDE
+#undef COMPUTE_INTERSECTION
+
+   /* Clip against +Y side (y <= w) */
+#define OUTSIDE(V)      (V.clipPos.y > V.clipPos.w)
+#define COMPUTE_INTERSECTION( IN, OUT, NEW )                         \
+        dy = OUT.clipPos.y - IN.clipPos.y;                           \
+        dw = OUT.clipPos.w - IN.clipPos.w;                           \
+        t = (IN.clipPos.y - IN.clipPos.w) / (dw-dy);
+   GENERAL_CLIP
+#undef OUTSIDE
+#undef COMPUTE_INTERSECTION
+
+   /* Clip against -Y side (y >= -w) */
+#define OUTSIDE(V)      (V.clipPos.y < -(V.clipPos.w))
+#define COMPUTE_INTERSECTION( IN, OUT, NEW )                         \
+        dy = OUT.clipPos.y - IN.clipPos.y;                           \
+        dw = OUT.clipPos.w - IN.clipPos.w;                           \
+        t = -(IN.clipPos.y + IN.clipPos.w) / (dw+dy);
+   GENERAL_CLIP
+#undef OUTSIDE
+#undef COMPUTE_INTERSECTION
+
+   /* Clip against +Z side (z <= w) */
+#define OUTSIDE(V)      (V.clipPos.z > V.clipPos.w)
+#define COMPUTE_INTERSECTION( IN, OUT, NEW )                         \
+        dz = OUT.clipPos.z - IN.clipPos.z;                           \
+        dw = OUT.clipPos.w - IN.clipPos.w;                           \
+        t = (IN.clipPos.z - IN.clipPos.w) / (dw-dz);
+   GENERAL_CLIP
+#undef OUTSIDE
+#undef COMPUTE_INTERSECTION
+
+   /* Clip against -Z side (z >= -w) */
+#define OUTSIDE(V)      (V.clipPos.z < -(V.clipPos.w))
+#define COMPUTE_INTERSECTION( IN, OUT, NEW )                         \
+        dz = OUT.clipPos.z - IN.clipPos.z;                           \
+        dw = OUT.clipPos.w - IN.clipPos.w;                           \
+        t = -(IN.clipPos.z + IN.clipPos.w) / (dw+dz);
+   GENERAL_CLIP
+#undef OUTSIDE
+#undef COMPUTE_INTERSECTION
+
+#undef GENERAL_CLIP
+
+   *v0new = v0;
+   *v1new = v1;
+   return GL_TRUE;
+}
+
+
+
+/**
+ * \brief Apply clipping to a polygon against the 6 view volume planes.
+ *
+ * \param vIn array of input vertices.
+ * \param inCount number of input vertices
+ * \param vOut array of output vertices; the last clip pass writes into it.
+ *
+ * \return number of vertices in \p vOut, or 0 once fewer than 3 vertices remain.
+ *
+ * \sa #GENERAL_CLIP.
+ */
+static GLuint
+clip_polygon(const vertex *vIn, unsigned int inCount, vertex *vOut)
+{
+   vertex inlist[20], outlist[20];
+   GLfloat dx, dy, dz, dw, t;
+   GLuint incount, outcount, previ, curri, result;
+   const vertex *currVert, *prevVert;
+   vertex *newVert;
+
+
+#define GENERAL_CLIP(INCOUNT, INLIST, OUTCOUNT, OUTLIST)                \
+   if (INCOUNT < 3)                                                     \
+      return GL_FALSE;                                                  \
+   previ = INCOUNT - 1;         /* let previous = last vertex */        \
+   prevVert = INLIST + previ;                                           \
+   OUTCOUNT = 0;                                                        \
+   for (curri = 0; curri < INCOUNT; curri++) {                          \
+      currVert = INLIST + curri;                                        \
+      if (INSIDE(currVert)) {                                           \
+         if (INSIDE(prevVert)) {                                        \
+            /* both verts are inside ==> copy current to outlist */     \
+            OUTLIST[OUTCOUNT] = *currVert;                              \
+            OUTCOUNT++;                                                 \
+         }                                                              \
+         else {                                                         \
+            newVert = OUTLIST + OUTCOUNT;                               \
+            /* current is inside and previous is outside ==> clip */    \
+            COMPUTE_INTERSECTION( currVert, prevVert, newVert )         \
+            OUTCOUNT++;                                                 \
+            /* Output current */                                        \
+            OUTLIST[OUTCOUNT] = *currVert;                              \
+            OUTCOUNT++;                                                 \
+         }                                                              \
+      }                                                                 \
+      else {                                                            \
+         if (INSIDE(prevVert)) {                                        \
+            newVert = OUTLIST + OUTCOUNT;                               \
+            /* current is outside and previous is inside ==> clip */    \
+            COMPUTE_INTERSECTION( prevVert, currVert, newVert );        \
+            OUTLIST[OUTCOUNT] = *newVert;                               \
+            OUTCOUNT++;                                                 \
+         }                                                              \
+         /* else both verts are outside ==> do nothing */               \
+      }                                                                 \
+      /* let previous = current */                                      \
+      previ = curri;                                                    \
+      prevVert = currVert;                                              \
+   }
+
+/*
+ * Clip against +X (vIn -> outlist)
+ */
+#define INSIDE(V)       (V->clipPos.x <= V->clipPos.w)
+#define COMPUTE_INTERSECTION( IN, OUT, NEW )              \
+        dx = OUT->clipPos.x - IN->clipPos.x;              \
+        dw = OUT->clipPos.w - IN->clipPos.w;              \
+        t = (IN->clipPos.x - IN->clipPos.w) / (dw - dx);  \
+        interpolate_vertex(t, IN, OUT, NEW );
+
+   GENERAL_CLIP(inCount, vIn, outcount, outlist)
+
+#undef INSIDE
+#undef COMPUTE_INTERSECTION
+
+/*
+ * Clip against -X (outlist -> inlist)
+ */
+#define INSIDE(V)       (V->clipPos.x >= -V->clipPos.w)
+#define COMPUTE_INTERSECTION( IN, OUT, NEW )               \
+        dx = OUT->clipPos.x - IN->clipPos.x;               \
+        dw = OUT->clipPos.w - IN->clipPos.w;               \
+        t = -(IN->clipPos.x + IN->clipPos.w) / (dw + dx);  \
+        interpolate_vertex(t, IN, OUT, NEW );
+
+   GENERAL_CLIP(outcount, outlist, incount, inlist)
+
+#undef INSIDE
+#undef COMPUTE_INTERSECTION
+
+/*
+ * Clip against +Y (inlist -> outlist)
+ */
+#define INSIDE(V)       (V->clipPos.y <= V->clipPos.w)
+#define COMPUTE_INTERSECTION( IN, OUT, NEW )              \
+        dy = OUT->clipPos.y - IN->clipPos.y;              \
+        dw = OUT->clipPos.w - IN->clipPos.w;              \
+        t = (IN->clipPos.y - IN->clipPos.w) / (dw - dy);  \
+        interpolate_vertex(t, IN, OUT, NEW );
+
+   GENERAL_CLIP(incount, inlist, outcount, outlist)
+
+#undef INSIDE
+#undef COMPUTE_INTERSECTION
+
+/*
+ * Clip against -Y (outlist -> inlist)
+ */
+#define INSIDE(V)       (V->clipPos.y >= -V->clipPos.w)
+#define COMPUTE_INTERSECTION( IN, OUT, NEW )               \
+        dy = OUT->clipPos.y - IN->clipPos.y;               \
+        dw = OUT->clipPos.w - IN->clipPos.w;               \
+        t = -(IN->clipPos.y + IN->clipPos.w) / (dw + dy);  \
+        interpolate_vertex(t, IN, OUT, NEW );
+
+   GENERAL_CLIP(outcount, outlist, incount, inlist)
+
+#undef INSIDE
+#undef COMPUTE_INTERSECTION
+
+/*
+ * Clip against +Z (inlist -> outlist)
+ */
+#define INSIDE(V)       (V->clipPos.z <= V->clipPos.w)
+#define COMPUTE_INTERSECTION( IN, OUT, NEW )              \
+        dz = OUT->clipPos.z - IN->clipPos.z;              \
+        dw = OUT->clipPos.w - IN->clipPos.w;              \
+        t = (IN->clipPos.z - IN->clipPos.w) / (dw - dz);  \
+        interpolate_vertex(t, IN, OUT, NEW );
+
+   GENERAL_CLIP(incount, inlist, outcount, outlist)
+
+#undef INSIDE
+#undef COMPUTE_INTERSECTION
+
+/*
+ * Clip against -Z (outlist -> vOut, count stored in result)
+ */
+#define INSIDE(V)       (V->clipPos.z >= -V->clipPos.w)
+#define COMPUTE_INTERSECTION( IN, OUT, NEW )               \
+        dz = OUT->clipPos.z - IN->clipPos.z;               \
+        dw = OUT->clipPos.w - IN->clipPos.w;               \
+        t = -(IN->clipPos.z + IN->clipPos.w) / (dw + dz);  \
+        interpolate_vertex(t, IN, OUT, NEW );
+
+   GENERAL_CLIP(outcount, outlist, result, vOut)
+
+#undef INSIDE
+#undef COMPUTE_INTERSECTION
+
+#undef GENERAL_CLIP
+
+	return result;
+}
+
+/*@}*/
+
+
+
+/**********************************************************************/
+/** \name                    Selection                                */
+/**********************************************************************/
+/*@{*/
+
+/**
+ * \brief Select point.
+ * 
+ * \param v vertex.
+ *
+ * If clip_point() reports the point as visible, maps a copy of the vertex into
+ * window coordinates and calls _mesa_update_hitflag() with its window z.
+ */
+static void
+select_point(const vertex *v)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   if (clip_point(v) == 0)
+   {
+      vertex c = *v;
+      MAP_POINT(c.winPos, c.clipPos, ctx->Viewport);
+      _mesa_update_hitflag(ctx, c.winPos.z);
+   }
+}
+
+/**
+ * \brief Select line.
+ * 
+ * \param v0 first vertex.
+ * \param v1 second vertex.
+ *
+ * If clip_line() reports the segment as visible, maps both clipped end points
+ * into window coordinates and calls _mesa_update_hitflag() with each window z.
+ */
+static void
+select_line(const vertex *v0, const vertex *v1)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   vertex c0, c1;
+   if (clip_line(v0, v1, &c0, &c1))
+   {
+      MAP_POINT(c0.winPos, c0.clipPos, ctx->Viewport);
+      MAP_POINT(c1.winPos, c1.clipPos, ctx->Viewport);
+      _mesa_update_hitflag(ctx, c0.winPos.z);
+      _mesa_update_hitflag(ctx, c1.winPos.z);
+   }
+}
+
+/**
+ * \brief Select triangle.
+ * 
+ * \param v0 first vertex.
+ * \param v1 second vertex.
+ * \param v2 third vertex.
+ *
+ * Clips the triangle with clip_polygon() and, for every vertex of the result,
+ * maps it into window coordinates and calls _mesa_update_hitflag() with its z.
+ */
+static void
+select_triangle(const vertex *v0,
+		const vertex *v1,
+		const vertex *v2)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   vertex vlist[3], vclipped[3 + 6];  /* each of the 6 clip planes may add a vertex */
+   GLuint i, n;
+
+   vlist[0] = *v0;
+   vlist[1] = *v1;
+   vlist[2] = *v2;
+   n = clip_polygon(vlist, 3, vclipped);
+   for (i = 0; i < n; i++) {
+      MAP_POINT(vclipped[i].winPos, vclipped[i].clipPos, ctx->Viewport);
+      _mesa_update_hitflag(ctx, vclipped[i].winPos.z);
+   }
+}
+
+/**
+ * \brief Set current vertex coordinates.
+ *
+ * \param x x vertex coordinate.
+ * \param y y vertex coordinate.
+ * \param z z vertex coordinate.
+ * \param w homogeneous coordinate.
+ * 
+ * Stores the vertex and current attributes in ::vb, transforms it into eye space and then clip space.
+ * 
+ * If a sufficient number of vertices is stored calls one of select_point(),
+ * select_line() or select_triangle(), according to the current primitive
+ * (for GL_POLYGON, according to the front polygon mode).
+ */
+static void
+radeon_select_Vertex4f(GLfloat x, GLfloat y, GLfloat z, GLfloat w)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_polygon_attrib *p = &(ctx->Polygon);
+   vertex *v = vb.vBuffer + vb.vCount;
+
+   /* store the position plus the context's current color and texcoord */
+   v->pos.x = x;
+   v->pos.y = y;
+   v->pos.z = z;
+   v->pos.w = w;
+   v->color.r = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0];  
+   v->color.g = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1];  
+   v->color.b = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2];  
+   v->color.a = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3];  
+   v->texCoord.s = ctx->Current.Attrib[VERT_ATTRIB_TEX0][0]; 
+   v->texCoord.t = ctx->Current.Attrib[VERT_ATTRIB_TEX0][1]; 
+
+   /* transform to eye space (modelview), then clip space (projection) */
+   TRANSFORM_POINT(v->eyePos, ctx->ModelviewMatrixStack.Top->m, v->pos);
+   TRANSFORM_POINT(v->clipPos, ctx->ProjectionMatrixStack.Top->m, v->eyePos);
+
+   switch (ctx->Driver.CurrentExecPrimitive) {
+   case GL_POINTS:
+      assert(vb.vCount == 0);
+      select_point(v);
+      break;
+   case GL_LINES:
+      if (vb.vCount == 0)
+      {
+	 vb.vCount = 1;
+      }
+      else
+      {
+	 assert(vb.vCount == 1);
+	 select_line(vb.vBuffer + 0, vb.vBuffer + 1);
+	 vb.vCount = 0;
+      }
+      break;
+   case GL_LINE_STRIP:
+      if (vb.vCount == 0)
+      {
+	 vb.vCount = 1;
+      }
+      else
+      {
+	 assert(vb.vCount == 1);
+	 select_line(vb.vBuffer + 0, vb.vBuffer + 1);
+	 vb.vBuffer[0] = vb.vBuffer[1];
+	 /* leave vb.vCount at 1 */
+      }
+      break;
+   case GL_LINE_LOOP:
+      if (vb.vCount == 0)
+      {
+	 vb.vCount = 1;
+	 vb.partialLineLoop = GL_FALSE;
+      }
+      else if (vb.vCount == 1)
+      {
+	 select_line(vb.vBuffer + 0, vb.vBuffer + 1);
+	 vb.partialLineLoop = GL_TRUE;
+	 vb.vCount = 2;
+      }
+      else
+      {
+	 assert(vb.vCount == 2);
+	 vb.partialLineLoop = GL_FALSE;
+	 select_line(vb.vBuffer + 1, vb.vBuffer + 2);
+	 vb.vBuffer[1] = vb.vBuffer[2];
+	 /* leave vb.vCount at 2; vBuffer[0] stays the loop's first vertex */
+      }
+      break;
+   case GL_TRIANGLES:
+      if (vb.vCount == 0 || vb.vCount == 1)
+      {
+	 vb.vCount++;
+      }
+      else
+      {
+	 assert(vb.vCount == 2);
+	 select_triangle(vb.vBuffer + 0, vb.vBuffer + 1, vb.vBuffer + 2);
+	 vb.vCount = 0;
+      }
+      break;
+   case GL_TRIANGLE_STRIP:
+      if (vb.vCount == 0 || vb.vCount == 1)
+      {
+	 vb.vCount++;
+      }
+      else if (vb.vCount == 2)
+      {
+	 select_triangle(vb.vBuffer + 0, vb.vBuffer + 1, vb.vBuffer + 2);
+	 vb.vCount = 3;
+      }
+      else
+      {
+	 assert(vb.vCount == 3);
+	 select_triangle(vb.vBuffer + 1, vb.vBuffer + 3, vb.vBuffer + 2);
+	 vb.vBuffer[0] = vb.vBuffer[2];
+	 vb.vBuffer[1] = vb.vBuffer[3];
+	 vb.vCount = 2;
+      }
+      break;		
+   case GL_TRIANGLE_FAN:
+      if (vb.vCount == 0 || vb.vCount == 1)
+      {
+	 vb.vCount++;
+      }
+      else
+      {
+	 assert(vb.vCount == 2);
+	 select_triangle(vb.vBuffer + 0, vb.vBuffer + 1, vb.vBuffer + 2);
+	 vb.vBuffer[1] = vb.vBuffer[2];
+	 /* leave vb.vCount = 2; vBuffer[0] stays the fan's first vertex */
+      }
+      break;
+   case GL_QUADS:
+      if (vb.vCount < 3)
+      {
+	 vb.vCount++;
+      }
+      else
+      {
+	 assert(vb.vCount == 3);
+	 select_triangle(vb.vBuffer + 0, vb.vBuffer + 1, vb.vBuffer + 2);
+	 select_triangle(vb.vBuffer + 0, vb.vBuffer + 2, vb.vBuffer + 3);
+	 vb.vCount = 0;
+      }
+      break;		
+   case GL_QUAD_STRIP:
+      if (vb.vCount < 3)
+      {
+	 vb.vCount++;
+      }
+      else
+      {
+	 assert(vb.vCount == 3);
+	 select_triangle(vb.vBuffer + 0, vb.vBuffer + 1, vb.vBuffer + 2);
+	 select_triangle(vb.vBuffer + 1, vb.vBuffer + 3, vb.vBuffer + 2);
+	 vb.vBuffer[0] = vb.vBuffer[2];
+	 vb.vBuffer[1] = vb.vBuffer[3];
+	 vb.vCount = 2;
+      }
+      break;		
+   case GL_POLYGON:
+      switch (p->FrontMode) {
+      case GL_POINT:
+	 assert(vb.vCount == 0);
+	 select_point(v);
+	 break;
+      case GL_LINE:
+	 if (vb.vCount == 0)
+	 {
+	    vb.vCount = 1;
+	    vb.partialLineLoop = GL_FALSE;
+	 }
+	 else if (vb.vCount == 1)
+	 {
+	    select_line(vb.vBuffer + 0, vb.vBuffer + 1);
+	    vb.partialLineLoop = GL_TRUE;
+	    vb.vCount = 2;
+	 }
+	 else
+	 {
+	    assert(vb.vCount == 2);
+	    vb.partialLineLoop = GL_FALSE;
+	    select_line(vb.vBuffer + 1, vb.vBuffer + 2);
+	    vb.vBuffer[1] = vb.vBuffer[2];
+	    /* leave vb.vCount at 2 */
+	 }
+	 break;
+      case GL_FILL:
+	 /* draw as a tri-fan */
+	 if (vb.vCount == 0 || vb.vCount == 1)
+	 {
+	    vb.vCount++;
+	 }
+	 else
+	 {
+	    assert(vb.vCount == 2);
+	    select_triangle(vb.vBuffer + 0, vb.vBuffer + 1, vb.vBuffer + 2);
+	    vb.vBuffer[1] = vb.vBuffer[2];
+	    /* leave vb.vCount = 2 */
+	 }
+	 break;
+      default:
+	 ; /* impossible */
+      }
+      break;
+   default:
+      ; /* outside begin/end -- no action required */
+   }
+}
+
+/**
+ * \brief Calls radeon_select_Vertex4f() with z = 0 and w = 1.
+ */
+static void radeon_select_Vertex2f(GLfloat x, GLfloat y)
+{
+   radeon_select_Vertex4f(x, y, 0.0, 1.0);
+}
+
+/**
+ * \brief Calls radeon_select_Vertex4f() with v[0], v[1], z = 0 and w = 1.
+ */
+static void radeon_select_Vertex2fv(const GLfloat * v)
+{
+   radeon_select_Vertex4f(v[0], v[1], 0.0, 1.0);
+}
+
+/**
+ * \brief Calls radeon_select_Vertex4f() with w = 1.
+ */
+static void radeon_select_Vertex3f(GLfloat x, GLfloat y, GLfloat z)
+{
+   radeon_select_Vertex4f(x, y, z, 1.0);
+}
+
+/**
+ * \brief Calls radeon_select_Vertex4f() with v[0], v[1], v[2] and w = 1.
+ */
+static void radeon_select_Vertex3fv(const GLfloat * v)
+{
+   radeon_select_Vertex4f(v[0], v[1], v[2], 1.0);
+}
+
+
+/**
+ * \brief Set current vertex color.
+ *
+ * \param r red color component.
+ * \param g green color component.
+ * \param b blue color component.
+ * \param a alpha color component.
+ *
+ * Writes the four components to the context's current COLOR0 attribute.
+ */
+static void radeon_select_Color4f( GLfloat r, GLfloat g,
+				   GLfloat b, GLfloat a )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *dest = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
+   dest[0] = r;
+   dest[1] = g;
+   dest[2] = b;
+   dest[3] = a;
+}
+
+/**
+ * \brief Calls radeon_select_Color4f() with the four elements of \p v.
+ */
+static void radeon_select_Color4fv( const GLfloat *v )
+{
+   radeon_select_Color4f( v[0], v[1], v[2], v[3] );
+}
+
+/**
+ * \brief Calls radeon_select_Color4f() with alpha = 1.
+ */
+static void radeon_select_Color3f( GLfloat r, GLfloat g, GLfloat b )
+{
+   radeon_select_Color4f( r, g, b, 1.0 );
+}
+
+/**
+ * \brief Calls radeon_select_Color4f() with v[0], v[1], v[2] and alpha = 1.
+ */
+static void radeon_select_Color3fv( const GLfloat *v )
+{
+   radeon_select_Color4f( v[0], v[1], v[2], 1.0 );
+}
+
+/**
+ * \brief Set current vertex texture coordinates.
+ *
+ * \param s texture coordinate.
+ * \param t texture coordinate.
+ *
+ * Writes s and t to the first two elements of the context's current TEX0 attribute.
+ */
+static __inline__ void radeon_select_TexCoord2f( GLfloat s, GLfloat t )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *dest = ctx->Current.Attrib[VERT_ATTRIB_TEX0];
+   dest[0] = s;
+   dest[1] = t;
+}
+
+/**
+ * \brief Calls radeon_select_TexCoord2f() with v[0] and v[1].
+ */
+static void radeon_select_TexCoord2fv( const GLfloat *v )
+{
+   radeon_select_TexCoord2f( v[0], v[1] );
+}
+
+
+/**
+ * \brief Process glBegin(): validate the mode and reset the vertex buffer.
+ *
+ * \param mode primitive; values above GL_POLYGON raise GL_INVALID_ENUM.
+ */ 
+static void radeon_select_Begin(GLenum mode)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (mode > GL_POLYGON) {
+      _mesa_error( ctx, GL_INVALID_ENUM, "glBegin" );
+      return;
+   }
+
+   if (ctx->Driver.CurrentExecPrimitive != GL_POLYGON+1) {  /* GL_POLYGON+1: value stored by radeon_select_End() */
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" );
+      return;
+   }
+
+   ctx->Driver.CurrentExecPrimitive = mode;
+
+   vb.vCount = 0;
+   vb.lineReset = GL_TRUE;
+   vb.partialLineLoop = GL_FALSE;
+}
+
+/**
+ * \brief Process glEnd(): close a pending line loop and leave begin/end state.
+ */
+static void radeon_select_End(void)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if ( (ctx->Driver.CurrentExecPrimitive == GL_LINE_LOOP ||
+	 (ctx->Driver.CurrentExecPrimitive == GL_POLYGON && 
+	  ctx->Polygon.FrontMode == GL_LINE))
+	&& vb.vCount == 2 )
+   {
+      /* select the closing segment, from the last vertex back to the first */
+      if (vb.partialLineLoop)
+	 select_line(vb.vBuffer + 1, vb.vBuffer + 0);
+      else
+	 select_line(vb.vBuffer + 2, vb.vBuffer + 0);
+   }
+
+   ctx->Driver.CurrentExecPrimitive = GL_POLYGON+1;
+}
+
+
+/**
+ * \brief Flush vertices.
+ * 
+ * \param ctx GL context.
+ * \param flags not used.
+ *
+ * Only clears ctx->Driver.NeedFlush; the selection path has no buffered
+ * vertices to submit.
+ */
+static void radeonSelectFlushVertices( GLcontext *ctx, GLuint flags )
+{
+   ctx->Driver.NeedFlush = 0;
+}
+
+/**
+ * \brief Install the select callbacks.
+ *
+ * \param ctx GL context.
+ *
+ * Points the Color, TexCoord, Vertex, Begin and End entries of ctx->Exec at
+ * the radeon_select_* functions and sets ctx->Driver.FlushVertices.
+ */
+void radeon_select_Install( GLcontext *ctx )
+{
+   struct _glapi_table *exec = ctx->Exec;
+
+   exec->Color3f = radeon_select_Color3f;
+   exec->Color3fv = radeon_select_Color3fv;
+   exec->Color4f = radeon_select_Color4f;
+   exec->Color4fv = radeon_select_Color4fv;
+   exec->TexCoord2f = radeon_select_TexCoord2f;
+   exec->TexCoord2fv = radeon_select_TexCoord2fv;
+   exec->Vertex2f = radeon_select_Vertex2f;
+   exec->Vertex2fv = radeon_select_Vertex2fv;
+   exec->Vertex3f = radeon_select_Vertex3f;
+   exec->Vertex3fv = radeon_select_Vertex3fv;
+   exec->Begin = radeon_select_Begin;
+   exec->End = radeon_select_End;
+
+   ctx->Driver.FlushVertices = radeonSelectFlushVertices;
+}
+/*@}*/
+
+
+
+/**
+ * \brief Set rasterization mode.
+ *
+ * \param ctx GL context.
+ * \param mode rasterization mode. Supports GL_RENDER or GL_SELECT.
+ *
+ * If mode is GL_RENDER, calls radeonVtxfmtInit() (presumably reinstalling
+ * the normal rendering entry points -- confirm against its definition).
+ * If mode is GL_SELECT, calls radeon_select_Install().  Any other mode
+ * is ignored.
+ */
+static void radeonRenderMode( GLcontext *ctx, GLenum mode )
+{
+   switch (mode) {
+   case GL_RENDER:
+      radeonVtxfmtInit( ctx );
+      break;
+   case GL_SELECT:
+      radeon_select_Install( ctx );
+      break;
+   default:
+      break;
+   }
+}
+
+/**
+ * \brief Setup the GL context driver callbacks.
+ *
+ * \param ctx GL context; its Driver.RenderMode hook is set to radeonRenderMode().
+ *
+ * \sa Called by radeonCreateContext().
+ */
+void radeonInitSelect( GLcontext *ctx )
+{
+   ctx->Driver.RenderMode = radeonRenderMode;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_subset_tex.c b/src/mesa/drivers/dri/radeon/radeon_subset_tex.c
new file mode 100644
index 0000000000..e401779513
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_subset_tex.c
@@ -0,0 +1,1018 @@
+/**
+ * \file radeon_subset_tex.c
+ * \brief Texturing.
+ *
+ * \author Gareth Hughes <gareth@valinux.com>
+ * \author Brian Paul <brianp@valinux.com>
+ */
+
+/*
+ * Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+ *                      VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c,v 1.6 2002/09/16 18:05:20 eich Exp $ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "colormac.h"
+#include "context.h"
+#include "enums.h"
+#include "image.h"
+#include "simple_list.h"
+#include "texformat.h"
+#include "texstore.h"
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_subset.h"
+
+#include <errno.h>
+#include <stdio.h>
+
+
+/**
+ * \brief Tear down a driver texture object and free it.
+ *
+ * \param rmesa Radeon context; may be NULL, in which case unit 0 is untouched.
+ * \param t Radeon texture object to be destroyed; freed on return.
+ *
+ * Releases the texture heap block, clears the owning GL object's DriverData
+ * and, if \p t is bound to unit 0, resets that unit's texobj and hw.tex[0].
+ */
+void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
+{
+   if ( t->memBlock ) {
+      mmFreeMem( t->memBlock );
+      t->memBlock = NULL;
+   }
+
+   /* Break the back-link from the GL texture object. */
+   if ( t->tObj )
+      t->tObj->DriverData = NULL;
+
+   /* Drop the unit 0 binding, if this object holds it. */
+   if ( rmesa && rmesa->state.texture.unit[0].texobj == t ) {
+      rmesa->state.texture.unit[0].texobj = NULL;
+      remove_from_list( &rmesa->hw.tex[0] );
+      make_empty_list( &rmesa->hw.tex[0] );
+   }
+
+   remove_from_list( t );
+   FREE( t );
+}
+
+
+/**
+ * \brief Keep track of swapped out texture objects.
+ *
+ * \param rmesa Radeon context.
+ * \param t Radeon texture object.
+ *
+ * Frees the heap block held by the texture (if any), marks all of its mipmap
+ * images as dirty and moves it to the tail of rmesa->texture.swapped.
+ */
+static void radeonSwapOutTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
+{
+   if ( t->memBlock ) {
+      mmFreeMem( t->memBlock );
+      t->memBlock = NULL;
+   }
+
+   t->dirty_images = ~0;
+   move_to_tail( &rmesa->texture.swapped, t );
+}
+
+
+/**
+ * \brief Texture space has been invalidated.
+ *
+ * \param rmesa Radeon context.
+ * \param heap texture heap number; index into rmesa->texture.objects[].
+ *
+ * Calls radeonSwapOutTexObj() on every texture in the specified heap's list.
+ */
+void radeonAgeTextures( radeonContextPtr rmesa, int heap )
+{
+   radeonTexObjPtr t, tmp;
+
+   foreach_s ( t, tmp, &rmesa->texture.objects[heap] ) 
+      radeonSwapOutTexObj( rmesa, t );
+}
+
+
+/***************************************************************/
+/** \name Texture image conversions
+ */
+/*@{*/
+
+/**
+ * \brief Upload texture image.
+ *
+ * \param rmesa Radeon context.
+ * \param t Radeon texture object.
+ * \param level hardware level, i.e. relative to radeon_tex_obj::firstLevel.
+ * \param x sub-image abscissa. Not used.
+ * \param y sub-image ordinate. Not used.
+ * \param width sub-image width. Not used.
+ * \param height sub-image height. Not used.
+ *
+ * Fills in a drmRadeonTexture and drmRadeonTexImage structures and uploads the
+ * texture via the DRM_RADEON_TEXTURE ioctl, aborting in case of failure.
+ */
+static void radeonUploadSubImage( radeonContextPtr rmesa,
+				  radeonTexObjPtr t, GLint level,
+				  GLint x, GLint y, GLint width, GLint height )
+{
+   struct gl_texture_image *texImage;
+   GLint ret;
+   drmRadeonTexture tex;
+   drmRadeonTexImage tmp;
+
+   /* t->image[] starts at firstLevel, tObj->Image[] at GL level 0. */
+   texImage = t->tObj->Image[level + t->firstLevel];
+
+   if ( !texImage || !texImage->Data ) 
+      return;
+
+   t->image[level].data = texImage->Data;
+
+   tex.offset = t->bufAddr;
+   tex.pitch = (t->image[0].width * texImage->TexFormat->TexelBytes) / 64;
+   tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK;
+   tex.width = texImage->Width;
+   tex.height = texImage->Height;
+   tex.image = &tmp;
+
+   memcpy( &tmp, &t->image[level], sizeof(drmRadeonTexImage) );
+
+   do {
+      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
+                                 &tex, sizeof(drmRadeonTexture) );
+   } while ( ret && errno == EAGAIN );
+
+   if ( ret ) {
+      UNLOCK_HARDWARE( rmesa );
+      fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
+      exit( 1 );
+   }
+}
+
+/**
+ * \brief Upload texture images.
+ *
+ * This might require removing our own and/or other client's texture objects to
+ * make room for these images.
+ *
+ * \param rmesa Radeon context.
+ * \param tObj texture object to upload.
+ *
+ * Sets the matching hardware texture format. Calculates which mipmap levels to
+ * send, depending on the base image size, GL_TEXTURE_MIN_LOD,
+ * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, GL_TEXTURE_MAX_LEVEL (clamped to
+ * the base image's mipmap range) and the Radeon offset rules. Kicks out
+ * textures until the requested texture fits, sets the texture hardware state
+ * and, while holding the hardware lock, uploads any images that are new.
+ */
+static void radeonSetTexImages( radeonContextPtr rmesa,
+				struct gl_texture_object *tObj )
+{
+   radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
+   const struct gl_texture_image *baseImage = tObj->Image[tObj->BaseLevel];
+   GLint totalSize;
+   GLint texelsPerDword = 0, blitWidth = 0, blitPitch = 0;
+   GLint x, y, width, height;
+   GLint i;
+   GLint firstLevel, lastLevel, numLevels;
+   GLint log2Width, log2Height;
+   GLuint txformat = 0;
+
+   /* This code cannot be reached once we have lost focus */
+   assert(rmesa->radeonScreen->buffers);
+
+   /* Set the hardware texture format
+    */
+   switch (baseImage->TexFormat->MesaFormat) {
+   case MESA_FORMAT_I8:
+      txformat = RADEON_TXFORMAT_I8;
+      texelsPerDword = 4;
+      blitPitch = 64;
+      break;
+   case MESA_FORMAT_RGBA8888:
+      txformat = RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+      texelsPerDword = 1;
+      blitPitch = 16;
+      break;
+   case MESA_FORMAT_RGB565:
+      txformat = RADEON_TXFORMAT_RGB565;
+      texelsPerDword = 2;
+      blitPitch = 32;
+      break;
+   default:
+      _mesa_problem(NULL, "unexpected texture format in radeonSetTexImages");
+      return;
+   }
+
+   t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
+		       RADEON_TXFORMAT_ALPHA_IN_MAP);
+   t->pp_txformat |= txformat;
+
+   /* Select the larger of the two widths for our global texture image
+    * coordinate space.  As the Radeon has very strict offset rules, we
+    * can't upload mipmaps directly and have to reference their location
+    * from the aligned start of the whole image.
+    */
+   blitWidth = MAX2( baseImage->Width, blitPitch );
+
+   /* Calculate mipmap offsets and dimensions.
+    */
+   totalSize = 0;
+   x = 0;
+   y = 0;
+
+   /* Compute which mipmap levels we really want to send to the hardware.
+    * This depends on the base image size, GL_TEXTURE_MIN_LOD,
+    * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
+    * Yes, this looks overly complicated, but it's all needed.
+    */
+   firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5);
+   firstLevel = MAX2(firstLevel, tObj->BaseLevel);
+   firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2);
+   if ( !tObj->Image[firstLevel] ) firstLevel = tObj->BaseLevel; /* no image */
+   lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5);
+   lastLevel = MAX2(lastLevel, tObj->BaseLevel);
+   lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
+   lastLevel = MIN2(lastLevel, tObj->MaxLevel);
+   lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+
+   /* save these values */
+   t->firstLevel = firstLevel;
+   t->lastLevel = lastLevel;
+
+   numLevels = lastLevel - firstLevel + 1;
+
+   log2Width = tObj->Image[firstLevel]->WidthLog2;
+   log2Height = tObj->Image[firstLevel]->HeightLog2;
+
+   for ( i = 0 ; i < numLevels ; i++ ) {
+      const struct gl_texture_image *texImage = tObj->Image[i + firstLevel];
+      if ( !texImage )
+	 break;
+
+      width = texImage->Width;
+      height = texImage->Height;
+
+      /* Texture images have a minimum pitch of 32 bytes (half of the
+       * 64-byte minimum pitch for blits).  For images that have a
+       * width smaller than this, we must pad each texture image
+       * scanline out to this amount.
+       */
+      if ( width < blitPitch / 2 ) {
+	 width = blitPitch / 2;
+      }
+
+      totalSize += width * height * baseImage->TexFormat->TexelBytes;
+      ASSERT( (totalSize & 31) == 0 );
+
+      while ( width < blitWidth && height > 1 ) {
+	 width *= 2;
+	 height /= 2;
+      }
+
+      ASSERT(i < RADEON_MAX_TEXTURE_LEVELS);
+      t->image[i].x = x;
+      t->image[i].y = y;
+      t->image[i].width  = width;
+      t->image[i].height = height;
+
+      /* While blits must have a pitch of at least 64 bytes, mipmaps
+       * must be aligned on a 32-byte boundary (just like each texture
+       * image scanline).
+       */
+      if ( width >= blitWidth ) {
+	 y += height;
+      } else {
+	 x += width;
+	 if ( x >= blitWidth ) {
+	    x = 0;
+	    y++;
+	 }
+      }
+   }
+
+   /* Align the total size of texture memory block.
+    */
+   t->totalSize = (totalSize + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+
+   /* Hardware state:
+    */
+   t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
+   t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT;
+
+   t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
+		       RADEON_TXFORMAT_HEIGHT_MASK);
+   t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
+		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
+   t->dirty_state = TEX_ALL;
+
+   /* Update the local texture LRU.
+    */
+   move_to_head( &rmesa->texture.objects[0], t );
+
+   LOCK_HARDWARE( rmesa );
+
+   /* Kick out textures until the requested texture fits */
+   while ( !t->memBlock ) {
+      t->memBlock = mmAllocMem( rmesa->texture.heap[0], t->totalSize, 12, 0);
+      /* NOTE(review): nothing stops the LRU tail from being t itself. */
+      if (!t->memBlock)
+	 radeonSwapOutTexObj( rmesa, rmesa->texture.objects[0].prev );
+	 
+   }
+
+   /* Set the base offset of the texture image */
+   t->bufAddr = rmesa->radeonScreen->texOffset[0] + t->memBlock->ofs;
+   t->pp_txoffset = t->bufAddr;
+
+   /* Upload any images that are new 
+    */
+   for ( i = 0 ; i < numLevels ; i++ ) {
+      if ( t->dirty_images & (1 << i) ) {
+	 radeonUploadSubImage( rmesa, t, i, 0, 0,
+			       t->image[i].width, t->image[i].height );
+      }
+   }
+
+   rmesa->texture.age[0] = ++rmesa->sarea->texAge[0]; 
+   UNLOCK_HARDWARE( rmesa );
+   t->dirty_images = 0;
+}
+
+/*@}*/
+
+
+/******************************************************************/
+/** \name Texture combine functions
+ */
+/*@{*/
+
+enum {
+   RADEON_DISABLE	= 0, /**< \brief index of the "disable stage" entry */
+   RADEON_REPLACE	= 1, /**< \brief index of the GL_REPLACE entry */
+   RADEON_MODULATE	= 2, /**< \brief index of the GL_MODULATE entry */
+   RADEON_DECAL		= 3, /**< \brief index of the GL_DECAL entry */
+   RADEON_BLEND		= 4, /**< \brief index of the GL_BLEND entry */
+   RADEON_MAX_COMBFUNC	= 5  /**< \brief entries per unit in the combine tables */
+} ;
+
+
+/**
+ * \brief Color combine hardware state, indexed [unit][RADEON_* function].
+ */
+static const GLuint radeon_color_combine[][RADEON_MAX_COMBFUNC] =
+{
+   /* Unit 0:
+    */
+   {
+      /* Disable combiner stage
+       */
+      (RADEON_COLOR_ARG_A_ZERO |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_CURRENT_COLOR |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_REPLACE = 0x00802800
+       */
+      (RADEON_COLOR_ARG_A_ZERO |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_T0_COLOR |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_MODULATE = 0x00800142
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_T0_COLOR |
+       RADEON_COLOR_ARG_C_ZERO |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_DECAL = 0x008c2d42
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_T0_COLOR |
+       RADEON_COLOR_ARG_C_T0_ALPHA |
+       RADEON_BLEND_CTL_BLEND |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_BLEND = 0x008c2902
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_TFACTOR_COLOR |
+       RADEON_COLOR_ARG_C_T0_COLOR |
+       RADEON_BLEND_CTL_BLEND |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+   },
+
+};
+
+/**
+ * \brief Alpha combine hardware state, indexed [unit][RADEON_* function].
+ */
+static const GLuint radeon_alpha_combine[][RADEON_MAX_COMBFUNC] =
+{
+   /* Unit 0:
+    */
+   {
+      /* Disable combiner stage
+       */
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_REPLACE = 0x00800500
+       */
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_T0_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_MODULATE = 0x00800051
+       */
+      (RADEON_ALPHA_ARG_A_CURRENT_ALPHA |
+       RADEON_ALPHA_ARG_B_T0_ALPHA |
+       RADEON_ALPHA_ARG_C_ZERO |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_DECAL = 0x00800100
+       */
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_BLEND = 0x00800051
+       */
+      (RADEON_ALPHA_ARG_A_CURRENT_ALPHA |
+       RADEON_ALPHA_ARG_B_TFACTOR_ALPHA |
+       RADEON_ALPHA_ARG_C_T0_ALPHA |
+       RADEON_BLEND_CTL_BLEND |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+   },
+
+};
+
+/*@}*/
+
+
+/******************************************************************/
+/** \name Texture unit state management
+ */
+/*@{*/
+
+/**
+ * \brief Update the texture environment.
+ *
+ * \param ctx GL context.
+ * \param unit texture unit to update; also the first index into the tables.
+ *
+ * Picks the TEX_PP_TXCBLEND and TEX_PP_TXABLEND values from the
+ * ::radeon_color_combine and ::radeon_alpha_combine tables by EnvMode and base
+ * image format, and flags a state change only if either value differs.
+ */
+static void radeonUpdateTextureEnv( GLcontext *ctx, int unit )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   const struct gl_texture_object *tObj = texUnit->_Current;
+   const GLenum format = tObj->Image[tObj->BaseLevel]->Format;
+   GLuint color_combine = radeon_color_combine[unit][RADEON_DISABLE];
+   GLuint alpha_combine = radeon_alpha_combine[unit][RADEON_DISABLE];
+
+
+   /* Set the texture environment state.  Isn't this nice and clean?
+    * The Radeon will automagically set the texture alpha to 0xff when
+    * the texture format does not include an alpha component.  This
+    * reduces the amount of special-casing we have to do, alpha-only
+    * textures being a notable exception.
+    */
+   switch ( texUnit->EnvMode ) {
+   case GL_REPLACE:
+      switch ( format ) {
+      case GL_RGBA:
+      case GL_INTENSITY:
+	 color_combine = radeon_color_combine[unit][RADEON_REPLACE];
+	 alpha_combine = radeon_alpha_combine[unit][RADEON_REPLACE];
+	 break;
+      case GL_RGB:
+	 color_combine = radeon_color_combine[unit][RADEON_REPLACE];
+	 alpha_combine = radeon_alpha_combine[unit][RADEON_DISABLE];
+         break;
+      default:   /* other formats keep both RADEON_DISABLE entries */
+	 break;
+      }
+      break;
+
+   case GL_MODULATE:
+      switch ( format ) {
+      case GL_RGBA:
+      case GL_INTENSITY:
+	 color_combine = radeon_color_combine[unit][RADEON_MODULATE];
+	 alpha_combine = radeon_alpha_combine[unit][RADEON_MODULATE];
+	 break;
+      case GL_RGB:
+	 color_combine = radeon_color_combine[unit][RADEON_MODULATE];
+	 alpha_combine = radeon_alpha_combine[unit][RADEON_DISABLE];
+	 break;
+      default:
+	 break;
+      }
+      break;
+
+   case GL_DECAL:
+      switch ( format ) {
+      case GL_RGBA:
+      case GL_RGB:
+	 color_combine = radeon_color_combine[unit][RADEON_DECAL];
+	 alpha_combine = radeon_alpha_combine[unit][RADEON_DISABLE];
+	 break;
+      case GL_INTENSITY:
+	 color_combine = radeon_color_combine[unit][RADEON_DISABLE];
+	 alpha_combine = radeon_alpha_combine[unit][RADEON_DISABLE];
+	 break;
+      default:
+	 break;
+      }
+      break;
+
+   case GL_BLEND:
+      switch ( format ) {
+      case GL_RGBA:
+      case GL_RGB:
+	 color_combine = radeon_color_combine[unit][RADEON_BLEND];
+	 alpha_combine = radeon_alpha_combine[unit][RADEON_MODULATE];
+	 break;
+      case GL_INTENSITY:
+	 color_combine = radeon_color_combine[unit][RADEON_BLEND];
+	 alpha_combine = radeon_alpha_combine[unit][RADEON_BLEND];
+	 break;
+      default:
+	 break;
+      }
+      break;
+
+   default:   /* any other EnvMode keeps both RADEON_DISABLE entries */
+      break;
+   }
+
+   if ( rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] != color_combine ||
+	rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] != alpha_combine ) {
+      RADEON_STATECHANGE( rmesa, tex[unit] );
+      rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] = color_combine;
+      rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] = alpha_combine;
+   }
+}
+
+/** \brief TEX_PP_TXFILTER bits that are taken from the texture object. */
+#define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK |	\
+			      RADEON_MIN_FILTER_MASK | 		\
+			      RADEON_MAG_FILTER_MASK |		\
+			      RADEON_MAX_ANISO_MASK |		\
+			      RADEON_CLAMP_S_MASK | 		\
+			      RADEON_CLAMP_T_MASK)
+/** \brief TEX_PP_TXFORMAT bits that are taken from the texture object. */
+#define TEXOBJ_TXFORMAT_MASK (RADEON_TXFORMAT_WIDTH_MASK |	\
+			      RADEON_TXFORMAT_HEIGHT_MASK |	\
+			      RADEON_TXFORMAT_FORMAT_MASK |	\
+			      RADEON_TXFORMAT_ALPHA_IN_MAP)
+
+
+/** \brief Bring unit 0's hardware texture state in line with the GL state. */
+void radeonUpdateTextureState( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[0];
+
+   if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
+      struct gl_texture_object *tObj = texUnit->_Current;
+      radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; /* NOTE(review): assumed non-NULL - confirm */
+
+      /* Upload teximages (not pipelined): pending vertices are fired
+       * before radeonSetTexImages() runs. */
+      if ( t->dirty_images ) {
+	 RADEON_FIREVERTICES( rmesa );
+	 radeonSetTexImages( rmesa, tObj );
+      }
+
+      /* Update state if this is a different texture object to last
+       * time.
+       */
+      if ( rmesa->state.texture.unit[0].texobj != t ) {
+	 rmesa->state.texture.unit[0].texobj = t;
+	 t->dirty_state |= 1<<0;
+	 move_to_head( &rmesa->texture.objects[0], t );
+      }
+
+      if (t->dirty_state) {
+	 GLuint *cmd = RADEON_DB_STATE( tex[0] );
+
+	 cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK;
+	 cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+	 cmd[TEX_PP_TXFILTER] |= t->pp_txfilter & TEXOBJ_TXFILTER_MASK;
+	 cmd[TEX_PP_TXFORMAT] |= t->pp_txformat & TEXOBJ_TXFORMAT_MASK;
+	 cmd[TEX_PP_TXOFFSET] = t->pp_txoffset;
+	 cmd[TEX_PP_BORDER_COLOR] = t->pp_border_color;
+	 
+	 RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.tex[0] );
+	 t->dirty_state = 0;
+      }
+
+      /* Newly enabled?
+       */
+      if (!(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & RADEON_TEX_0_ENABLE)) {
+	 RADEON_STATECHANGE( rmesa, ctx );
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= (RADEON_TEX_0_ENABLE | 
+					    RADEON_TEX_BLEND_0_ENABLE);
+
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_ST0;
+      }
+
+      radeonUpdateTextureEnv( ctx, 0 );
+   }
+   else if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<0)) {
+      /* Texture unit disabled but still enabled in PP_CNTL: turn it off */
+      rmesa->state.texture.unit[0].texobj = 0;
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= 
+	 ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << 0);
+
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_TCL_VTX_ST0 |
+						RADEON_TCL_VTX_Q0);
+   }
+}
+
+
+
+/**
+ * \brief Choose texture format.
+ *
+ * \param ctx GL context.
+ * \param internalFormat texture internal format.
+ * \param format pixel format. Not used.
+ * \param type pixel data type. Not used.
+ *
+ * \return pointer to chosen texture format, or NULL (after reporting a
+ * problem) when \p internalFormat is not one of the handled formats.
+ *
+ * Maps RGBA/RGBA8 to rgba8888, RGB/RGB5 to rgb565 and INTENSITY(8) to i8.
+ */
+static const struct gl_texture_format *
+radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+                           GLenum format, GLenum type )
+{
+   switch ( internalFormat ) {
+   case GL_RGBA:
+   case GL_RGBA8:
+      return &_mesa_texformat_rgba8888;
+
+   case GL_RGB:
+   case GL_RGB5:
+      return &_mesa_texformat_rgb565;
+
+   case GL_INTENSITY:
+   case GL_INTENSITY8:
+      return &_mesa_texformat_i8;
+
+   default:
+      _mesa_problem(ctx, "unexpected texture format in radeonChooseTextureFormat");
+      return NULL;
+   }
+}
+
+/**
+ * \brief Allocate a Radeon texture object.
+ *
+ * \param texObj texture object that will own the new driver object.
+ *
+ * \return pointer to the device specific texture object on success, or NULL on failure.
+ *
+ * Creates a zeroed radeon_tex_obj, links it with \p texObj in both
+ * directions, makes it an empty list node and marks all images dirty.
+ */
+static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj )
+{
+   radeonTexObjPtr obj = CALLOC_STRUCT( radeon_tex_obj );
+
+   if ( obj != NULL ) {
+      /* Cross-link the driver object and the GL texture object. */
+      obj->tObj = texObj;
+      texObj->DriverData = obj;
+      make_empty_list( obj );
+      /* Flag every image as dirty. */
+      obj->dirty_images = ~0;
+   }
+   return obj;
+}
+
+
+/**
+ * \brief Load a texture image.
+ *
+ * \param ctx GL context.
+ * \param texObj texture object
+ * \param target target texture.
+ * \param level level of detail number.
+ * \param internalFormat internal format.
+ * \param width texture image width.
+ * \param height texture image height.
+ * \param border border width.
+ * \param format pixel format.
+ * \param type pixel data type.
+ * \param pixels image data.
+ * \param packing passed to _mesa_store_teximage2d() unchanged.
+ * \param texImage passed to _mesa_store_teximage2d() unchanged.
+ *
+ * If there is a device specific texture object associated with the given
+ * texture object then swaps that texture out. Calls _mesa_store_teximage2d()
+ * with all other parameters unchanged.
+ */
+static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+                              GLint internalFormat,
+                              GLint width, GLint height, GLint border,
+                              GLenum format, GLenum type, const GLvoid *pixels,
+                              const struct gl_pixelstore_attrib *packing,
+                              struct gl_texture_object *texObj,
+                              struct gl_texture_image *texImage )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   radeonTexObjPtr t = (radeonTexObjPtr)texObj->DriverData;
+
+   if ( t ) 
+      radeonSwapOutTexObj( rmesa, t );
+
+   /* Note, this will call radeonChooseTextureFormat.  Honour the caller's
+    * packing rather than assuming it is always ctx->Unpack. */
+   _mesa_store_teximage2d(ctx, target, level, internalFormat,
+                          width, height, border, format, type, pixels,
+                          packing, texObj, texImage);
+}
+
+/**
+ * \brief Set texture environment parameters.
+ *
+ * \param ctx GL context.
+ * \param target texture environment. Not used.
+ * \param pname texture parameter. Accepted value is GL_TEXTURE_ENV_COLOR.
+ * \param param parameter value. Not used; the color is read from the unit.
+ *
+ * Updates the current unit's TEX_PP_TFACTOR value from the unit's EnvColor
+ * and flags the state change when the value differs.
+ */
+static void radeonTexEnv( GLcontext *ctx, GLenum target,
+			  GLenum pname, const GLfloat *param )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint unit = ctx->Texture.CurrentUnit;
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLubyte rgba[4];
+   GLuint tfactor;
+
+   /* Only the environment color is handled here. */
+   if ( pname != GL_TEXTURE_ENV_COLOR )
+      return;
+
+   /* Pack the unit's env color channels into one register value. */
+   UNCLAMPED_FLOAT_TO_RGBA_CHAN( rgba, texUnit->EnvColor );
+   tfactor = radeonPackColor( 4, rgba[0], rgba[1], rgba[2], rgba[3] );
+
+   /* Nothing to do when the register already holds this value. */
+   if ( rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] == tfactor )
+      return;
+
+   RADEON_STATECHANGE( rmesa, tex[unit] );
+   rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] = tfactor;
+}
+
+/**
+ * \brief Set texture parameter.
+ *
+ * \param ctx GL context.
+ * \param target target texture.
+ * \param texObj texture object.
+ * \param pname texture parameter.
+ * \param params parameter value. Not used; values are read from \p texObj.
+ *
+ * Allocates the device specific texture object data if it doesn't exist
+ * already; returns without doing anything if that allocation fails.
+ *
+ * Updates the texture object radeon_tex_obj::pp_txfilter register and marks
+ * the texture state (radeon_tex_obj::dirty_state) as dirty.
+ */
+static void radeonTexParameter( GLcontext *ctx, GLenum target,
+				struct gl_texture_object *texObj,
+				GLenum pname, const GLfloat *params )
+{
+   radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
+
+   if ( !t && !(t = radeonAllocTexObj( texObj )) )
+      return;   /* allocation failed: no driver object to update */
+
+   switch ( pname ) {
+   case GL_TEXTURE_MIN_FILTER:
+      t->pp_txfilter &= ~RADEON_MIN_FILTER_MASK;
+      switch ( texObj->MinFilter ) {
+      case GL_NEAREST:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST;
+	 break;
+      case GL_LINEAR:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR;
+	 break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST_MIP_NEAREST;
+	 break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR_MIP_NEAREST;
+	 break;
+      case GL_LINEAR_MIPMAP_NEAREST:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST_MIP_LINEAR;
+	 break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR_MIP_LINEAR;
+	 break;
+      }
+      break;
+
+   case GL_TEXTURE_MAG_FILTER:
+      t->pp_txfilter &= ~RADEON_MAG_FILTER_MASK;
+      switch ( texObj->MagFilter ) {
+      case GL_NEAREST:
+	 t->pp_txfilter |= RADEON_MAG_FILTER_NEAREST;
+	 break;
+      case GL_LINEAR:
+	 t->pp_txfilter |= RADEON_MAG_FILTER_LINEAR;
+	 break;
+      }
+      break;
+
+   case GL_TEXTURE_WRAP_S:
+      t->pp_txfilter &= ~RADEON_CLAMP_S_MASK;
+      switch ( texObj->WrapS ) {
+      case GL_REPEAT:
+	 t->pp_txfilter |= RADEON_CLAMP_S_WRAP;
+	 break;
+      case GL_CLAMP_TO_EDGE:
+	 t->pp_txfilter |= RADEON_CLAMP_S_CLAMP_LAST;
+	 break;
+      }
+      break;
+
+   case GL_TEXTURE_WRAP_T:
+      t->pp_txfilter &= ~RADEON_CLAMP_T_MASK;
+      switch ( texObj->WrapT ) {
+      case GL_REPEAT:
+	 t->pp_txfilter |= RADEON_CLAMP_T_WRAP;
+	 break;
+      case GL_CLAMP_TO_EDGE:
+	 t->pp_txfilter |= RADEON_CLAMP_T_CLAMP_LAST;
+	 break;
+      }
+      break;
+
+   default:
+      return;
+   }
+
+   /* Mark this texobj as dirty (one bit per tex unit)
+    */
+   t->dirty_state = TEX_ALL;
+}
+
+/**
+ * \brief Bind texture.
+ *
+ * \param ctx GL context. Not used.
+ * \param target not used.
+ * \param texObj texture object.
+ *
+ * Calls radeonAllocTexObj() if \p texObj has no DriverData; result unchecked.
+ */
+static void radeonBindTexture( GLcontext *ctx, GLenum target,
+			       struct gl_texture_object *texObj )
+{
+   if ( !texObj->DriverData ) 
+      radeonAllocTexObj( texObj );
+}
+
+/**
+ * \brief Delete texture.
+ *
+ * \param ctx GL context.
+ * \param texObj texture object.
+ *
+ * Does nothing when \p texObj has no driver object. Otherwise fires any
+ * outstanding vertices and destroys the device specific texture object.
+ */
+static void radeonDeleteTexture( GLcontext *ctx,
+				 struct gl_texture_object *texObj )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   radeonTexObjPtr driverTex = (radeonTexObjPtr) texObj->DriverData;
+
+   if ( driverTex == NULL )
+      return;   /* no driver-side object attached */
+   if ( rmesa != NULL )
+      RADEON_FIREVERTICES( rmesa );
+   radeonDestroyTexObj( rmesa, driverTex );
+}
+
+/**
+ * \brief Initialize context texture object data.
+ *
+ * \param ctx GL context.
+ *
+ * Called by radeonInitTextureFuncs() to setup the context initial texture
+ * objects; skips the swapped-list insertion if allocation failed.
+ */
+static void radeonInitTextureObjects( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct gl_texture_object *texObj;
+   GLuint tmp = ctx->Texture.CurrentUnit;
+
+   ctx->Texture.CurrentUnit = 0;
+
+   texObj = ctx->Texture.Unit[0].Current2D;
+   radeonBindTexture( ctx, GL_TEXTURE_2D, texObj );
+   if ( texObj->DriverData ) {   /* radeonBindTexture() may fail to allocate */
+      move_to_tail( &rmesa->texture.swapped,
+		    (radeonTexObjPtr)texObj->DriverData );
+   }
+   ctx->Texture.CurrentUnit = tmp;
+}
+
+/**
+ * \brief Install the texture driver callbacks and set up the initial objects.
+ *
+ * \param ctx GL context whose Driver table is filled in.
+ *
+ * \sa Called by radeonCreateContext().
+ */
+void radeonInitTextureFuncs( GLcontext *ctx )
+{
+   ctx->Driver.ChooseTextureFormat	= radeonChooseTextureFormat;
+   ctx->Driver.TexImage2D		= radeonTexImage2D;
+
+   ctx->Driver.BindTexture		= radeonBindTexture;
+   ctx->Driver.CreateTexture		= NULL; /* FIXME: Is this used??? */
+   ctx->Driver.DeleteTexture		= radeonDeleteTexture;
+   ctx->Driver.PrioritizeTexture	= NULL;
+   ctx->Driver.ActiveTexture		= NULL;
+   ctx->Driver.UpdateTexturePalette	= NULL;
+
+   ctx->Driver.TexEnv			= radeonTexEnv;
+   ctx->Driver.TexParameter		= radeonTexParameter;
+
+   radeonInitTextureObjects( ctx );
+}
+
+/*@}*/
diff --git a/src/mesa/drivers/dri/radeon/radeon_subset_vtx.c b/src/mesa/drivers/dri/radeon/radeon_subset_vtx.c
new file mode 100644
index 0000000000..aa6ec73d5b
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_subset_vtx.c
@@ -0,0 +1,989 @@
+/**
+ * \file radeon_subset_vtx.c
+ * \brief Vertex buffering.
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/*
+ * Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+ *                      Tungsten Graphics Inc., Cedar Park, Texas.
+ * 
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ */
+
+/* $XFree86$ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "api_noop.h"
+#include "context.h"
+/*#include "mmath.h" */
+#include "mtypes.h"
+#include "enums.h"
+#include "glapi.h"
+#include "colormac.h"
+#include "state.h"
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_subset.h"
+
+/**
+ * \brief One vertex dword, viewable either as a float or as an int.
+ */
+union vertex_dword { 
+	float f; /**< \brief floating point view */
+	int i;   /**< \brief integer view of the same storage */
+};
+
+
+/**
+ * \brief Maximum number of dwords per vertex.
+ *
+ * Defined as 10 to hold: \code xyzw rgba st \endcode
+ */
+#define MAX_VERTEX_DWORDS 10 /* radeonVtxfmtValidate() uses at most 3+4+2 = 9 */
+
+
+/**
+ * \brief Global vertex buffer data (one file-scope instance, not per context).
+ */
+static struct vb_t {
+   /**
+    * \brief Notification mechanism.  
+    *
+    * These are treated as a stack to allow us to do things like build quads in
+    * temporary storage and then emit them as triangles.  stack[0] is the head.
+    */
+   struct {
+      GLint vertspace;         /**< \brief vertices left before notify() runs */
+      GLint initial_vertspace; /**< \brief vertspace when the entry was set up */
+      GLint *dmaptr;           /**< \brief where the next dword is written */
+      void (*notify)( void );  /**< \brief called when vertspace reaches zero */
+   } stack[2];
+
+   /**
+    * \brief Storage for current vertex.
+    */
+   union vertex_dword vertex[MAX_VERTEX_DWORDS];
+
+   /**
+    * \brief Temporary storage for quads, etc. (room for four vertices).
+    */
+   union vertex_dword vertex_store[MAX_VERTEX_DWORDS * 4];
+
+   /**
+    * \name Color/texture
+    *
+    * Pointers to either vertex or ctx->Current.Attrib, depending on whether
+    * color/texture participates in the current vertex.
+    */
+   /*@{*/
+   GLfloat *floatcolorptr; /**< \brief color (four floats are accessed) */
+   GLfloat *texcoordptr;   /**< \brief texture (two floats are accessed) */
+   /*@}*/
+
+   /**
+    * \brief Pointer to the GL context (set by radeonVtxfmtInit()).
+    */
+   GLcontext *context;
+
+   /**
+    * \brief Active primitive.
+    *
+    * \note May differ from ctx->Driver.CurrentExecPrimitive.
+    */
+   /*@{*/
+   GLenum prim;          /**< \brief primitive; used as index into prims[] */
+   GLuint vertex_format; /**< \brief vertex format (RADEON_CP_VC_FRMT_* bits) */
+   GLint vertex_size;    /**< \brief vertex size, in dwords */
+   GLboolean recheck;    /**< \brief set when this information must be revalidated */
+   /*@}*/
+} vb;
+
+
+static void radeonFlushVertices( GLcontext *, GLuint ); /* defined further down */
+
+
+/**
+ * \brief Primitive information table, indexed by GL primitive (vb.prim).
+ */
+static struct prims_t { 
+   int start,  /**< \brief minimum vertex count needed to emit anything */
+       incr,   /**< \brief vertex increment for a further primitive */
+       hwprim; /**< \brief hardware primitive */
+} prims[10] = {
+   { 1, 1, RADEON_CP_VC_CNTL_PRIM_TYPE_POINT },      /* GL_POINTS */
+   { 2, 2, RADEON_CP_VC_CNTL_PRIM_TYPE_LINE },       /* GL_LINES */
+   { 2, 1, RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP }, /* GL_LINE_LOOP */
+   { 2, 1, RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP }, /* GL_LINE_STRIP */
+   { 3, 3, RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST },   /* GL_TRIANGLES */
+   { 3, 1, RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP },  /* GL_TRIANGLE_STRIP */
+   { 3, 1, RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN },    /* GL_TRIANGLE_FAN */
+   { 4, 4, RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST },   /* GL_QUADS */
+   { 4, 2, RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP },  /* GL_QUAD_STRIP */
+   { 3, 1, RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN },    /* GL_POLYGON */
+};
+
+
+/**
+ * \brief Finish the primitive in the vertex buffer.
+ *
+ * \param rmesa Radeon context.
+ *
+ * Drops trailing vertices that do not complete a primitive, emits the rest
+ * and advances the current DMA region.  Emits nothing if there are too few.
+ */
+static void finish_prim( radeonContextPtr rmesa )
+{
+   GLuint prim_end = vb.stack[0].initial_vertspace - vb.stack[0].vertspace;
+   /* prim_end = number of vertices written through stack[0] so far. */
+   /* Too few vertices? (eg: 2 vertices for a triangles prim?)
+    */
+   if (prim_end < prims[vb.prim].start) 
+      return;
+
+   /* Drop redundant vertices off end of primitive.  (eg: 5 vertices
+    * for triangles prim?)
+    */
+   prim_end -= (prim_end - prims[vb.prim].start) % prims[vb.prim].incr;
+
+   radeonEmitVertexAOS( rmesa, vb.vertex_size, GET_START(&rmesa->dma.current) );
+
+   radeonEmitVbufPrim( rmesa, vb.vertex_format,
+		       prims[vb.prim].hwprim | rmesa->tcl.tcl_flag, 
+		       prim_end );
+   /* Advance start, and ptr with it, past the bytes used (4 per dword). */
+   rmesa->dma.current.ptr = 
+      rmesa->dma.current.start += prim_end * vb.vertex_size * 4; 
+}
+
+
+/**
+ * \brief Copy a vertex from the current DMA region
+ *
+ * \param rmesa Radeon context.
+ * \param n vertex index relative to the current DMA region.
+ * \param dst destination pointer; receives vb.vertex_size dwords.
+ *
+ * Used internally by copy_dma_verts().
+ */
+static void copy_vertex( radeonContextPtr rmesa, GLuint n, GLfloat *dst )
+{
+   GLuint i;
+   GLfloat *src = (GLfloat *)(rmesa->dma.current.address + 
+			      rmesa->dma.current.ptr + 
+			      n * vb.vertex_size * 4); /* byte offset of vertex n */
+
+   for (i = 0 ; i < vb.vertex_size; i++) 
+      dst[i] = src[i];
+}
+
+
+/**
+ * \brief Copy last vertices from the current DMA buffer to resume in a new buffer.
+ *
+ * \param rmesa Radeon context.
+ * \param tmp destination buffer.
+ *
+ * Takes from the current DMA buffer the last vertices necessary to resume in a
+ * new buffer, according to the current primitive.  Uses internally
+ * copy_vertex() for the vertex copying.
+ * \return number of vertices copied into \p tmp (0 to 3).
+ */
+static GLuint copy_dma_verts( radeonContextPtr rmesa, 
+			      GLfloat (*tmp)[MAX_VERTEX_DWORDS] )
+{
+   GLuint ovf, i;
+   GLuint nr = vb.stack[0].initial_vertspace - vb.stack[0].vertspace;
+
+   switch( vb.prim )
+   {
+   case GL_POINTS:
+      return 0;
+   case GL_LINES:
+      ovf = nr&1;   /* a dangling first endpoint, if any */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_LINE_STRIP:
+      if (nr == 0) 
+	 return 0;
+      copy_vertex( rmesa, nr-1, tmp[0] );   /* last vertex only */
+      return 1;
+   case GL_LINE_LOOP:
+   case GL_TRIANGLE_FAN:
+   case GL_POLYGON:
+      if (nr == 0) 
+	 return 0;
+      else if (nr == 1) {
+	 copy_vertex( rmesa, 0, tmp[0] );
+	 return 1;
+      } else {
+	 copy_vertex( rmesa, 0, tmp[0] );      /* first vertex */
+	 copy_vertex( rmesa, nr-1, tmp[1] );   /* most recent vertex */
+	 return 2;
+      }
+   case GL_TRIANGLES:
+      ovf = nr % 3;   /* vertices of an incomplete triangle */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_QUADS:
+      ovf = nr % 4;   /* vertices of an incomplete quad */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_TRIANGLE_STRIP:
+   case GL_QUAD_STRIP:
+      ovf = MIN2(nr, 2);   /* the last two vertices, when available */
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   default:
+      return 0;
+   }
+}
+
+static void notify_wrap_buffer( void );   /* needed by reset_notify() below */
+
+/**
+ * \brief Resets the vertex buffer notification mechanism.
+ *
+ * Points stack[0] at the current DMA region (radeon_dma::current), sizes it
+ * in whole vertices (rounded down to an even count) and installs
+ * notify_wrap_buffer() as the callback.
+ */
+static void reset_notify( void )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( vb.context );
+
+   vb.stack[0].dmaptr = (int *)(rmesa->dma.current.address +
+				rmesa->dma.current.ptr);
+   vb.stack[0].vertspace = ((rmesa->dma.current.end - rmesa->dma.current.ptr) / 
+			    (vb.vertex_size * 4)); /* bytes left / bytes per vertex */
+   vb.stack[0].vertspace &= ~1;	/* even numbers only -- avoid tristrip parity */
+   vb.stack[0].initial_vertspace = vb.stack[0].vertspace;
+   vb.stack[0].notify = notify_wrap_buffer;
+}      
+
+/**
+ * \brief Full buffer notification callback.
+ *
+ * Saves the vertices needed to continue the primitive (copy_dma_verts()),
+ * emits what is buffered, switches to a fresh DMA region, resets the
+ * notification state and re-emits the saved vertices into the new region.
+ */
+static void notify_wrap_buffer( void )
+{
+   GLcontext *ctx = vb.context;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLfloat tmp[3][MAX_VERTEX_DWORDS];   /* copy_dma_verts() returns at most 3 */
+   GLuint i, nrverts = 0;
+
+   /* Copy vertices out of dma (must happen before finish_prim() moves ptr):
+    */
+   nrverts = copy_dma_verts( rmesa, tmp );
+   finish_prim( rmesa );
+
+   /* Get new buffer
+    */
+   radeonRefillCurrentDmaRegion( rmesa );
+
+   /* Reset vertspace[0], dmaptr
+    */
+   reset_notify();
+
+   /* Reemit saved vertices
+    */
+   for (i = 0 ; i < nrverts; i++) {
+      memcpy( vb.stack[0].dmaptr, tmp[i], vb.vertex_size * 4 );
+      vb.stack[0].dmaptr += vb.vertex_size;   /* dmaptr counts in dwords */
+      vb.stack[0].vertspace--;
+   }
+}
+
+/** \brief Reset stack[0] so the next vertex lands in vb.vertex (one slot). */
+static void notify_noop( void )
+{
+   vb.stack[0].dmaptr = (int *)vb.vertex;
+   vb.stack[0].notify = notify_noop;   /* re-arms itself after every vertex */
+   vb.stack[0].vertspace = 1;
+}
+
+/**
+ * \brief Pop the notification mechanism stack.
+ *
+ * Restores the head by copying stack[1] over stack[0]; stack[1] is unchanged.
+ *
+ * \sa vb_t::stack and push_notify().
+ */
+static void pop_notify( void )
+{
+   vb.stack[0] = vb.stack[1];
+}
+
+/**
+ * \brief Push the notification mechanism stack.
+ *
+ * \param notify new notify callback for the stack head.
+ * \param space space available for vertices in \p store.
+ * \param store buffer where to store the vertices.
+ *
+ * Saves the current head by copying stack[0] into stack[1], then makes the
+ * head use the given resources.  Only one level can be saved.
+ *
+ * \sa vb_t::stack and pop_notify().
+ */
+static void push_notify( void (*notify)( void ), int space, 
+			 union vertex_dword *store )
+{
+   vb.stack[1] = vb.stack[0];
+   vb.stack[0].notify = notify;
+   vb.stack[0].initial_vertspace = space;
+   vb.stack[0].vertspace = space;
+   vb.stack[0].dmaptr = (int *)store;
+}
+
+
+/**
+ * \brief Emit a stored vertex (in vb_t::vertex_store) to DMA.
+ *
+ * \param v vertex index.
+ *
+ * Copies the vertex to the head write cursor and calls the notification
+ * callback vb_t::notify() once the head has no vertex space left.
+ */
+static void emit_vertex( int v )
+{
+   int i, *tmp = (int *)vb.vertex_store + v * vb.vertex_size;
+   /* tmp -> first dword of stored vertex v. */
+   for (i = 0 ; i < vb.vertex_size ; i++) 
+      *vb.stack[0].dmaptr++ = *tmp++;
+
+   if (--vb.stack[0].vertspace == 0)
+      vb.stack[0].notify();
+}
+
+
+/**
+ * \brief Emit a quad (in vb_t::vertex_store) to DMA as two triangles.
+ *
+ * \param v0 first vertex index.
+ * \param v1 second vertex index.
+ * \param v2 third vertex index.
+ * \param v3 fourth vertex index.
+ *
+ * Calls emit_vertex() six times: triangles (v0, v1, v3) and (v1, v2, v3).
+ */
+static void emit_quad( int v0, int v1, int v2, int v3 )
+{
+   emit_vertex( v0 ); emit_vertex( v1 ); emit_vertex( v3 );
+   emit_vertex( v1 ); emit_vertex( v2 ); emit_vertex( v3 );
+}
+
+/**
+ * \brief Every fourth vertex in a quad primitive, this is called to emit it.
+ *
+ * Pops the notification stack so emit_quad() writes through the saved entry,
+ * then pushes itself again with vb_t::vertex_store to collect another four
+ * vertices.
+ */
+static void notify_quad( void )
+{
+   pop_notify();
+   emit_quad( 0, 1, 2, 3 ); 
+   push_notify( notify_quad, 4, vb.vertex_store );
+}
+
+static void notify_qstrip1( void );   /* the two callbacks reference each other */
+
+/**
+ * \brief After the 4th vertex, emit either a quad or a flipped quad each two
+ * vertices.
+ *
+ * Pops the notification stack, calls emit_quad( 0, 1, 3, 2 ) and pushes the
+ * stack again with notify_qstrip1(); the next two vertices are stored in the
+ * first two slots of vb_t::vertex_store.
+ *
+ * \sa notify_qstrip1().
+ */
+static void notify_qstrip0( void )
+{
+   pop_notify();
+   emit_quad( 0, 1, 3, 2 );
+   push_notify( notify_qstrip1, 2, vb.vertex_store );
+}
+
+/**
+ * \brief After the 4th vertex, emit either a quad or a flipped quad each two
+ * vertices.
+ *
+ * Pops the notification stack, calls emit_quad( 2, 3, 1, 0 ) and pushes the
+ * stack again with notify_qstrip0(); the next two vertices are stored in
+ * slots 2 and 3 of vb_t::vertex_store.
+ *
+ * \sa notify_qstrip0().
+ */
+static void notify_qstrip1( void )
+{
+   pop_notify();
+   emit_quad( 2, 3, 1, 0 ); 
+   push_notify( notify_qstrip0, 2, vb.vertex_store + 2*vb.vertex_size );
+}
+
+/**
+ * \brief Emit the saved vertex (but hang on to it for later).
+ *
+ * Continue processing this primitive as a linestrip.
+ *
+ * Pops the notification stack and calls emit_vertex() with the first vertex.
+ */
+static void notify_lineloop0( void )
+{
+   pop_notify();
+   emit_vertex(0);   /* slot 0 of vertex_store stays intact for radeon_End() */
+}
+
+/**
+ * \brief Invalidate the current vertex format.
+ *
+ * \param ctx GL context (not used; the flag lives in the file-scope vb).
+ *
+ * Sets the vb_t::recheck flag, which radeon_Begin() tests.
+ */
+void radeonVtxfmtInvalidate( GLcontext *ctx )
+{
+   vb.recheck = GL_TRUE;
+}
+
+
+/**
+ * \brief Validate the vertex format from the context.
+ *
+ * \param ctx GL context.
+ *
+ * Signals a new primitive and determines the vertex format and size: xyz and
+ * rgba always, plus st when texture unit 0 is really enabled.  Points
+ * vb_t::floatcolorptr and vb_t::texcoordptr at their storage and loads the
+ * current color and texture attributes.
+ * Clears the vb_t::recheck flag on exit.
+ */
+static void radeonVtxfmtValidate( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   GLuint ind = (RADEON_CP_VC_FRMT_Z |
+		 RADEON_CP_VC_FRMT_FPCOLOR | 
+		 RADEON_CP_VC_FRMT_FPALPHA);
+
+   if (ctx->Driver.NeedFlush)
+      ctx->Driver.FlushVertices( ctx, ctx->Driver.NeedFlush );
+
+   if (ctx->Texture.Unit[0]._ReallyEnabled) 
+      ind |= RADEON_CP_VC_FRMT_ST0;
+
+   RADEON_NEWPRIM(rmesa);
+   vb.vertex_format = ind;
+   vb.vertex_size = 3;   /* x, y, z */
+
+   /* Would prefer to use ubyte floats in the vertex:
+    */
+   vb.floatcolorptr = &vb.vertex[vb.vertex_size].f;
+   vb.vertex_size += 4;   /* r, g, b, a */
+   vb.floatcolorptr[0] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0];
+   vb.floatcolorptr[1] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1];
+   vb.floatcolorptr[2] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2];
+   vb.floatcolorptr[3] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3];
+   /* Texcoords live in the vertex only when ST0 is part of the format. */
+   if (ind & RADEON_CP_VC_FRMT_ST0) {
+      vb.texcoordptr = &vb.vertex[vb.vertex_size].f;
+      vb.vertex_size += 2;   /* s, t */
+      vb.texcoordptr[0] = ctx->Current.Attrib[VERT_ATTRIB_TEX0][0];
+      vb.texcoordptr[1] = ctx->Current.Attrib[VERT_ATTRIB_TEX0][1];   
+   } 
+   else
+      vb.texcoordptr = ctx->Current.Attrib[VERT_ATTRIB_TEX0];
+
+   vb.recheck = GL_FALSE;
+   ctx->Driver.NeedFlush = FLUSH_UPDATE_CURRENT;
+}
+
+/* Both macros use a local 'rmesa'.  RESET_STIPPLE: flag 'lin' state and emit. */
+#define RESET_STIPPLE() do {			\
+   RADEON_STATECHANGE( rmesa, lin );		\
+   radeonEmitState( rmesa );			\
+} while (0)
+/* AUTO_STIPPLE: set (mode) or clear RADEON_LINE_PATTERN_AUTO_RESET, then emit. */
+#define AUTO_STIPPLE( mode )  do {		\
+   RADEON_STATECHANGE( rmesa, lin );		\
+   if (mode)					\
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |=	\
+	 RADEON_LINE_PATTERN_AUTO_RESET;	\
+   else						\
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
+	 ~RADEON_LINE_PATTERN_AUTO_RESET;	\
+   radeonEmitState( rmesa );			\
+} while (0)
+
+
+/**
+ * \brief Process glBegin(): validate state, make sure DMA space is available
+ * and select the vertex notification scheme for \p mode.
+ * \param mode primitive.
+ */
+static void radeon_Begin( GLenum mode )
+{
+   GLcontext *ctx = vb.context;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint se_cntl;
+   /* GL_POLYGON is the highest primitive value accepted. */
+   if (mode > GL_POLYGON) {
+      _mesa_error( ctx, GL_INVALID_ENUM, "glBegin" );
+      return;
+   }
+   /* GL_POLYGON+1 is the "outside Begin/End" marker set by radeon_End(). */
+   if (ctx->Driver.CurrentExecPrimitive != GL_POLYGON+1) {
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" );
+      return;
+   }
+   
+   if (ctx->NewState) 
+      _mesa_update_state( ctx );
+
+   if (rmesa->NewGLState)
+      radeonValidateState( ctx );
+
+   if (vb.recheck) 
+      radeonVtxfmtValidate( ctx );
+
+   /* Do we need to grab a new DMA region for the vertices?
+    * (The test asks for room for 12 vertices of the current size.) */
+   if (rmesa->dma.current.ptr + 12*vb.vertex_size*4 > rmesa->dma.current.end) {
+      RADEON_NEWPRIM( rmesa );
+      radeonRefillCurrentDmaRegion( rmesa );
+   }
+
+   reset_notify();
+   vb.prim = ctx->Driver.CurrentExecPrimitive = mode;
+   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL] | RADEON_FLAT_SHADE_VTX_LAST;
+
+   if (ctx->Line.StippleFlag && 
+       (mode == GL_LINES || 
+	mode == GL_LINE_LOOP ||
+	mode == GL_LINE_STRIP))
+      RESET_STIPPLE();
+
+   switch( mode ) {
+   case GL_LINES:
+      if (ctx->Line.StippleFlag) 
+	 AUTO_STIPPLE( GL_TRUE );   /* turned off again in radeon_End() */
+      break;
+   case GL_LINE_LOOP:
+      vb.prim = GL_LINE_STRIP;   /* drawn as a strip; radeon_End() closes it */
+      push_notify( notify_lineloop0, 1, vb.vertex_store );
+      break;
+   case GL_QUADS:
+      vb.prim = GL_TRIANGLES;   /* each quad is emitted as two triangles */
+      push_notify( notify_quad, 4, vb.vertex_store );
+      break;
+   case GL_QUAD_STRIP:
+      if (ctx->_TriangleCaps & DD_FLATSHADE) {
+	 vb.prim = GL_TRIANGLES;   /* flat shading: build the quads by hand */
+	 push_notify( notify_qstrip0, 4, vb.vertex_store );
+      }
+      break;
+   case GL_POLYGON:
+      if (ctx->_TriangleCaps & DD_FLATSHADE)
+	 se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST;
+      break;
+   default:
+      break;
+   }
+   /* Only touch the hardware state atom when the value really changes. */
+   if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
+   }
+}
+
+
+/**
+ * \brief Process glEnd().
+ * Closes a line loop, emits the buffered primitive and returns to idle state.
+ */
+static void radeon_End( void )
+{
+   GLcontext *ctx = vb.context;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (ctx->Driver.CurrentExecPrimitive == GL_POLYGON+1) {
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" );
+      return;
+   }
+
+   /* Need to finish a line loop?  (Re-emit the saved first vertex.)
+    */
+   if (ctx->Driver.CurrentExecPrimitive == GL_LINE_LOOP) 
+      emit_vertex( 0 );
+
+   /* Need to pop off quads/quadstrip/etc notification?
+    */
+   if (vb.stack[0].notify != notify_wrap_buffer)
+      pop_notify();
+
+   finish_prim( rmesa );
+
+   if (ctx->Driver.CurrentExecPrimitive == GL_LINES && ctx->Line.StippleFlag) 
+      AUTO_STIPPLE( GL_FALSE );
+   /* Mark "outside Begin/End" and route stray vertices to vb.vertex. */
+   ctx->Driver.CurrentExecPrimitive = GL_POLYGON+1;
+   notify_noop();
+}
+
+
+
+/**
+ * \brief Flush vertices (installed as ctx->Driver.FlushVertices).
+ *
+ * \param ctx GL context.
+ * \param flags flags.
+ *
+ * If FLUSH_UPDATE_CURRENT is set in \p flags then the current vertex attributes
+ * in the GL context are updated from vb_t::floatcolorptr and vb_t::texcoordptr.
+ */
+static void radeonFlushVertices( GLcontext *ctx, GLuint flags )
+{
+   if (flags & FLUSH_UPDATE_CURRENT) {
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0] = vb.floatcolorptr[0];
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1] = vb.floatcolorptr[1];
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2] = vb.floatcolorptr[2];
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = vb.floatcolorptr[3];
+      /* Texcoords are copied back only when they are part of the vertex. */
+      if (vb.vertex_format & RADEON_CP_VC_FRMT_ST0) {
+	 ctx->Current.Attrib[VERT_ATTRIB_TEX0][0] = vb.texcoordptr[0];
+	 ctx->Current.Attrib[VERT_ATTRIB_TEX0][1] = vb.texcoordptr[1];
+	 ctx->Current.Attrib[VERT_ATTRIB_TEX0][2] = 0.0F;
+	 ctx->Current.Attrib[VERT_ATTRIB_TEX0][3] = 1.0F;
+      }
+   }
+   /* No vertices are emitted here; the stored-vertices bit is just cleared. */
+   ctx->Driver.NeedFlush &= ~FLUSH_STORED_VERTICES;
+}
+
+
+/**
+ * \brief Set current vertex coordinates.
+ *
+ * \param x x vertex coordinate.
+ * \param y y vertex coordinate.
+ * \param z z vertex coordinate.
+ *
+ * Writes x, y, z and then the remaining dwords of vb.vertex at the head write
+ * cursor.  If that uses up the head's space, calls the notification callback.
+ */
+static __inline__ void radeon_Vertex3f( GLfloat x, GLfloat y, GLfloat z )
+{
+   int i;
+   /* Raw float bits are stored.  NOTE(review): pointer-cast type pun. */
+   *vb.stack[0].dmaptr++ = *(int *)&x;
+   *vb.stack[0].dmaptr++ = *(int *)&y;
+   *vb.stack[0].dmaptr++ = *(int *)&z;
+   /* Dwords from 3 up hold the color/texcoord set up by the validate step. */
+   for (i = 3; i < vb.vertex_size; i++) 
+      *vb.stack[0].dmaptr++ = vb.vertex[i].i;
+
+   if (--vb.stack[0].vertspace == 0)
+      vb.stack[0].notify();
+}
+
+/**
+ * \brief Set current vertex color.
+ *
+ * \param r red color component.
+ * \param g green color component.
+ * \param b blue color component.
+ * \param a alpha color component.
+ *
+ * Stores the four components through vb_t::floatcolorptr.
+ */
+static __inline__  void radeon_Color4f( GLfloat r, GLfloat g,
+					GLfloat b, GLfloat a )
+{
+   GLfloat *dest = vb.floatcolorptr;
+   dest[0] = r;
+   dest[1] = g;
+   dest[2] = b;
+   dest[3] = a;
+}
+
+/**
+ * \brief Set current vertex texture coordinates.
+ *
+ * \param s texture coordinate.
+ * \param t texture coordinate.
+ *
+ * Stores the two texture coordinates through vb_t::texcoordptr.
+ */
+static __inline__ void radeon_TexCoord2f( GLfloat s, GLfloat t )
+{
+   GLfloat *dest = vb.texcoordptr;
+   dest[0] = s;
+   dest[1] = t;
+}
+
+/**
+ * \brief Vector form: forwards v[0], v[1], v[2] to radeon_Vertex3f(), which
+ * is declared __inline__ so the call can be expanded in place.
+ */
+static void radeon_Vertex3fv( const GLfloat *v )
+{
+   radeon_Vertex3f( v[0], v[1], v[2] );
+}
+
+/**
+ * \brief Two-component form: forwards to radeon_Vertex3f() with z = 0.  The
+ * callee is declared __inline__ so the call can be expanded in place.
+ */
+static void radeon_Vertex2f( GLfloat x, GLfloat y )
+{
+   radeon_Vertex3f( x, y, 0 );
+}
+
+/**
+ * \brief Two-component vector form: forwards v[0], v[1] to radeon_Vertex3f()
+ * with z = 0 (callee is declared __inline__).
+ */
+static void radeon_Vertex2fv( const GLfloat *v )
+{
+   radeon_Vertex3f( v[0], v[1], 0 );
+}
+
+/**
+ * \brief Vector form: forwards v[0..3] to radeon_Color4f(), which is declared
+ * __inline__ so the call can be expanded in place.
+ */
+static void radeon_Color4fv( const GLfloat *v )
+{
+   radeon_Color4f( v[0], v[1], v[2], v[3] );
+}
+
+/**
+ * \brief Three-component form: forwards to radeon_Color4f() with alpha = 1.0
+ * (callee is declared __inline__).
+ */
+static void radeon_Color3f( GLfloat r, GLfloat g, GLfloat b )
+{
+   radeon_Color4f( r, g, b, 1.0 );
+}
+
+/**
+ * \brief Three-component vector form: forwards v[0..2] to radeon_Color4f()
+ * with alpha = 1.0 (callee is declared __inline__).
+ */
+static void radeon_Color3fv( const GLfloat *v )
+{
+   radeon_Color4f( v[0], v[1], v[2], 1.0 );
+}
+
+/**
+ * \brief Vector form: forwards v[0], v[1] to radeon_TexCoord2f(), which is
+ * declared __inline__ so the call can be expanded in place.
+ */
+static void radeon_TexCoord2fv( const GLfloat *v )
+{
+   radeon_TexCoord2f( v[0], v[1] );
+}
+
+
+/**
+ * \brief No-op; \p ctx is not used.
+ */
+void radeonVtxfmtUnbindContext( GLcontext *ctx )
+{
+}
+
+/**
+ * \brief No-op; \p ctx is not used.
+ */
+void radeonVtxfmtMakeCurrent( GLcontext *ctx )
+{
+}
+
+/**
+ * \brief No-op; \p ctx is not used.
+ */
+void radeonVtxfmtDestroy( GLcontext *ctx )
+{
+}
+
+/**
+ * \brief Software rendering fallback.
+ *
+ * \param ctx GL context (not used).
+ * \param bit fallback bitmask (not used).
+ * \param mode enable or disable.
+ *
+ * Only prints a warning to stderr when \p mode is set; nothing else happens.
+ */
+void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   if (mode)
+      fprintf(stderr, "Warning: hit nonexistant fallback path!\n");
+}
+
+/**
+ * \brief Software TCL fallback.
+ *
+ * \param ctx GL context (not used).
+ * \param bit fallback bitmask (not used).
+ * \param mode enable or disable.
+ *
+ * Only prints a warning to stderr when \p mode is set; nothing else happens.
+ */
+void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   if (mode)
+      fprintf(stderr, "Warning: hit nonexistant fallback path!\n");
+}
+
+/**
+ * \brief Called by radeonPointsBitmap() to disable TCL.
+ *
+ * \param rmesa Radeon context.
+ * \param flag whether to enable or disable TCL.
+ *
+ * Sets radeon_tcl_info::tcl_flag to RADEON_CP_VC_CNTL_TCL_ENABLE or to 0.
+ */
+void radeonSubsetVtxEnableTCL( radeonContextPtr rmesa, GLboolean flag )
+{
+   rmesa->tcl.tcl_flag = flag ? RADEON_CP_VC_CNTL_TCL_ENABLE : 0;
+}
+
+
+
+/**********************************************************************/
+/** \name        Noop mode for operation without focus                */
+/**********************************************************************/
+/*@{*/
+
+
+/**
+ * \brief Process glBegin() in noop mode: only validates \p mode and records
+ * it in ctx->Driver.CurrentExecPrimitive.
+ * \param mode primitive.
+ */
+static void radeon_noop_Begin(GLenum mode)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (mode > GL_POLYGON) {
+      _mesa_error( ctx, GL_INVALID_ENUM, "glBegin" );
+      return;
+   }
+   /* GL_POLYGON+1 is the "outside Begin/End" marker (see radeon_noop_End). */
+   if (ctx->Driver.CurrentExecPrimitive != GL_POLYGON+1) {
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" );
+      return;
+   }
+
+   ctx->Driver.CurrentExecPrimitive = mode;
+}
+
+/**
+ * \brief Process glEnd() in noop mode: just marks "outside Begin/End".
+ */
+static void radeon_noop_End(void)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   ctx->Driver.CurrentExecPrimitive = GL_POLYGON+1;
+}
+
+
+/**
+ * \brief Install the noop callbacks.
+ *
+ * \param ctx GL context.
+ *
+ * Replaces the Begin/End entries of ctx->Exec with the noop versions, points
+ * the color and texcoord pointers straight at ctx->Current.Attrib and resets
+ * the vertex destination with notify_noop().  No DMA vertices are emitted.
+ */
+static void radeon_noop_Install( GLcontext *ctx )
+{
+   ctx->Exec->Begin = radeon_noop_Begin;
+   ctx->Exec->End = radeon_noop_End;
+
+   vb.texcoordptr = ctx->Current.Attrib[VERT_ATTRIB_TEX0];
+   vb.floatcolorptr = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
+
+   notify_noop();
+}
+
+
+/**
+ * \brief Setup the GL context callbacks.
+ *
+ * \param ctx GL context.
+ *
+ * Links the glBegin()/glEnd() related _glapi_table entries to this module and
+ * makes \p ctx the one used by the file-scope vb state.  Without DMA buffers
+ * (radeonScreen->buffers unset) the noop Begin/End are installed instead.
+ * Called by radeonCreateContext() and radeonRenderMode().
+ */
+void radeonVtxfmtInit( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct _glapi_table *exec = ctx->Exec;
+
+   exec->Color3f = radeon_Color3f;
+   exec->Color3fv = radeon_Color3fv;
+   exec->Color4f = radeon_Color4f;
+   exec->Color4fv = radeon_Color4fv;
+   exec->TexCoord2f = radeon_TexCoord2f;
+   exec->TexCoord2fv = radeon_TexCoord2fv;
+   exec->Vertex2f = radeon_Vertex2f;
+   exec->Vertex2fv = radeon_Vertex2fv;
+   exec->Vertex3f = radeon_Vertex3f;
+   exec->Vertex3fv = radeon_Vertex3fv;
+   exec->Begin = radeon_Begin;
+   exec->End = radeon_End;
+
+   vb.context = ctx;
+   /* FlushVertices must be in place before radeonVtxfmtValidate() runs. */
+   ctx->Driver.FlushVertices = radeonFlushVertices;
+   ctx->Driver.CurrentExecPrimitive = GL_POLYGON+1;
+
+   if (rmesa->radeonScreen->buffers) {
+      radeonVtxfmtValidate( ctx );
+      notify_noop();
+   }
+   else 
+      radeon_noop_Install( ctx );   /* overrides Begin/End set above */
+}
+
+
+/*@}*/
+
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
new file mode 100644
index 0000000000..926b1523d6
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -0,0 +1,1332 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "enums.h"
+#include "imports.h"
+#include "macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_imm_exec.h"
+#include "tnl/t_pipeline.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_swtcl.h"
+#include "radeon_tcl.h"
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+
+#define RADEON_XYZW_BIT		0x01	/* position (xyzw) */
+#define RADEON_RGBA_BIT		0x02	/* color (rgba) */
+#define RADEON_SPEC_BIT		0x04	/* specular and fog */
+#define RADEON_TEX0_BIT		0x08	/* texture unit 0 coords */
+#define RADEON_TEX1_BIT		0x10	/* texture unit 1 coords */
+#define RADEON_PTEX_BIT		0x20	/* projective texture coords */
+#define RADEON_MAX_SETUP	0x40	/* number of setup_tab slots */
+
+static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
+static void flush_last_swtcl_prim_compat( radeonContextPtr rmesa );
+
+static struct {
+   void                (*emit)( GLcontext *, GLuint, GLuint, void *, GLuint );
+   interp_func		interp;
+   copy_pv_func	        copy_pv;
+   GLboolean           (*check_tex_sizes)( GLcontext *ctx );
+   GLuint               vertex_size;		/* in dwords; users multiply by 4 for bytes */
+   GLuint               vertex_stride_shift;	/* vertex store stride is 1 << shift bytes */
+   GLuint               vertex_format;
+} setup_tab[RADEON_MAX_SETUP];	/* indexed by an OR of the RADEON_*_BIT flags above */
+
+
+#define TINY_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR)	/* no W0, no specular */
+
+#define NOTEX_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC)	/* tiny + W0 + specular */
+
+#define TEX0_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC |	\
+					 RADEON_CP_VC_FRMT_ST0)		/* notex + ST0 */
+
+#define TEX1_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC |	\
+					 RADEON_CP_VC_FRMT_ST0 |	\
+					 RADEON_CP_VC_FRMT_ST1)		/* tex0 + ST1 */
+
+#define PROJ_TEX1_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC |	\
+					 RADEON_CP_VC_FRMT_ST0 |	\
+					 RADEON_CP_VC_FRMT_Q0 |         \
+					 RADEON_CP_VC_FRMT_ST1 |	\
+					 RADEON_CP_VC_FRMT_Q1)		/* tex1 + Q0 + Q1 */
+
+#define TEX2_VERTEX_FORMAT 0	/* placeholders: HAVE_TEX2/TEX3_VERTICES are 0 in this file */
+#define TEX3_VERTEX_FORMAT 0
+#define PROJ_TEX3_VERTEX_FORMAT 0
+
+#define DO_XYZW (IND & RADEON_XYZW_BIT)
+#define DO_RGBA (IND & RADEON_RGBA_BIT)
+#define DO_SPEC (IND & RADEON_SPEC_BIT)
+#define DO_FOG  (IND & RADEON_SPEC_BIT)	/* fog is keyed off the same bit as specular */
+#define DO_TEX0 (IND & RADEON_TEX0_BIT)
+#define DO_TEX1 (IND & RADEON_TEX1_BIT)
+#define DO_TEX2 0	/* no setup bit is defined for units 2 and 3 */
+#define DO_TEX3 0
+#define DO_PTEX (IND & RADEON_PTEX_BIT)
+
+#define VERTEX radeonVertex
+#define VERTEX_COLOR radeon_color_t
+#define GET_VIEWPORT_MAT() 0
+#define GET_TEXSOURCE(n)  n
+#define GET_VERTEX_FORMAT() RADEON_CONTEXT(ctx)->swtcl.vertex_format
+#define GET_VERTEX_STORE() RADEON_CONTEXT(ctx)->swtcl.verts
+#define GET_VERTEX_STRIDE_SHIFT() RADEON_CONTEXT(ctx)->swtcl.vertex_stride_shift
+#define GET_UBYTE_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteColor
+#define GET_UBYTE_SPEC_COLOR_STORE() &RADEON_CONTEXT(ctx)->UbyteSecondaryColor
+
+#define HAVE_HW_VIEWPORT    1
+/* Tiny vertices don't seem to work atm - haven't looked into why.
+ * HAVE_HW_DIVIDE is zero when IND holds only the xyzw and rgba bits. */
+#define HAVE_HW_DIVIDE      (IND & ~(RADEON_XYZW_BIT|RADEON_RGBA_BIT))
+#define HAVE_TINY_VERTICES  1
+#define HAVE_RGBA_COLOR     1
+#define HAVE_NOTEX_VERTICES 1
+#define HAVE_TEX0_VERTICES  1
+#define HAVE_TEX1_VERTICES  1
+#define HAVE_TEX2_VERTICES  0
+#define HAVE_TEX3_VERTICES  0
+#define HAVE_PTEX_VERTICES  1
+
+#define CHECK_HW_DIVIDE    (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE| \
+                                                    DD_TRI_UNFILLED)))
+
+#define IMPORT_QUALIFIER
+#define IMPORT_FLOAT_COLORS radeon_import_float_colors
+#define IMPORT_FLOAT_SPEC_COLORS radeon_import_float_spec_colors
+
+#define INTERP_VERTEX setup_tab[RADEON_CONTEXT(ctx)->swtcl.SetupIndex].interp	/* dispatch through the active setup */
+#define COPY_PV_VERTEX setup_tab[RADEON_CONTEXT(ctx)->swtcl.SetupIndex].copy_pv
+
+
+/***********************************************************************
+ *         Generate  pv-copying and translation functions              *
+ ***********************************************************************/
+
+#define TAG(x) radeon_##x
+#define IND ~0	/* all bits set, so every DO_* test above is non-zero */
+#include "tnl_dd/t_dd_vb.c"
+#undef IND
+
+
+/***********************************************************************
+ *             Generate vertex emit and interp functions               *
+ ***********************************************************************/
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT)
+#define TAG(x) x##_wg	/* suffix: w = xyzw, g = rgba */
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT)
+#define TAG(x) x##_wgt0	/* t0 = tex0 */
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_PTEX_BIT)
+#define TAG(x) x##_wgpt0	/* p = projective texture */
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_TEX1_BIT)
+#define TAG(x) x##_wgt0t1	/* t1 = tex1 */
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_TEX1_BIT|\
+             RADEON_PTEX_BIT)
+#define TAG(x) x##_wgpt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT)
+#define TAG(x) x##_wgfs	/* fs = the shared fog/specular bit */
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT)
+#define TAG(x) x##_wgfst0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT|RADEON_PTEX_BIT)
+#define TAG(x) x##_wgfspt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT|RADEON_TEX1_BIT)
+#define TAG(x) x##_wgfst0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT|RADEON_TEX1_BIT|RADEON_PTEX_BIT)
+#define TAG(x) x##_wgfspt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+
+/***********************************************************************
+ *                         Initialization 
+ ***********************************************************************/
+
+static void init_setup_tab( void )	/* runs one init_<suffix>() per setup combination instantiated above */
+{
+   init_wg();		/* xyzw, rgba */
+   init_wgt0();		/* + tex0 */
+   init_wgpt0();		/* + tex0, projective */
+   init_wgt0t1();	/* + tex0, tex1 */
+   init_wgpt0t1();	/* + tex0, tex1, projective */
+   init_wgfs();		/* + fog/spec */
+   init_wgfst0();	/* + fog/spec, tex0 */
+   init_wgfspt0();	/* + fog/spec, tex0, projective */
+   init_wgfst0t1();	/* + fog/spec, tex0, tex1 */
+   init_wgfspt0t1();	/* + fog/spec, tex0, tex1, projective */
+}
+
+
+
+/* Print the names of the setup bits set in flags to stderr, after msg. */
+void radeonPrintSetupFlags(char *msg, GLuint flags )
+{
+   /* %x consumes an unsigned int, so do not pass the flags as int. */
+   fprintf(stderr, "%s(%x): %s%s%s%s%s%s\n", msg, (unsigned int)flags,
+	   (flags & RADEON_XYZW_BIT)     ? " xyzw," : "",
+	   (flags & RADEON_RGBA_BIT)     ? " rgba," : "",
+	   (flags & RADEON_SPEC_BIT)     ? " spec/fog," : "",
+	   (flags & RADEON_TEX0_BIT)     ? " tex-0," : "",
+	   (flags & RADEON_TEX1_BIT)     ? " tex-1," : "",
+	   (flags & RADEON_PTEX_BIT)     ? " proj-tex," : "");
+}
+
+
+/* If the current setup fails its check_tex_sizes() test, move to the projective
+ * setup; then run any pending dma flush that is not a swtcl prim flusher. */
+static void radeonRenderStart( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (!setup_tab[rmesa->swtcl.SetupIndex].check_tex_sizes(ctx)) {
+      GLuint setup;
+      rmesa->swtcl.SetupIndex |= (RADEON_PTEX_BIT|RADEON_RGBA_BIT);
+      setup = rmesa->swtcl.SetupIndex;
+      /* Projective textures only need the matching vertex format. */
+      if (rmesa->swtcl.vertex_format != setup_tab[setup].vertex_format) {
+	 RADEON_NEWPRIM(rmesa);
+	 rmesa->swtcl.vertex_stride_shift = setup_tab[setup].vertex_stride_shift;
+	 rmesa->swtcl.vertex_size = setup_tab[setup].vertex_size;
+	 rmesa->swtcl.vertex_format = setup_tab[setup].vertex_format;
+      }
+      if ((ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED)) == 0) {
+	 tnl->Driver.Render.CopyPV = setup_tab[rmesa->swtcl.SetupIndex].copy_pv;
+	 tnl->Driver.Render.Interp = setup_tab[rmesa->swtcl.SetupIndex].interp;
+      }
+   }
+   if (rmesa->dma.flush == 0 ||
+       rmesa->dma.flush == flush_last_swtcl_prim ||
+       rmesa->dma.flush == flush_last_swtcl_prim_compat)
+      return;
+   rmesa->dma.flush( rmesa );
+}
+
+
+/* Emit vertices start..count-1 into the swtcl vertex store with the current
+ * setup's emit function; skipped when neither caller nor context has new inputs. */
+void radeonBuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+			   GLuint newinputs )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   const GLuint shift = rmesa->swtcl.vertex_stride_shift;
+   const GLuint pending = newinputs | rmesa->swtcl.SetupNewInputs;
+
+   rmesa->swtcl.SetupNewInputs = 0;
+   if (pending) {
+      GLubyte *first = (GLubyte *)rmesa->swtcl.verts + (start << shift);
+      setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count, first,
+					       1 << shift );
+   }
+}
+
+/* Pick the setup_tab entry for the current fog/specular/texture state and
+ * bring the interp/copy_pv hooks, vertex format and SE_COORDFMT in line.
+ * Does nothing unless TclFallback is set and Fallback is clear.
+ */
+void radeonChooseVertexState( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   const GLuint extras = (ctx->_TriangleCaps &
+			  (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED));
+   GLuint setup = (RADEON_XYZW_BIT | RADEON_RGBA_BIT);
+   GLuint coord_fmt = RADEON_TEX1_W_ROUTING_USE_Q1;
+   GLuint project;
+
+   if (rmesa->Fallback || !rmesa->TclFallback)
+      return;
+
+   if ((ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) || ctx->Fog.Enabled)
+      setup |= RADEON_SPEC_BIT;
+
+   if (ctx->Texture._EnabledUnits & 0x2)		/* unit 1 enabled */
+      setup |= RADEON_TEX0_BIT|RADEON_TEX1_BIT;
+   else if (ctx->Texture._EnabledUnits & 0x1)	/* unit 0 enabled */
+      setup |= RADEON_TEX0_BIT;
+
+   rmesa->swtcl.SetupIndex = setup;
+
+   /* Two-sided/unfilled rendering uses the *_extras interp and copy_pv. */
+   if (!extras) {
+      tnl->Driver.Render.Interp = setup_tab[setup].interp;
+      tnl->Driver.Render.CopyPV = setup_tab[setup].copy_pv;
+   }
+   else {
+      tnl->Driver.Render.Interp = radeon_interp_extras;
+      tnl->Driver.Render.CopyPV = radeon_copy_pv_extras;
+   }
+
+   /* RADEON_NEWPRIM is issued before the vertex format is replaced. */
+   if (rmesa->swtcl.vertex_format != setup_tab[setup].vertex_format) {
+      RADEON_NEWPRIM(rmesa);
+      rmesa->swtcl.vertex_stride_shift = setup_tab[setup].vertex_stride_shift;
+      rmesa->swtcl.vertex_size = setup_tab[setup].vertex_size;
+      rmesa->swtcl.vertex_format = setup_tab[setup].vertex_format;
+   }
+
+   /* HW perspective divide is a win, but tiny vertex formats are a
+    * bigger one.
+    */
+   project = (extras || setup_tab[setup].vertex_format == TINY_VERTEX_FORMAT);
+   if (project)
+      coord_fmt |= (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
+		    RADEON_VTX_Z_PRE_MULT_1_OVER_W0);
+   else
+      coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0;
+
+   /* Touch the hardware state atom only when the value changes. */
+   if ( rmesa->hw.set.cmd[SET_SE_COORDFMT] != coord_fmt ) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_COORDFMT] = coord_fmt;
+   }
+
+   _tnl_need_projected_coords( ctx, project );
+}
+
+
+/* Flush vertices in the current dma region: emit them as one non-indexed
+ * primitive and move the region start up to the write pointer.
+ */
+static void flush_last_swtcl_prim( radeonContextPtr rmesa  )
+{
+   struct radeon_dma_region *current = &rmesa->dma.current;
+   GLuint current_offset;
+
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   rmesa->dma.flush = 0;
+
+   if (!current->buf)
+      return;
+
+   current_offset = (rmesa->radeonScreen->agp_buffer_offset +
+		     current->buf->buf->idx * RADEON_BUFFER_SIZE + 
+		     current->start);
+
+   assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+
+   assert (current->start + 
+	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+	   current->ptr);
+
+   if (current->ptr != current->start) {
+      radeonEmitVertexAOS( rmesa, rmesa->swtcl.vertex_size, current_offset );
+
+      radeonEmitVbufPrim( rmesa, rmesa->swtcl.vertex_format,
+			  rmesa->swtcl.hw_primitive, rmesa->swtcl.numverts );
+   }
+
+   current->start = current->ptr;
+   rmesa->swtcl.numverts = 0;
+}
+
+
+/* Flush callback installed when drmMinor == 1: hands the accumulated
+ * vertices to radeonCompatEmitPrimitive() and drops the dma buffer.
+ */
+static void flush_last_swtcl_prim_compat( radeonContextPtr rmesa )
+{
+   struct radeon_dma_region *current = &rmesa->dma.current;
+
+   /* %p requires a void pointer, so convert the buffer pointer explicitly. */
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s buf %p start %d ptr %d\n", 
+	      __FUNCTION__, (void *)current->buf, current->start, current->ptr);
+
+   assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+   assert (current->start + 
+	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+	   current->ptr);
+   assert (current->start == 0);
+
+   rmesa->dma.flush = 0;
+
+   if (current->ptr && current->buf) {
+      assert (current->buf->refcount == 1);
+
+      radeonCompatEmitPrimitive( rmesa,
+				 rmesa->swtcl.vertex_format,
+				 rmesa->swtcl.hw_primitive,
+				 rmesa->swtcl.numverts);
+      
+      /* The buffer has been released:
+       */
+      FREE(current->buf);
+      current->buf = 0;
+      current->start = 0;
+      current->ptr = current->end;
+   }
+
+   rmesa->swtcl.numverts = 0;
+}
+
+
+/* Alloc space in the current dma region for nverts vertices of vsize
+ * bytes each and return a pointer to it, refilling the region first when
+ * the request does not fit.  Installs a swtcl flush callback if none is set.
+ */
+static __inline void *radeonAllocDmaLowVerts( radeonContextPtr rmesa,
+					      int nverts, int vsize )
+{
+   const GLuint bytes = vsize * nverts;
+   char *head;
+
+   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+      radeonRefillCurrentDmaRegion( rmesa );
+
+   if (rmesa->dma.flush == 0) {
+      if (rmesa->dri.drmMinor == 1)
+	 rmesa->dma.flush = flush_last_swtcl_prim_compat;
+      else
+	 rmesa->dma.flush = flush_last_swtcl_prim;
+      rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+   }
+
+   assert( vsize == rmesa->swtcl.vertex_size * 4 );
+   assert( rmesa->dma.flush == flush_last_swtcl_prim ||
+	   rmesa->dma.flush == flush_last_swtcl_prim_compat);
+   assert (rmesa->dma.current.start + 
+	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+	   rmesa->dma.current.ptr);
+
+   /* Hand out the space at the old write pointer, then advance it. */
+   head = rmesa->dma.current.address + rmesa->dma.current.ptr;
+   rmesa->swtcl.numverts += nverts;
+   rmesa->dma.current.ptr += bytes;
+   return head;
+}
+
+
+
+
+void radeon_emit_contiguous_verts( GLcontext *ctx, GLuint start, GLuint count )
+{
+   /* Reserve count-start vertices in the current dma region, then emit. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLuint bytes_per_vert = 4 * rmesa->swtcl.vertex_size;
+   CARD32 *out = radeonAllocDmaLowVerts( rmesa, count-start, bytes_per_vert );
+   setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count, out,
+					    bytes_per_vert );
+}
+
+
+
+/* Allocate the swtcl.indexed_verts dma region for count-start vertices
+ * and emit vertices start..count-1 into it with the current setup.
+ */
+void radeon_emit_indexed_verts( GLcontext *ctx, GLuint start, GLuint count )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLuint vsize = rmesa->swtcl.vertex_size * 4;
+
+   radeonAllocDmaRegionVerts( rmesa, &rmesa->swtcl.indexed_verts,
+			      count - start, vsize, 64 );
+
+   setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count,
+      rmesa->swtcl.indexed_verts.address + rmesa->swtcl.indexed_verts.start,
+      vsize );
+}
+
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Try to simulate missing primitives with indexed vertices.
+ */
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+#define HAVE_POLYGONS    0
+#define HAVE_ELTS        1
+
+static const GLuint hw_prim[GL_POLYGON+1] = {	/* indexed by GL primitive mode */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,		/* GL_POINTS */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINES */
+   0,						/* GL_LINE_LOOP: no entry */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP,	/* GL_LINE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLES */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP,	/* GL_TRIANGLE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN,		/* GL_TRIANGLE_FAN */
+   0,						/* GL_QUADS: no entry */
+   0,						/* GL_QUAD_STRIP: no entry */
+   0						/* GL_POLYGON: no entry */
+};
+
+static __inline void radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )	/* begin a non-indexed hw primitive */
+{
+   RADEON_NEWPRIM( rmesa );
+   rmesa->swtcl.hw_primitive = hw_prim[prim];	/* table lookup by GL mode */
+   assert(rmesa->dma.current.ptr == rmesa->dma.current.start);	/* region must hold no pending vertices here */
+}
+
+static __inline void radeonEltPrimitive( radeonContextPtr rmesa, GLenum prim )	/* begin an indexed hw primitive */
+{
+   RADEON_NEWPRIM( rmesa );
+   rmesa->swtcl.hw_primitive = hw_prim[prim] | RADEON_CP_VC_CNTL_PRIM_WALK_IND;	/* same lookup plus the index-walk flag */
+}
+
+
+/* Draw one primitive through tnl->Driver.Render's PrimTabVerts table. */
+static void VERT_FALLBACK( GLcontext *ctx, GLuint start, GLuint count,
+			   GLuint flags )
+{
+   const GLuint mode = flags & PRIM_MODE_MASK;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tnl->Driver.Render.PrimitiveNotify( ctx, mode );
+   tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
+   tnl->Driver.Render.PrimTabVerts[mode]( ctx, start, count, flags );
+   RADEON_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_BIT_CLIP;
+}
+
+/* Draw one primitive through tnl->Driver.Render's PrimTabElts table. */
+static void ELT_FALLBACK( GLcontext *ctx, GLuint start, GLuint count,
+			  GLuint flags )
+{
+   const GLuint mode = flags & PRIM_MODE_MASK;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tnl->Driver.Render.PrimitiveNotify( ctx, mode );
+   tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
+   tnl->Driver.Render.PrimTabElts[mode]( ctx, start, count, flags );
+   RADEON_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_BIT_CLIP;
+}
+
+
+#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+#define ELTS_VARS  GLushort *dest	/* write cursor used by ALLOC_ELTS / EMIT_ELT */
+#define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
+#define ELT_INIT(prim) radeonEltPrimitive( rmesa, prim )
+#define NEW_PRIMITIVE()  RADEON_NEWPRIM( rmesa )
+#define NEW_BUFFER()  radeonRefillCurrentDmaRegion( rmesa )
+#define GET_CURRENT_VB_MAX_VERTS() \
+  (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
+#define GET_SUBSEQUENT_VB_MAX_VERTS() \
+  ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
+
+#if RADEON_OLD_PACKETS
+# define GET_CURRENT_VB_MAX_ELTS() \
+  ((RADEON_CMD_BUF_SZ - (rmesa->store.cmd_used + 24)) / 2)
+#else	/* the two variants differ only in the 24 vs 16 bytes held back */
+# define GET_CURRENT_VB_MAX_ELTS() \
+  ((RADEON_CMD_BUF_SZ - (rmesa->store.cmd_used + 16)) / 2)
+#endif
+#define GET_SUBSEQUENT_VB_MAX_ELTS() \
+  ((RADEON_CMD_BUF_SZ - 1024) / 2)	/* 2 bytes per elt; 1024 bytes held back */
+
+
+
+/* Reserve nr 16-bit elts: extend the open elt primitive if it fits, else
+ * flush and open a new one.  nr is parenthesised so expressions are safe. */
+#define ALLOC_ELTS(nr)							\
+do {									\
+   if (rmesa->dma.flush == radeonFlushElts &&				\
+       rmesa->store.cmd_used + (nr)*2 < RADEON_CMD_BUF_SZ) {		\
+									\
+      dest = (GLushort *)(rmesa->store.cmd_buf +			\
+			  rmesa->store.cmd_used);			\
+      rmesa->store.cmd_used += (nr)*2;					\
+   }									\
+   else {								\
+      if (rmesa->dma.flush) {						\
+	 rmesa->dma.flush( rmesa );					\
+      }									\
+									\
+      radeonEmitVertexAOS( rmesa,					\
+			   rmesa->swtcl.vertex_size,			\
+			   (rmesa->radeonScreen->agp_buffer_offset +		\
+			    rmesa->swtcl.indexed_verts.buf->buf->idx * 	\
+			    RADEON_BUFFER_SIZE +			\
+			    rmesa->swtcl.indexed_verts.start));		\
+									\
+      dest = radeonAllocEltsOpenEnded( rmesa,				\
+				       rmesa->swtcl.vertex_format,	\
+				       rmesa->swtcl.hw_primitive,	\
+				       (nr) );				\
+   }									\
+} while (0)
+
+#define ALLOC_ELTS_NEW_PRIMITIVE(nr) ALLOC_ELTS( nr )
+
+#ifdef MESA_BIG_ENDIAN
+/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
+#define EMIT_ELT(offset, x) do {				\
+	int off = (offset) + ( ( (GLuint)dest & 0x2 ) >> 1 );	\
+	GLushort *des = (GLushort *)( (GLuint)dest & ~0x2 );	\
+	(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); } while (0)
+#else
+#define EMIT_ELT(offset, x) (dest)[offset] = (GLushort) (x)
+#endif
+#define EMIT_TWO_ELTS(offset, x, y)  *(GLuint *)(dest+(offset)) = ((y)<<16)|(x);  /* trailing ';' kept for existing users */
+#define INCR_ELTS( nr ) dest += (nr)
+#define RELEASE_ELT_VERTS() \
+  radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ )
+#define EMIT_VERTS( ctx, j, nr ) \
+  radeon_emit_contiguous_verts(ctx, j, (j)+(nr))
+#define EMIT_INDEXED_VERTS( ctx, start, count ) \
+  radeon_emit_indexed_verts( ctx, start, count )
+
+/* Macro arguments above are parenthesised so expression arguments expand safely. */
+#define TAG(x) radeon_dma_##x
+#include "tnl_dd/t_dd_dmatmp.h"
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+/* Run hook: draws the vertex buffer through the dma render tables when no
+ * bail-out condition holds.  Returns GL_FALSE once drawn, GL_TRUE otherwise. */
+static GLboolean radeon_run_render( GLcontext *ctx,
+				    struct gl_pipeline_stage *stage )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint i, length, flags = 0;
+   render_func *tab = TAG(render_tab_verts);
+
+   if (rmesa->swtcl.indexed_verts.buf && (!VB->Elts || stage->changed_inputs)) 
+      RELEASE_ELT_VERTS();
+   	
+   if (VB->ClipOrMask ||	     /* this stage does no clipping */
+       rmesa->swtcl.RenderIndex != 0 ||    /* nor per-vertex manipulations */
+       ctx->Line.StippleFlag)        /* GH: THIS IS A HACK!!! */
+      return GL_TRUE;		
+
+   if (rmesa->dri.drmMinor < 3) {
+      /* drm 1.1 doesn't support vertex primitives starting in the
+       * middle of a buffer.  It doesn't support sane indexed vertices
+       * either.  drm 1.2 fixes both of these problems, but we don't have a
+       * compatibility layer to that version yet.  
+       */
+      return GL_TRUE;
+   }
+
+   tnl->Driver.Render.Start( ctx );
+
+   if (VB->Elts) {
+      tab = TAG(render_tab_elts);	/* indexed path: use the elt render table */
+      if (!rmesa->swtcl.indexed_verts.buf)
+	 if (!TAG(emit_elt_verts)(ctx, 0, VB->Count))
+	    return GL_TRUE;	/* too many vertices */
+   }
+
+   for (i = 0 ; !(flags & PRIM_LAST) ; i += length)	/* flags starts at 0, so the body runs at least once */
+   {
+      flags = VB->Primitive[i];
+      length = VB->PrimitiveLength[i];
+
+      if (RADEON_DEBUG & DEBUG_PRIMS)
+	 fprintf(stderr, "radeon_render.c: prim %s %d..%d\n", 
+		 _mesa_lookup_enum_by_nr(flags & PRIM_MODE_MASK), 
+		 i, i+length);
+
+      if (length)
+	 tab[flags & PRIM_MODE_MASK]( ctx, i, i + length, flags );
+   }
+
+   tnl->Driver.Render.Finish( ctx );
+
+   return GL_FALSE;		/* finished the pipe */
+}
+
+
+
+/* Work out which vertex attributes this stage consumes and store them in
+ * stage->inputs.  Outside GL_RENDER mode only position, clip and color0
+ * are requested.
+ */
+static void radeon_check_render( GLcontext *ctx,
+				 struct gl_pipeline_stage *stage )
+{
+   GLuint wanted = VERT_BIT_POS | VERT_BIT_CLIP | VERT_BIT_COLOR0;
+
+   if (ctx->RenderMode != GL_RENDER) {
+      stage->inputs = wanted;
+      return;
+   }
+
+   wanted |= (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) ? VERT_BIT_COLOR1 : 0;
+   wanted |= ctx->Texture.Unit[0]._ReallyEnabled ? VERT_BIT_TEX0 : 0;
+   wanted |= ctx->Texture.Unit[1]._ReallyEnabled ? VERT_BIT_TEX1 : 0;
+   wanted |= ctx->Fog.Enabled ? VERT_BIT_FOG : 0;
+
+   stage->inputs = wanted;
+}
+
+
+static void dtr( struct gl_pipeline_stage *stage )	/* destructor slot of _radeon_render_stage */
+{
+   (void)stage;	/* no-op body: the cast only marks the parameter as used */
+}
+
+
+const struct gl_pipeline_stage _radeon_render_stage =
+{
+   "radeon render",		/* name */
+   (_DD_NEW_SEPARATE_SPECULAR |
+    _NEW_TEXTURE|
+    _NEW_FOG|
+    _NEW_RENDERMODE),		/* re-check (new inputs) */
+   0,				/* re-run (always runs) */
+   GL_TRUE,			/* active */
+   0, 0,			/* inputs (set in check_render), outputs */
+   0, 0,			/* changed_inputs, private */
+   dtr,				/* destructor (no-op) */
+   radeon_check_render,		/* check - recomputes stage->inputs */
+   radeon_run_render		/* run */
+};
+
+
+/**************************************************************************/
+
+/* Radeon texture rectangle expects coords in 0..1 range, not 0..dimension
+ * as in the extension spec.  Need to translate here.
+ *
+ * Note that swrast expects 0..dimension, so if a fallback is active,
+ * don't do anything.  (Maybe need to configure swrast to match hw)
+ */
+struct texrect_stage_data {
+   GLvector4f texcoord[MAX_TEXTURE_UNITS];	/* rescaled coords, one vector per texture unit */
+};
+/* Fetch the stage's private store (its privatePtr cast to the struct type). */
+#define TEXRECT_STAGE_DATA(stage) ((struct texrect_stage_data *)stage->privatePtr)
+
+
+/* For every unit with a rectangle texture enabled, rescale s and t by
+ * 1/width and 1/height into the stage's private store and point the
+ * vertex buffer at that copy.  Does nothing while a fallback is active.
+ */
+static GLboolean run_texrect_stage( GLcontext *ctx,
+				    struct gl_pipeline_stage *stage )
+{
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
+   GLuint unit;
+
+   if (RADEON_CONTEXT(ctx)->Fallback)
+      return GL_TRUE;
+
+   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++) {
+      if (ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_RECT_BIT) {
+	 if (stage->changed_inputs & VERT_BIT_TEX(unit)) {
+	    struct gl_texture_object *rect = ctx->Texture.Unit[unit].CurrentRect;
+	    struct gl_texture_image *image = rect->Image[rect->BaseLevel];
+	    const GLfloat sscale = 1.0/image->Width;
+	    const GLfloat tscale = 1.0/image->Height;
+	    const GLint step = VB->TexCoordPtr[unit]->stride;
+	    const GLubyte *src = (const GLubyte *)VB->TexCoordPtr[unit]->data;
+	    GLfloat (*dst)[4] = store->texcoord[unit].data;
+	    GLint n;
+
+	    for (n = 0 ; n < VB->Count ; n++, src += step) {
+	       const GLfloat *coord = (const GLfloat *)src;
+	       dst[n][0] = coord[0] * sscale;
+	       dst[n][1] = coord[1] * tscale;
+	    }
+	 }
+	 VB->TexCoordPtr[unit] = &store->texcoord[unit];
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+/* Called the first time stage->run() is invoked.  Allocates the private
+ * coordinate store and one vector per texture unit, then swaps in
+ * run_texrect_stage() and runs it.  Returns GL_FALSE if the store is NULL.
+ */
+static GLboolean alloc_texrect_data( GLcontext *ctx,
+				     struct gl_pipeline_stage *stage )
+{
+   struct texrect_stage_data *data = CALLOC(sizeof(*data));
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLuint unit;
+
+   stage->privatePtr = data;
+   if (data == NULL)
+      return GL_FALSE;
+
+   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++)
+      _mesa_vector4f_alloc( &data->texcoord[unit], 0, VB->Size, 32 );
+
+   /* From now on the stage runs the real function directly. */
+   stage->run = run_texrect_stage;
+   return run_texrect_stage( ctx, stage );
+}
+
+
+/* Flag TEX0/TEX1 as both inputs and outputs of the stage for each unit
+ * that has a rectangle texture enabled; the stage is active only when at
+ * least one of them is. */
+static void check_texrect( GLcontext *ctx,
+			   struct gl_pipeline_stage *stage )
+{
+   const GLuint rect0 = ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT;
+   const GLuint rect1 = ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT;
+   const GLuint bits = ((rect0 ? VERT_BIT_TEX0 : 0) |
+			(rect1 ? VERT_BIT_TEX1 : 0));
+
+   stage->outputs = bits;
+   stage->inputs = bits;
+   stage->active = (bits != 0);
+}
+
+
+/* Destructor: free each allocated texcoord vector, then the store itself. */
+static void free_texrect_data( struct gl_pipeline_stage *stage )
+{
+   struct texrect_stage_data *data = TEXRECT_STAGE_DATA(stage);
+   GLuint unit;
+   if (!data)
+      return;
+   for (unit = 0 ; unit < MAX_TEXTURE_UNITS ; unit++)
+      if (data->texcoord[unit].data)
+	 _mesa_vector4f_free( &data->texcoord[unit] );
+   FREE( data );
+   stage->privatePtr = 0;
+}
+
+
+const struct gl_pipeline_stage _radeon_texrect_stage =
+{
+   "radeon texrect stage",			/* name */
+   _NEW_TEXTURE,	/* check_state */
+   _NEW_TEXTURE,	/* run_state */
+   GL_TRUE,				/* active? (recomputed by check_texrect) */
+   0,					/* inputs (set by check_texrect) */
+   0,					/* outputs (set by check_texrect) */
+   0,					/* changed_inputs */
+   NULL,				/* private data (allocated on first run) */
+   free_texrect_data,			/* destructor */
+   check_texrect,			/* check */
+   alloc_texrect_data,			/* run -- initially the allocator, which then installs run_texrect_stage */
+};
+
+
+/**************************************************************************/
+
+
+static const GLuint reduced_hw_prim[GL_POLYGON+1] = {	/* GL mode -> point, line or tri-list */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,		/* GL_POINTS */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINES */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINE_LOOP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLES */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLE_FAN */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_QUADS */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_QUAD_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST		/* GL_POLYGON */
+};
+
+static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim );
+static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim );
+static void radeonResetLineStipple( GLcontext *ctx );
+
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+#undef LOCAL_VARS
+#define CTX_ARG radeonContextPtr rmesa
+#define CTX_ARG2 rmesa
+#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 )
+/* LOCAL_VARS: the context plus byte-addressed access to the vertex store. */
+#define LOCAL_VARS						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
+   const GLuint shift = rmesa->swtcl.vertex_stride_shift;	\
+   const char *radeonverts = (char *)rmesa->swtcl.verts;
+#define VERT(x) (radeonVertex *)(radeonverts + ((x) << shift))	/* argument parenthesised: safe for expressions */
+#define VERTEX radeonVertex 
+#undef TAG
+#define TAG(x) radeon_##x
+#include "tnl_dd/t_dd_triemit.h"
+
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+#define QUAD( a, b, c, d ) radeon_quad( rmesa, a, b, c, d )	/* radeon_* emitters presumably come from t_dd_triemit.h via TAG - TODO confirm */
+#define TRI( a, b, c )     radeon_triangle( rmesa, a, b, c )
+#define LINE( a, b )       radeon_line( rmesa, a, b )
+#define POINT( a )         radeon_point( rmesa, a )
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define RADEON_TWOSIDE_BIT	0x01
+#define RADEON_UNFILLED_BIT	0x02
+#define RADEON_OFFSET_BIT	0x04 /* drmMinor == 1 */
+#define RADEON_MAX_TRIFUNC	0x08	/* number of rast_tab slots */
+
+
+static struct {
+   points_func	        points;
+   line_func		line;
+   triangle_func	triangle;
+   quad_func		quad;
+} rast_tab[RADEON_MAX_TRIFUNC];	/* indexed by an OR of the three bits above */
+
+
+#define DO_FALLBACK  0
+#define DO_OFFSET   (IND & RADEON_OFFSET_BIT)
+#define DO_UNFILLED (IND & RADEON_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & RADEON_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_RGBA   1
+#define HAVE_SPEC   1
+#define HAVE_INDEX  0
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define TAB rast_tab	/* table filled by the t_dd_tritmp.h instantiations */
+
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a < 0)
+#define GET_VERTEX(e) (rmesa->swtcl.verts + (e<<rmesa->swtcl.vertex_stride_shift))	/* NOTE(review): e is unparenthesised; pass simple expressions only */
+
+#define VERT_SET_RGBA( v, c )    v->ui[coloroffset] = LE32_TO_CPU(*(GLuint *)c)
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx )    color[idx] = CPU_TO_LE32(v[idx]->ui[coloroffset])
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = LE32_TO_CPU(color[idx])
+
+#define VERT_SET_SPEC( v0, c )   if (havespec) {			\
+					v0->v.specular.red   = (c)[0];	\
+					v0->v.specular.green = (c)[1];	\
+					v0->v.specular.blue  = (c)[2]; }
+#define VERT_COPY_SPEC( v0, v1 ) if (havespec) {					\
+					v0->v.specular.red   = v1->v.specular.red;	\
+					v0->v.specular.green = v1->v.specular.green;	\
+					v0->v.specular.blue  = v1->v.specular.blue; }
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = CPU_TO_LE32(v[idx]->ui[5])	/* dword 5: presumably the packed specular color - TODO confirm */
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = LE32_TO_CPU(spec[idx])
+
+#undef LOCAL_VARS
+#undef TAG
+#undef INIT
+
+#define LOCAL_VARS(n)							\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
+   GLuint color[n], spec[n];						\
+   GLuint coloroffset = (rmesa->swtcl.vertex_size == 4 ? 3 : 4);	\
+   GLboolean havespec = (rmesa->swtcl.vertex_size > 4);			\
+   (void) color; (void) spec; (void) coloroffset; (void) havespec;
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+#define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
+#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
+#undef TAG
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT|RADEON_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT|RADEON_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_UNFILLED_BIT|RADEON_OFFSET_BIT)
+#define TAG(x) x##_unfilled_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT|RADEON_UNFILLED_BIT|RADEON_OFFSET_BIT)
+#define TAG(x) x##_twoside_unfilled_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+static void init_rast_tab( void )
+{
+   init();	/* one init*() per t_dd_tritmp.h include above, named by that include's TAG() */
+   init_twoside();	/* presumably each call fills rast_tab[IND] -- confirm in t_dd_tritmp.h */
+   init_unfilled();
+   init_twoside_unfilled();
+   init_offset();
+   init_twoside_offset();
+   init_unfilled_offset();
+   init_twoside_unfilled_offset();
+}
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+#define VERT(x) (radeonVertex *)(radeonverts + (x << shift))
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      radeon_point( rmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+   radeon_line( rmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   radeon_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   radeon_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#undef INIT
+#define INIT(x) do {					\
+   radeonRenderPrimitive( ctx, x );			\
+} while (0)
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
+   const GLuint shift = rmesa->swtcl.vertex_stride_shift;		\
+   const char *radeonverts = (char *)rmesa->swtcl.verts;		\
+   const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+   const GLboolean stipple = ctx->Line.StippleFlag;		\
+   (void) elt; (void) stipple;
+#define RESET_STIPPLE	if ( stipple ) radeonResetLineStipple( ctx );
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS
+#define ELT(x) (x)
+#define TAG(x) radeon_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) radeon_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+void radeonChooseRenderState( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint idx = 0;
+
+   /* Skip unless a TCL fallback is active and no rasterization fallback is. */
+   if (rmesa->Fallback || !rmesa->TclFallback) return;
+
+   if (caps & DD_TRI_LIGHT_TWOSIDE) idx |= RADEON_TWOSIDE_BIT;
+   if (caps & DD_TRI_UNFILLED) idx |= RADEON_UNFILLED_BIT;
+   if ((caps & DD_TRI_OFFSET) && rmesa->dri.drmMinor == 1)
+      idx |= RADEON_OFFSET_BIT;
+
+   /* The hooks below are already installed when the index is unchanged. */
+   if (idx == rmesa->swtcl.RenderIndex) return;
+
+   tnl->Driver.Render.Points = rast_tab[idx].points;
+   tnl->Driver.Render.Line = rast_tab[idx].line;
+   tnl->Driver.Render.ClippedLine = rast_tab[idx].line;
+   tnl->Driver.Render.Triangle = rast_tab[idx].triangle;
+   tnl->Driver.Render.Quad = rast_tab[idx].quad;
+
+   if (idx != 0) {
+      tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+      tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+   } else {
+      tnl->Driver.Render.PrimTabVerts = radeon_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = radeon_render_tab_elts;
+      tnl->Driver.Render.ClippedPolygon = radeon_fast_clipped_poly;
+   }
+   rmesa->swtcl.RenderIndex = idx;
+}
+
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+
+static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   /* Unchanged hardware primitive: skip RADEON_NEWPRIM and the update. */
+   if (rmesa->swtcl.hw_primitive == hwprim)
+      return;
+   RADEON_NEWPRIM( rmesa );
+   rmesa->swtcl.hw_primitive = hwprim;
+}
+
+static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   rmesa->swtcl.render_primitive = prim;	/* remember the GL primitive now being rendered */
+   if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 	/* not done for unfilled prims >= GL_TRIANGLES; presumably RASTERIZE() picks per primitive there -- confirm */
+      radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );
+}
+
+static void radeonRenderFinish( GLcontext *ctx )
+{  /* Does nothing; exists so tnl->Driver.Render.Finish has a driver hook to point at. */
+}
+
+static void radeonResetLineStipple( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   RADEON_STATECHANGE( rmesa, lin );	/* flag the "lin" state atom as changed; presumably re-emitting it restarts the stipple pattern -- confirm */
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+
+static const char * const fallbackStrings[] = {
+   "Texture mode",
+   "glDrawBuffer(GL_FRONT_AND_BACK)",
+   "glEnable(GL_STENCIL) without hw stencil buffer",
+   "glRenderMode(selection or feedback)",
+   "glBlendEquation",
+   "glBlendFunc",
+   "RADEON_NO_RAST",
+   "Mixing GL_CLAMP_TO_BORDER and GL_CLAMP (or GL_MIRROR_CLAMP_ATI)"
+};
+
+
+/* Describe one rasterization fallback bit; bits past the table yield "unknown". */
+static const char *getFallbackString(GLuint bit)
+{
+   const GLuint count = sizeof(fallbackStrings) / sizeof(fallbackStrings[0]);
+   GLuint i = 0;
+   for ( ; bit > 1; bit >>= 1)
+      i++;
+   return (i < count) ? fallbackStrings[i] : "unknown";
+}
+
+
+void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint oldfallback = rmesa->Fallback;	/* mask before this call, used to spot 0 <-> nonzero transitions */
+
+   if (mode) {
+      rmesa->Fallback |= bit;
+      if (oldfallback == 0) {	/* first bit set: begin the rasterization fallback */
+	 RADEON_FIREVERTICES( rmesa );
+	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
+	 _swsetup_Wakeup( ctx );
+	 _tnl_need_projected_coords( ctx, GL_TRUE );
+	 rmesa->swtcl.RenderIndex = ~0;	/* matches no real index, so radeonChooseRenderState reinstalls its hooks */
+         if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+            fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
+                    bit, getFallbackString(bit));
+         }
+      }
+   }
+   else {
+      rmesa->Fallback &= ~bit;
+      if (oldfallback == bit) {	/* last bit cleared: put the driver's render hooks back */
+	 _swrast_flush( ctx );
+	 tnl->Driver.Render.Start = radeonRenderStart;
+	 tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
+	 tnl->Driver.Render.Finish = radeonRenderFinish;
+	 tnl->Driver.Render.BuildVertices = radeonBuildVertices;
+	 tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
+	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
+	 if (rmesa->TclFallback) {
+	    /* These are already done if rmesa->TclFallback goes to
+	     * zero above. But not if it doesn't (RADEON_NO_TCL for
+	     * example?)
+	     */
+	    radeonChooseVertexState( ctx );
+	    radeonChooseRenderState( ctx );
+	 }
+         if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+            fprintf(stderr, "Radeon end rasterization fallback: 0x%x %s\n",
+                    bit, getFallbackString(bit));
+         }
+      }
+   }
+}
+
+
+void radeonFlushVertices( GLcontext *ctx, GLuint flags )
+{
+   _tnl_flush_vertices( ctx, flags );	/* hand the flush to the tnl module first */
+
+   if (flags & FLUSH_STORED_VERTICES)	/* then issue RADEON_NEWPRIM when stored vertices were flushed */
+      RADEON_NEWPRIM( RADEON_CONTEXT( ctx ) );
+}
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+void radeonInitSwtcl( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint size = TNL_CONTEXT(ctx)->vb.Size;
+   static int firsttime = 1;	/* function-static: the tables below are built once, not per context */
+
+   if (firsttime) {
+      init_rast_tab();
+      init_setup_tab();
+      firsttime = 0;
+   }
+
+   tnl->Driver.Render.Start = radeonRenderStart;	/* install this driver's tnl render hooks */
+   tnl->Driver.Render.Finish = radeonRenderFinish;
+   tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
+   tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
+   tnl->Driver.Render.BuildVertices = radeonBuildVertices;
+
+   rmesa->swtcl.verts = ALIGN_MALLOC( size * 16 * 4, 32 );	/* NOTE(review): result is not checked; 16 * 4 is presumably 16 dwords per vertex -- confirm */
+   rmesa->swtcl.RenderIndex = ~0;	/* matches no rast_tab index, so the first radeonChooseRenderState installs its hooks */
+   rmesa->swtcl.render_primitive = GL_TRIANGLES;
+   rmesa->swtcl.hw_primitive = 0;
+}
+
+
+void radeonDestroySwtcl( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (rmesa->swtcl.indexed_verts.buf) 	/* release the indexed-vertex DMA region if one is held */
+      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+			      __FUNCTION__ );
+
+   if (rmesa->swtcl.verts) {	/* vertex store allocated in radeonInitSwtcl */
+      ALIGN_FREE(rmesa->swtcl.verts);
+      rmesa->swtcl.verts = 0;
+   }
+
+   if (rmesa->UbyteSecondaryColor.Ptr) {	/* pointer is zeroed after freeing so it is not freed twice */
+      ALIGN_FREE(rmesa->UbyteSecondaryColor.Ptr);
+      rmesa->UbyteSecondaryColor.Ptr = 0;
+   }
+
+   if (rmesa->UbyteColor.Ptr) {
+      ALIGN_FREE(rmesa->UbyteColor.Ptr);
+      rmesa->UbyteColor.Ptr = 0;
+   }
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.h b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
new file mode 100644
index 0000000000..c45e711c0b
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
@@ -0,0 +1,77 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef __RADEON_TRIS_H__
+#define __RADEON_TRIS_H__
+
+#include "mtypes.h"
+#include "swrast/swrast.h"
+#include "radeon_context.h"
+
+extern void radeonInitSwtcl( GLcontext *ctx );
+extern void radeonDestroySwtcl( GLcontext *ctx );
+
+extern void radeonFlushVertices( GLcontext *ctx, GLuint flags );
+extern void radeonChooseRenderState( GLcontext *ctx );
+extern void radeonChooseVertexState( GLcontext *ctx );
+
+extern void radeonCheckTexSizes( GLcontext *ctx );
+
+extern void radeonBuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+				 GLuint newinputs );
+
+extern void radeonPrintSetupFlags(char *msg, GLuint flags );
+
+
+extern void radeon_emit_contiguous_verts( GLcontext *ctx,
+					  GLuint start,
+					  GLuint count );
+
+extern void radeon_emit_indexed_verts( GLcontext *ctx,
+				       GLuint start,
+				       GLuint count );
+
+extern void radeon_translate_vertex( GLcontext *ctx, 
+				     const radeonVertex *src, 
+				     SWvertex *dst );
+
+extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v );
+
+extern void radeon_import_float_colors( GLcontext *ctx );
+extern void radeon_import_float_spec_colors( GLcontext *ctx );
+
+
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
new file mode 100644
index 0000000000..651194a804
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -0,0 +1,527 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "light.h"
+#include "mtypes.h"
+#include "enums.h"
+
+#include "array_cache/acache.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_tex.h"
+#include "radeon_tcl.h"
+#include "radeon_swtcl.h"
+#include "radeon_maos.h"
+
+
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Try to simulate missing primitives with indexed vertices.
+ */
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_LOOP   0
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+#define HAVE_POLYGONS    1
+#define HAVE_ELTS        1
+
+
+#define HW_POINTS           RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
+#define HW_LINES            RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
+#define HW_LINE_LOOP        0
+#define HW_LINE_STRIP       RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
+#define HW_TRIANGLES        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
+#define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
+#define HW_TRIANGLE_STRIP_1 0
+#define HW_TRIANGLE_FAN     RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
+#define HW_QUADS            0
+#define HW_QUAD_STRIP       0
+#define HW_POLYGON          RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
+
+
+static GLboolean discrete_prim[0x10] = {
+   0,				/* 0 none */
+   1,				/* 1 points */
+   1,				/* 2 lines */
+   0,				/* 3 line_strip */
+   1,				/* 4 tri_list */
+   0,				/* 5 tri_fan */
+   0,				/* 6 tri_type2 */
+   1,				/* 7 rect list (unused) */
+   1,				/* 8 3vert point */
+   1,				/* 9 3vert line */
+   0,
+   0,
+   0,
+   0,
+   0,
+   0,
+};
+   
+
+#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+#define ELT_TYPE  GLushort
+
+#define ELT_INIT(prim, hw_prim) \
+   radeonTclPrimitive( ctx, prim, hw_prim | RADEON_CP_VC_CNTL_PRIM_WALK_IND )
+
+#define GET_MESA_ELTS() rmesa->tcl.Elts
+
+
+/* Don't really know how many elts will fit in what's left of cmdbuf,
+ * as there is state to emit, etc:
+ */
+
+/* Testing on isosurf shows a maximum around here.  Don't know if it's
+ * the card or driver or kernel module that is causing the behaviour.
+ */
+#define GET_MAX_HW_ELTS() 300
+
+
+#define RESET_STIPPLE() do {			\
+   RADEON_STATECHANGE( rmesa, lin );		\
+   radeonEmitState( rmesa );			\
+} while (0)
+
+#define AUTO_STIPPLE( mode )  do {		\
+   RADEON_STATECHANGE( rmesa, lin );		\
+   if (mode)					\
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |=	\
+	 RADEON_LINE_PATTERN_AUTO_RESET;	\
+   else						\
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
+	 ~RADEON_LINE_PATTERN_AUTO_RESET;	\
+   radeonEmitState( rmesa );			\
+} while (0)
+
+
+
+#define ALLOC_ELTS(nr)	radeonAllocElts( rmesa, nr )
+
+static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr ) 
+{
+   if (rmesa->dma.flush)	/* run the pending dma flush callback, if one is installed */
+      rmesa->dma.flush( rmesa );
+
+   radeonEmitAOS( rmesa,	/* emit the current arrays with a final argument of 0 */
+		rmesa->tcl.aos_components,
+		rmesa->tcl.nr_aos_components, 0 );
+
+   return radeonAllocEltsOpenEnded( rmesa,	/* returned pointer is where the caller writes its nr elts -- presumably; confirm */
+				    rmesa->tcl.vertex_format, 
+				    rmesa->tcl.hw_primitive, nr );
+}
+
+#define CLOSE_ELTS()  RADEON_NEWPRIM( rmesa )
+
+
+
+/* TODO: Try to extend existing primitive if both are identical,
+ * discrete and there are no intervening state changes.  (Somewhat
+ * duplicates changes to DrawArrays code)
+ */
+static void EMIT_PRIM( GLcontext *ctx, 
+		       GLenum prim, 
+		       GLuint hwprim, 
+		       GLuint start, 
+		       GLuint count)	
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   radeonTclPrimitive( ctx, prim, hwprim );	/* updates rmesa->tcl.hw_primitive, which is used below */
+   
+   radeonEmitAOS( rmesa,
+		  rmesa->tcl.aos_components,
+		  rmesa->tcl.nr_aos_components,
+		  start );	/* 'start' is passed where radeonAllocElts passes 0 */
+   
+   /* Why couldn't this packet have taken an offset param?
+    */
+   radeonEmitVbufPrim( rmesa,
+		       rmesa->tcl.vertex_format,
+		       rmesa->tcl.hw_primitive,
+		       count - start );	/* presumably the number of vertices in [start, count) -- confirm */
+}
+
+
+
+/* Try & join small primitives
+ */
+#if 0
+#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
+#else
+#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )			\
+  ((NR) < 20 ||							\
+   ((NR) < 40 &&						\
+    rmesa->tcl.hw_primitive == (PRIM|				\
+			    RADEON_CP_VC_CNTL_PRIM_WALK_IND|	\
+			    RADEON_CP_VC_CNTL_TCL_ENABLE)))
+#endif
+
+#ifdef MESA_BIG_ENDIAN
+/* Elts are written half-word swapped within each 32 bit word (simpler if dest were always word aligned); unsigned long keeps 64 bit addresses intact. */
+#define EMIT_ELT(dest, offset, x) do {				\
+	int off = offset + ( ( (unsigned long)dest & 0x2 ) >> 1 );	\
+	GLushort *des = (GLushort *)( (unsigned long)dest & ~0x2UL );	\
+	(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); } while (0)
+#else
+#define EMIT_ELT(dest, offset, x) (dest)[offset] = (GLushort) (x)
+#endif
+
+#define EMIT_TWO_ELTS(dest, offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
+
+
+
+#define TAG(x) tcl_##x
+#include "tnl_dd/t_dd_dmatmp2.h"
+
+/**********************************************************************/
+/*                          External entrypoints                     */
+/**********************************************************************/
+
+void radeonEmitPrimitive( GLcontext *ctx, 
+			  GLuint first,
+			  GLuint last,
+			  GLuint flags )
+{
+   tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );	/* dispatch on the primitive mode bits of flags; table comes from the t_dd_dmatmp2.h include above */
+}
+
+void radeonEmitEltPrimitive( GLcontext *ctx, 
+			     GLuint first,
+			     GLuint last,
+			     GLuint flags )
+{
+   tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );	/* same dispatch as radeonEmitPrimitive, through the elts table */
+}
+
+/* Record the hardware primitive used for TCL rendering and keep the
+ * flat-shade vertex selection bits of SE_CNTL in step with 'prim'. */
+void radeonTclPrimitive( GLcontext *ctx,
+			 GLenum prim,
+			 int hw_prim )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLuint wanted = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
+   const GLboolean flat_poly =
+      (prim == GL_POLYGON && (ctx->_TriangleCaps & DD_FLATSHADE));
+   GLuint cntl;
+
+   /* New primitive on any change, and always for non-discrete types. */
+   if (!discrete_prim[hw_prim&0xf] || wanted != rmesa->tcl.hw_primitive) {
+      RADEON_NEWPRIM( rmesa );
+      rmesa->tcl.hw_primitive = wanted;
+   }
+
+   /* VTX_0 for flat-shaded polygons, VTX_LAST for everything else. */
+   cntl = rmesa->hw.set.cmd[SET_SE_CNTL] & ~RADEON_FLAT_SHADE_VTX_LAST;
+   cntl |= flat_poly ? RADEON_FLAT_SHADE_VTX_0 : RADEON_FLAT_SHADE_VTX_LAST;
+
+   if (cntl == rmesa->hw.set.cmd[SET_SE_CNTL])
+      return;
+   RADEON_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_SE_CNTL] = cntl;
+}
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+/* TCL render.
+ */
+static GLboolean radeon_run_tcl_render( GLcontext *ctx,
+					struct gl_pipeline_stage *stage )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint i,flags = 0,length;
+
+   /* Emit every primitive in the VB via radeonEmit*Primitive unless a TCL
+    * fallback is active.  TODO: separate this from the swtnl pipeline */
+   if (rmesa->TclFallback)
+      return GL_TRUE;	/* fallback to software t&l */
+
+   if (VB->Count == 0)
+      return GL_FALSE;	/* empty VB: nothing to emit */
+
+   radeonReleaseArrays( ctx, stage->changed_inputs );
+   radeonEmitArrays( ctx, stage->inputs );
+
+   rmesa->tcl.Elts = VB->Elts;	/* non-NULL selects the indexed path below */
+
+   for (i = VB->FirstPrimitive ; !(flags & PRIM_LAST) ; i += length)
+   {
+      flags = VB->Primitive[i];
+      length = VB->PrimitiveLength[i];
+
+      if (RADEON_DEBUG & DEBUG_PRIMS)
+	 fprintf(stderr, "%s: prim %s %u..%u\n",	/* %u: i and length are GLuint */
+		 __FUNCTION__,
+		 _mesa_lookup_enum_by_nr(flags & PRIM_MODE_MASK), 
+		 i, i+length);
+
+      if (!length)	/* NOTE(review): a zero length without PRIM_LAST would never advance i -- confirm it cannot occur */
+	 continue;
+
+      if (rmesa->tcl.Elts)
+	 radeonEmitEltPrimitive( ctx, i, i+length, flags );
+      else
+	 radeonEmitPrimitive( ctx, i, i+length, flags );
+   }
+
+   return GL_FALSE;		/* finished the pipe */
+}
+
+
+
+static void radeon_check_tcl_render( GLcontext *ctx,
+				     struct gl_pipeline_stage *stage )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint inputs = VERT_BIT_POS;	/* position is always requested */
+
+   if (ctx->RenderMode == GL_RENDER) {
+      /* Make all this event-driven:
+       */
+      if (ctx->Light.Enabled) {
+	 inputs |= VERT_BIT_NORMAL;
+
+	 if (1 || ctx->Light.ColorMaterialEnabled) {	/* NOTE(review): "1 ||" makes this unconditional, so COLOR0 is always requested */
+	    inputs |= VERT_BIT_COLOR0;
+	 }
+      }
+      else {
+	 inputs |= VERT_BIT_COLOR0;
+	 
+	 if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
+	    inputs |= VERT_BIT_COLOR1;
+	 }
+      }
+
+      if (ctx->Texture.Unit[0]._ReallyEnabled) {	/* unit 0: texgen replaces the texcoord input, and may need normals */
+	 if (ctx->Texture.Unit[0].TexGenEnabled) {
+	    if (rmesa->TexGenNeedNormals[0]) {
+	       inputs |= VERT_BIT_NORMAL;
+	    }
+	 } else {
+	    inputs |= VERT_BIT_TEX0;
+	 }
+      }
+
+      if (ctx->Texture.Unit[1]._ReallyEnabled) {	/* unit 1: same rule as unit 0 */
+	 if (ctx->Texture.Unit[1].TexGenEnabled) {
+	    if (rmesa->TexGenNeedNormals[1]) {
+	       inputs |= VERT_BIT_NORMAL;
+	    }
+	 } else {
+	    inputs |= VERT_BIT_TEX1;
+	 }
+      }
+
+      stage->inputs = inputs;
+      stage->active = 1;
+   }
+   else
+      stage->active = 0;	/* stage is switched off outside GL_RENDER mode */
+}
+
+static void radeon_init_tcl_render( GLcontext *ctx,
+				    struct gl_pipeline_stage *stage )
+{
+   stage->check = radeon_check_tcl_render;	/* replace this one-shot initializer with the real check */
+   stage->check( ctx, stage );	/* and run that check immediately */
+}
+
+static void dtr( struct gl_pipeline_stage *stage )
+{
+   (void)stage;	/* stage destructor with nothing to release; the cast only silences the unused-parameter warning */
+}
+
+
+/* Initial state for tcl stage.  
+ */
+const struct gl_pipeline_stage _radeon_tcl_stage =
+{
+   "radeon render",
+   (_DD_NEW_SEPARATE_SPECULAR |
+    _NEW_LIGHT|
+    _NEW_TEXTURE|
+    _NEW_FOG|
+    _NEW_RENDERMODE),		/* re-check (new inputs) */
+   0,				/* re-run (always runs) */
+   GL_TRUE,			/* active */
+   0, 0,			/* inputs (set in check_render), outputs */
+   0, 0,			/* changed_inputs, private */
+   dtr,				/* destructor */
+   radeon_init_tcl_render,	/* check - initially set to alloc data */
+   radeon_run_tcl_render	/* run */
+};
+
+
+
+/**********************************************************************/
+/*                 Validate state at pipeline start                   */
+/**********************************************************************/
+
+
+/*-----------------------------------------------------------------------
+ * Manage TCL fallbacks
+ */
+
+
+static void transition_to_swtnl( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint se_cntl;
+
+   RADEON_NEWPRIM( rmesa );
+   rmesa->swtcl.vertex_format = 0;	/* presumably forces radeonChooseVertexState to re-pick the format -- confirm */
+
+   radeonChooseVertexState( ctx );
+   radeonChooseRenderState( ctx );
+
+   _mesa_validate_all_lighting_tables( ctx ); 
+
+   tnl->Driver.NotifyMaterialChange = 	/* material changes now go to Mesa's lighting tables */
+      _mesa_validate_all_lighting_tables;
+
+   radeonReleaseArrays( ctx, ~0 );	/* ~0: every input bit */
+
+   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
+   se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
+	 
+   if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {	/* only flag a state change when the value really differs */
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
+   }
+}
+
+
+static void transition_to_hwtnl( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint se_coord_fmt = (RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
+			  RADEON_TEX1_W_ROUTING_USE_Q1);
+
+   if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {	/* only flag a state change when the value really differs */
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
+      _tnl_need_projected_coords( ctx, GL_FALSE );
+   }
+
+   radeonUpdateMaterial( ctx );
+
+   tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;	/* material changes now go to the driver */
+
+   if ( rmesa->dma.flush )	/* run the pending flush callback once, then clear it */
+      rmesa->dma.flush( rmesa );	
+
+   rmesa->dma.flush = 0;
+   rmesa->swtcl.vertex_format = 0;
+   
+   if (rmesa->swtcl.indexed_verts.buf) 	/* drop the swtcl indexed-vertex DMA region if one is held */
+      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+			      __FUNCTION__ );
+
+   if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+      fprintf(stderr, "Radeon end tcl fallback\n");
+}
+
+static char *fallbackStrings[] = {	/* one entry per RADEON_TCL_FALLBACK_* bit, indexed by bit position */
+   "Rasterization fallback",
+   "Unfilled triangles",
+   "Twosided lighting, differing materials",
+   "Materials in VB (maybe between begin/end)",
+   "Texgen unit 0",
+   "Texgen unit 1",
+   "Texgen unit 2",
+   "User disable",
+   "Texture rectangle unit 0",
+   "Texture rectangle unit 1",
+   "Texture rectangle unit 2"
+};
+
+/* Describe one TCL fallback bit; bits past the table yield "unknown". */
+static char *getFallbackString(GLuint bit)
+{
+   GLuint i = 0, n = sizeof(fallbackStrings) / sizeof(fallbackStrings[0]);
+   for ( ; bit > 1; bit >>= 1) i++;
+   return (i < n) ? fallbackStrings[i] : "unknown";
+}
+
+
+
+void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLuint previous = rmesa->TclFallback;
+
+   if (!mode) {
+      /* Clearing: go back to hardware TNL once the last set bit is gone. */
+      rmesa->TclFallback &= ~bit;
+      if (previous != bit)
+	 return;
+      if (RADEON_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "Radeon end tcl fallback %s\n", getFallbackString( bit ));
+      transition_to_hwtnl( ctx );
+      return;
+   }
+
+   /* Setting: switch to software TNL only if no bit was set before. */
+   rmesa->TclFallback |= bit;
+   if (previous != 0)
+      return;
+   if (RADEON_DEBUG & DEBUG_FALLBACKS)
+      fprintf(stderr, "Radeon begin tcl fallback %s\n", getFallbackString( bit ));
+   transition_to_swtnl( ctx );
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.h b/src/mesa/drivers/dri/radeon/radeon_tcl.h
new file mode 100644
index 0000000000..1e97d32148
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.h
@@ -0,0 +1,70 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef __RADEON_TCL_H__
+#define __RADEON_TCL_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "radeon_context.h"
+
+extern void radeonTclPrimitive( GLcontext *ctx, GLenum prim, int hw_prim );
+extern void radeonEmitEltPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+				    GLuint flags );
+extern void radeonEmitPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+				 GLuint flags );
+
+extern void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+					      
+#define RADEON_TCL_FALLBACK_RASTER            0x1 /* rasterization */
+#define RADEON_TCL_FALLBACK_UNFILLED          0x2 /* unfilled tris */
+#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE     0x4 /* twoside tris */
+#define RADEON_TCL_FALLBACK_MATERIAL          0x8 /* material in vb */
+#define RADEON_TCL_FALLBACK_TEXGEN_0          0x10 /* texgen, unit 0 */
+#define RADEON_TCL_FALLBACK_TEXGEN_1          0x20 /* texgen, unit 1 */
+#define RADEON_TCL_FALLBACK_TEXGEN_2          0x40 /* texgen, unit 2 */
+#define RADEON_TCL_FALLBACK_TCL_DISABLE       0x80 /* user disable */
+#define RADEON_TCL_FALLBACK_TEXRECT_0         0x100 /* texture rectangle */
+#define RADEON_TCL_FALLBACK_TEXRECT_1         0x200 /* texture rectangle */
+#define RADEON_TCL_FALLBACK_TEXRECT_2         0x400 /* texture rectangle */
+
+#define RADEON_MAX_TCL_VERTSIZE (15*4)
+
+#define TCL_FALLBACK( ctx, bit, mode )	radeonTclFallback( ctx, bit, mode )
+
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
new file mode 100644
index 0000000000..e068202c5e
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
@@ -0,0 +1,733 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c,v 1.6 2002/09/16 18:05:20 eich Exp $ */
+/*
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Brian Paul <brianp@valinux.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "colormac.h"
+#include "context.h"
+#include "enums.h"
+#include "image.h"
+#include "simple_list.h"
+#include "texformat.h"
+#include "texstore.h"
+#include "teximage.h"
+
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_swtcl.h"
+#include "radeon_tex.h"
+
+
+
+/**
+ * Set the texture wrap modes.
+ * 
+ * \param t Texture object whose wrap modes are to be set
+ * \param swrap Wrap mode for the \a s texture coordinate
+ * \param twrap Wrap mode for the \a t texture coordinate
+ */
+
+static void radeonSetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap )
+{
+   GLboolean  is_clamp = GL_FALSE;           /* a coord uses GL_CLAMP or GL_MIRROR_CLAMP_ATI */
+   GLboolean  is_clamp_to_border = GL_FALSE; /* a coord uses GL_CLAMP_TO_BORDER */
+
+   t->pp_txfilter &= ~(RADEON_CLAMP_S_MASK | RADEON_CLAMP_T_MASK | RADEON_BORDER_MODE_D3D);
+
+   switch ( swrap ) {
+   case GL_REPEAT:
+      t->pp_txfilter |= RADEON_CLAMP_S_WRAP;
+      break;
+   case GL_CLAMP:
+      t->pp_txfilter |= RADEON_CLAMP_S_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      t->pp_txfilter |= RADEON_CLAMP_S_CLAMP_LAST;
+      break;
+   case GL_CLAMP_TO_BORDER:
+      t->pp_txfilter |= RADEON_CLAMP_S_CLAMP_GL; /* same clamp bits as GL_CLAMP; told apart by the border mode bit below */
+      is_clamp_to_border = GL_TRUE;
+      break;
+   case GL_MIRRORED_REPEAT:
+      t->pp_txfilter |= RADEON_CLAMP_S_MIRROR;
+      break;
+   case GL_MIRROR_CLAMP_ATI:
+      t->pp_txfilter |= RADEON_CLAMP_S_MIRROR_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_MIRROR_CLAMP_TO_EDGE_ATI:
+      t->pp_txfilter |= RADEON_CLAMP_S_MIRROR_CLAMP_LAST;
+      break;
+   default: /* only reported; the S clamp field stays cleared */
+      _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
+   }
+
+   switch ( twrap ) {
+   case GL_REPEAT:
+      t->pp_txfilter |= RADEON_CLAMP_T_WRAP;
+      break;
+   case GL_CLAMP:
+      t->pp_txfilter |= RADEON_CLAMP_T_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      t->pp_txfilter |= RADEON_CLAMP_T_CLAMP_LAST;
+      break;
+   case GL_CLAMP_TO_BORDER:
+      t->pp_txfilter |= RADEON_CLAMP_T_CLAMP_GL;
+      is_clamp_to_border = GL_TRUE;
+      break;
+   case GL_MIRRORED_REPEAT:
+      t->pp_txfilter |= RADEON_CLAMP_T_MIRROR;
+      break;
+   case GL_MIRROR_CLAMP_ATI:
+      t->pp_txfilter |= RADEON_CLAMP_T_MIRROR_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_MIRROR_CLAMP_TO_EDGE_ATI:
+      t->pp_txfilter |= RADEON_CLAMP_T_MIRROR_CLAMP_LAST;
+      break;
+   default: /* only reported; the T clamp field stays cleared */
+      _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__);
+   }
+
+   if ( is_clamp_to_border ) {
+      t->pp_txfilter |= RADEON_BORDER_MODE_D3D;
+   }
+   /* A single border-mode bit serves both coords, so a CLAMP-style mode mixed with CLAMP_TO_BORDER is flagged. */
+   t->border_fallback = (is_clamp && is_clamp_to_border);
+}
+
+static void radeonSetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max )
+{
+   /* Exactly 1.0 selects 1:1; any other value takes the first of the
+    * 2.0 / 4.0 / 8.0 buckets that is >= max, and everything else uses 16:1.
+    */
+   GLuint ratio_bits;
+
+   if ( max == 1.0 )      ratio_bits = RADEON_MAX_ANISO_1_TO_1;
+   else if ( max <= 2.0 ) ratio_bits = RADEON_MAX_ANISO_2_TO_1;
+   else if ( max <= 4.0 ) ratio_bits = RADEON_MAX_ANISO_4_TO_1;
+   else if ( max <= 8.0 ) ratio_bits = RADEON_MAX_ANISO_8_TO_1;
+   else                   ratio_bits = RADEON_MAX_ANISO_16_TO_1;
+
+   /* Replace only the anisotropy field of t->pp_txfilter. */
+   t->pp_txfilter = (t->pp_txfilter & ~RADEON_MAX_ANISO_MASK) | ratio_bits;
+}
+
+/**
+ * Set the texture magnification and minification modes.
+ * 
+ * \param t Texture whose filter modes are to be set
+ * \param minf Texture minification mode
+ * \param magf Texture magnification mode
+ */
+
+static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
+{
+   GLuint anisotropy = (t->pp_txfilter & RADEON_MAX_ANISO_MASK); /* field written by radeonSetTexMaxAnisotropy() */
+
+   t->pp_txfilter &= ~(RADEON_MIN_FILTER_MASK | RADEON_MAG_FILTER_MASK);
+
+   if ( anisotropy == RADEON_MAX_ANISO_1_TO_1 ) { /* 1:1 ratio: plain (non-ANISO) min filters */
+      switch ( minf ) {
+      case GL_NEAREST:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST;
+	 break;
+      case GL_LINEAR:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR;
+	 break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST_MIP_NEAREST;
+	 break;
+      case GL_NEAREST_MIPMAP_LINEAR: /* NOTE(review): RADEON_ name order differs from the GL enum here and in the next case -- confirm intended */
+	 t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR_MIP_NEAREST;
+	 break;
+      case GL_LINEAR_MIPMAP_NEAREST:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST_MIP_LINEAR;
+	 break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR_MIP_LINEAR;
+	 break;
+      } /* any other minf leaves the min-filter field cleared */
+   } else { /* any other ratio: only the ANISO_* min filters are used */
+      switch ( minf ) {
+      case GL_NEAREST:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_ANISO_NEAREST;
+	 break;
+      case GL_LINEAR:
+	 t->pp_txfilter |= RADEON_MIN_FILTER_ANISO_LINEAR;
+	 break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+      case GL_LINEAR_MIPMAP_NEAREST: /* both *_MIPMAP_NEAREST modes share one setting */
+	 t->pp_txfilter |= RADEON_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST;
+	 break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+      case GL_LINEAR_MIPMAP_LINEAR: /* both *_MIPMAP_LINEAR modes share one setting */
+	 t->pp_txfilter |= RADEON_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR;
+	 break;
+      }
+   }
+
+   switch ( magf ) {
+   case GL_NEAREST:
+      t->pp_txfilter |= RADEON_MAG_FILTER_NEAREST;
+      break;
+   case GL_LINEAR:
+      t->pp_txfilter |= RADEON_MAG_FILTER_LINEAR;
+      break;
+   } /* any other magf leaves the mag-filter field cleared */
+}
+
+static void radeonSetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] )
+{
+   t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); /* c[0..3] in order; leading 4 presumably selects 32-bit packing -- confirm */
+}
+
+
+/**
+ * Allocate the driver-private texture object for \a texObj, store it in
+ * texObj->DriverData and seed its wrap, anisotropy, filter and border-color
+ * state from the GL object's current parameters.  Returns NULL on failure.
+ */
+
+static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj )
+{
+   radeonTexObjPtr t = CALLOC_STRUCT( radeon_tex_obj );
+
+   /* DriverData always receives the allocation result, so it is NULL
+    * (like the return value) when the allocation fails.
+    */
+   texObj->DriverData = t;
+   if ( t == NULL )
+      return NULL;
+
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, texObj, t );
+   }
+
+   /* State that does not depend on any image: */
+   t->base.tObj = texObj;
+   t->border_fallback = GL_FALSE;
+   t->pp_txfilter = RADEON_BORDER_MODE_OGL;
+   t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
+		     RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
+   make_empty_list( & t->base );
+
+   /* Mirror the GL object's current sampling parameters. */
+   radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT );
+   radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
+   radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+   radeonSetTexBorderColor( t, texObj->_BorderChan );
+
+   return t;
+}
+
+
+static const struct gl_texture_format *
+radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+                           GLenum format, GLenum type )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const GLboolean do32bpt = ( rmesa->radeonScreen->cpp == 4 );
+   /* Pick the storage format; do32bpt (screen cpp == 4) selects rgba8888 for generic and wide formats. */
+   switch ( internalFormat ) {
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      if ( format == GL_BGRA ) {
+	 if ( type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
+	    return &_mesa_texformat_argb8888;
+	 }
+         else if ( type == GL_UNSIGNED_SHORT_4_4_4_4_REV ) {
+            return &_mesa_texformat_argb4444;
+	 }
+         else if ( type == GL_UNSIGNED_SHORT_1_5_5_5_REV ) {
+	    return &_mesa_texformat_argb1555;
+	 }
+      }
+      return do32bpt ? &_mesa_texformat_rgba8888 : &_mesa_texformat_argb4444; /* no direct BGRA match */
+
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
+	 return &_mesa_texformat_rgb565;
+      }
+      return do32bpt ? &_mesa_texformat_rgba8888 : &_mesa_texformat_rgb565;
+
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return do32bpt ? &_mesa_texformat_rgba8888 : &_mesa_texformat_argb4444;
+
+   case GL_RGBA4:
+   case GL_RGBA2:
+      return &_mesa_texformat_argb4444;
+
+   case GL_RGB5_A1:
+      return &_mesa_texformat_argb1555;
+
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return do32bpt ? &_mesa_texformat_rgba8888 : &_mesa_texformat_rgb565;
+
+   case GL_RGB5:
+   case GL_RGB4:
+   case GL_R3_G3_B2:
+      return &_mesa_texformat_rgb565;
+
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+      return &_mesa_texformat_al88; /* alpha-only is stored as al88 */
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      return &_mesa_texformat_al88; /* luminance-only is stored as al88 too */
+
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return &_mesa_texformat_al88;
+
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      return &_mesa_texformat_i8;
+
+   case GL_YCBCR_MESA:
+      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+          type == GL_UNSIGNED_BYTE)
+         return &_mesa_texformat_ycbcr;
+      else
+         return &_mesa_texformat_ycbcr_rev; /* every other type */
+
+   default: /* unknown internalFormat: report it and return NULL */
+      _mesa_problem(ctx, "unexpected texture format in %s", __FUNCTION__);
+      return NULL;
+   }
+
+   return NULL; /* never get here */
+}
+
+
+static void radeonTexImage1D( GLcontext *ctx, GLenum target, GLint level,
+                              GLint internalFormat,
+                              GLint width, GLint border,
+                              GLenum format, GLenum type, const GLvoid *pixels,
+                              const struct gl_pixelstore_attrib *packing,
+                              struct gl_texture_object *texObj,
+                              struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   /* Reuse the driver texture object (swapping it out first) or create it. */
+   if ( t ) {
+      driSwapOutTextureObject( t );
+   } else {
+      t = (driTextureObject *) radeonAllocTexObj( texObj );
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+         return;
+      }
+   }
+
+   /* Calls ChooseTextureFormat.  Unpack with the caller's 'packing', not ctx->Unpack. */
+   _mesa_store_teximage1d(ctx, target, level, internalFormat,
+                          width, border, format, type, pixels,
+                          packing, texObj, texImage);
+
+   t->dirty_images[0] |= (1 << level); /* flag this mipmap level as dirty */
+}
+
+
+static void radeonTexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
+                                 GLint xoffset,
+                                 GLsizei width,
+                                 GLenum format, GLenum type,
+                                 const GLvoid *pixels,
+                                 const struct gl_pixelstore_attrib *packing,
+                                 struct gl_texture_object *texObj,
+                                 struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   assert( t ); /* this _should_ be true */
+   if ( !t ) {
+      t = (driTextureObject *) radeonAllocTexObj( texObj );
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+         return;
+      }
+   } else {
+      driSwapOutTextureObject( t );
+   }
+
+   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+			     format, type, pixels, packing,
+			     texObj, texImage);
+
+   /* Flag the modified mipmap level as dirty in the driver object. */
+   t->dirty_images[0] |= (1 << level);
+}
+
+
+static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+                              GLint internalFormat,
+                              GLint width, GLint height, GLint border,
+                              GLenum format, GLenum type, const GLvoid *pixels,
+                              const struct gl_pixelstore_attrib *packing,
+                              struct gl_texture_object *texObj,
+                              struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+   GLuint face;
+
+   /* which cube face or ordinary 2D image */
+   switch (target) {
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+      ASSERT(face < 6);
+      break;
+   default:
+      face = 0;
+   }
+
+   /* Reuse the driver texture object (swapping it out first) or create it. */
+   if ( t != NULL ) {
+      driSwapOutTextureObject( t );
+   } else {
+      t = (driTextureObject *) radeonAllocTexObj( texObj );
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+         return;
+      }
+   }
+
+   /* Calls ChooseTextureFormat.  Unpack with the caller's 'packing', not ctx->Unpack. */
+   _mesa_store_teximage2d(ctx, target, level, internalFormat,
+                          width, height, border, format, type, pixels,
+                          packing, texObj, texImage);
+
+   t->dirty_images[face] |= (1 << level); /* flag this level of the face as dirty */
+}
+
+
+static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+                                 GLint xoffset, GLint yoffset,
+                                 GLsizei width, GLsizei height,
+                                 GLenum format, GLenum type,
+                                 const GLvoid *pixels,
+                                 const struct gl_pixelstore_attrib *packing,
+                                 struct gl_texture_object *texObj,
+                                 struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+   GLuint face = 0;   /* ordinary 2D images live in slot 0 */
+
+   /* The six cube-map face targets are consecutive enum values, so a range
+    * test replaces listing them and the face index is the distance from
+    * GL_TEXTURE_CUBE_MAP_POSITIVE_X.
+    */
+   if ( target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X &&
+	target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z ) {
+      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+      ASSERT(face < 6);
+   }
+
+   assert( t ); /* this _should_ be true */
+   if ( !t ) {
+      /* Reached only when the assertion above is compiled out. */
+      t = (driTextureObject *) radeonAllocTexObj( texObj );
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+         return;
+      }
+   }
+   else {
+      driSwapOutTextureObject( t );
+   }
+
+   /* Store the sub-rectangle through the core Mesa helper, using the
+    * caller-supplied unpacking state.
+    */
+   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+			     height, format, type, pixels, packing, texObj,
+			     texImage);
+
+   /* Flag the modified mipmap level of this face as dirty in the
+    * driver object. */
+   t->dirty_images[face] |= (1 << level);
+}
+
+
+
+/* (255/scale)*x truncated to an integer, then halved as unsigned; the GLint step keeps negative x well defined. */
+#define SCALED_FLOAT_TO_BYTE( x, scale ) (((GLuint)(GLint)((255.0F / (scale)) * (x))) / 2)
+/* Driver.TexEnv hook: updates rmesa->hw.tex[unit] for the current unit's env color and LOD bias. */
+static void radeonTexEnv( GLcontext *ctx, GLenum target,
+			  GLenum pname, const GLfloat *param )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint unit = ctx->Texture.CurrentUnit;
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+   if ( RADEON_DEBUG & DEBUG_STATE ) {
+      fprintf( stderr, "%s( %s )\n",
+	       __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
+   }
+
+   switch ( pname ) {
+   case GL_TEXTURE_ENV_COLOR: {
+      GLubyte c[4];
+      GLuint envColor;
+      UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor );
+      envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
+      if ( rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] != envColor ) {
+	 RADEON_STATECHANGE( rmesa, tex[unit] );
+	 rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] = envColor;
+      }
+      break;
+   }
+
+   case GL_TEXTURE_LOD_BIAS_EXT: {
+      GLfloat bias;
+      GLuint b;
+      /* The Radeon's LOD bias is a signed 2's complement value with a
+       * range of -1.0 <= bias < 4.0.  We break this into two linear
+       * functions, one mapping [-1.0,0.0] to [-128,0] and one mapping
+       * [0.0,4.0] to [0,127].
+       */
+      bias = CLAMP( *param, -1.0, 4.0 );
+      if ( bias == 0 ) {
+	 b = 0;
+      } else if ( bias > 0 ) {
+	 b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 4.0 )) << RADEON_LOD_BIAS_SHIFT;
+      } else {
+	 b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 1.0 )) << RADEON_LOD_BIAS_SHIFT;
+      }
+      b &= RADEON_LOD_BIAS_MASK; /* drop the bits a negative bias leaves above the field, so the compare below is exact */
+      if ( (rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] & RADEON_LOD_BIAS_MASK) != b ) {
+	 RADEON_STATECHANGE( rmesa, tex[unit] );
+	 rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] &= ~RADEON_LOD_BIAS_MASK;
+	 rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] |= b;
+      }
+      break;
+   }
+
+   default:
+      return;
+   }
+}
+
+
+/**
+ * Changes variables and flags for a state update, which will happen at the
+ * next UpdateTextureState
+ */
+
+static void radeonTexParameter( GLcontext *ctx, GLenum target,
+				struct gl_texture_object *texObj,
+				GLenum pname, const GLfloat *params )
+{
+   radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
+
+   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+      fprintf( stderr, "%s( %s )\n", __FUNCTION__,
+	       _mesa_lookup_enum_by_nr( pname ) );
+   }
+
+   /* Ignore unmanaged targets.  A NULL t (never bound, or allocation failed)
+    * is skipped too: radeonAllocTexObj() seeds new objects from texObj. */
+   if ( t == NULL || ( target != GL_TEXTURE_2D && target != GL_TEXTURE_1D ) )
+      return;
+
+   switch ( pname ) {
+   case GL_TEXTURE_MIN_FILTER:
+   case GL_TEXTURE_MAG_FILTER:
+   case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+      radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
+      radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+      break;
+
+   case GL_TEXTURE_WRAP_S:
+   case GL_TEXTURE_WRAP_T:
+      radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT );
+      break;
+
+   case GL_TEXTURE_BORDER_COLOR:
+      radeonSetTexBorderColor( t, texObj->_BorderChan );
+      break;
+
+   case GL_TEXTURE_BASE_LEVEL:
+   case GL_TEXTURE_MAX_LEVEL:
+   case GL_TEXTURE_MIN_LOD:
+   case GL_TEXTURE_MAX_LOD:
+      /* This isn't the most efficient solution but there doesn't appear to
+       * be a nice alternative.  Since there's no LOD clamping,
+       * we just have to rely on loading the right subset of mipmap levels
+       * to simulate a clamped LOD.
+       */
+      driSwapOutTextureObject( (driTextureObject *) t );
+      break;
+
+   default:
+      return;
+   }
+
+   /* Mark this texobj as dirty (one bit per tex unit) */
+   t->dirty_state = TEX_ALL;
+}
+
+
+
+static void radeonBindTexture( GLcontext *ctx, GLenum target,
+			       struct gl_texture_object *texObj )
+{
+   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, texObj,
+	       ctx->Texture.CurrentUnit );
+   }
+
+   /* Lazily create driver data the first time a 1D/2D object is bound. */
+   if ( texObj->DriverData != NULL )
+      return;
+   if ( target == GL_TEXTURE_1D || target == GL_TEXTURE_2D )
+      radeonAllocTexObj( texObj );
+}
+
+static void radeonDeleteTexture( GLcontext *ctx,
+				 struct gl_texture_object *texObj )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+      fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, texObj,
+	       _mesa_lookup_enum_by_nr( texObj->Target ) );
+   }
+
+   if ( t == NULL )
+      return;
+   /* Fire pending vertices (when a context exists) before destroying t. */
+   if ( rmesa ) {
+      RADEON_FIREVERTICES( rmesa );
+   }
+   driDestroyTextureObject( t );
+}
+
+/* Need:  
+ *  - Same GEN_MODE for all active bits
+ *  - Same EyePlane/ObjPlane for all active bits when using Eye/Obj
+ *  - STRQ presumably all supported (matrix means incoming R values
+ *    can end up in STQ, this has implications for vertex support,
+ *    presumably ok if maos is used, though?)
+ *  
+ * Basically impossible to do this on the fly - just collect some
+ * basic info & do the checks from ValidateState().
+ */
+static void radeonTexGen( GLcontext *ctx,
+			  GLenum coord,
+			  GLenum pname,
+			  const GLfloat *params )
+{
+   /* coord, pname and params are unused: only flag the current unit. */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
+}
+
+
+void radeonInitTextureFuncs( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   /* Install the texture entry points in ctx->Driver; only the 1D/2D image hooks are wrapped by this file. */
+   ctx->Driver.ChooseTextureFormat	= radeonChooseTextureFormat;
+   ctx->Driver.TexImage1D		= radeonTexImage1D;
+   ctx->Driver.TexImage2D		= radeonTexImage2D;
+   ctx->Driver.TexImage3D		= _mesa_store_teximage3d;
+   ctx->Driver.TexSubImage1D		= radeonTexSubImage1D;
+   ctx->Driver.TexSubImage2D		= radeonTexSubImage2D;
+   ctx->Driver.TexSubImage3D		= _mesa_store_texsubimage3d;
+   ctx->Driver.CopyTexImage1D		= _swrast_copy_teximage1d;
+   ctx->Driver.CopyTexImage2D		= _swrast_copy_teximage2d;
+   ctx->Driver.CopyTexSubImage1D	= _swrast_copy_texsubimage1d;
+   ctx->Driver.CopyTexSubImage2D	= _swrast_copy_texsubimage2d;
+   ctx->Driver.CopyTexSubImage3D 	= _swrast_copy_texsubimage3d;
+   ctx->Driver.TestProxyTexImage	= _mesa_test_proxy_teximage;
+
+   ctx->Driver.BindTexture		= radeonBindTexture;
+   ctx->Driver.CreateTexture		= NULL; /* FIXME: Is this used??? */
+   ctx->Driver.DeleteTexture		= radeonDeleteTexture;
+   ctx->Driver.IsTextureResident	= driIsTextureResident;
+   ctx->Driver.PrioritizeTexture	= NULL;
+   ctx->Driver.ActiveTexture		= NULL;
+   ctx->Driver.UpdateTexturePalette	= NULL;
+
+   ctx->Driver.TexEnv			= radeonTexEnv;
+   ctx->Driver.TexParameter		= radeonTexParameter;
+   ctx->Driver.TexGen                   = radeonTexGen;
+   /* NOTE(review): presumably registers rmesa->swapped with the shared DRI texture code for 1D/2D targets -- confirm. */
+   driInitTextureObjects( ctx, & rmesa->swapped,
+			  DRI_TEXMGR_DO_TEXTURE_1D
+			  | DRI_TEXMGR_DO_TEXTURE_2D );
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h
new file mode 100644
index 0000000000..ce079baec2
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.h
@@ -0,0 +1,53 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_tex.h,v 1.3 2002/02/22 21:45:01 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __RADEON_TEX_H__
+#define __RADEON_TEX_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+extern void radeonUpdateTextureState( GLcontext *ctx );
+
+extern int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t,
+				  GLuint face );
+/* Defined in radeon_texmem.c: clears every texture-unit reference to t (no-op when rmesa is NULL). */
+extern void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t );
+/* Defined in radeon_tex.c: installs the texture entry points in ctx->Driver. */
+extern void radeonInitTextureFuncs( GLcontext *ctx );
+
+#endif
+#endif /* __RADEON_TEX_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c
new file mode 100644
index 0000000000..3adc2a951c
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_texmem.c
@@ -0,0 +1,378 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_texmem.c,v 1.7 2002/12/16 16:18:59 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation on the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
+SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+#include <errno.h>
+
+#include "glheader.h"
+#include "imports.h"
+#include "context.h"
+#include "macros.h"
+#include "simple_list.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_tex.h"
+
+
+/**
+ * Destroy any device-dependent state associated with the texture.  This may
+ * include NULLing out hardware state that points to the texture.
+ */
+void
+radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
+{
+   unsigned   u;
+
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, t, t->base.tObj );
+   }
+
+   if ( rmesa == NULL )
+      return;
+   /* Detach t from every texture unit that still points at it. */
+   for ( u = 0 ; u < rmesa->glCtx->Const.MaxTextureUnits ; u++ ) {
+      if ( rmesa->state.texture.unit[u].texobj != t )
+	 continue;
+      rmesa->state.texture.unit[u].texobj = NULL;
+      remove_from_list( &rmesa->hw.tex[u] );
+      make_empty_list( &rmesa->hw.tex[u] );
+   }
+}
+
+
+/* ------------------------------------------------------------
+ * Texture image conversions
+ */
+
+
+static void radeonUploadRectSubImage( radeonContextPtr rmesa,
+				      radeonTexObjPtr t, 
+				      struct gl_texture_image *texImage,
+				      GLint x, GLint y, 
+				      GLint width, GLint height )
+{
+   const struct gl_texture_format *texFormat = texImage->TexFormat;
+   int blit_format, dstPitch, done;
+   /* Upload the whole image to t->bufAddr in row chunks: copy rows into a DMA region, then blit that region. */
+   switch ( texFormat->TexelBytes ) {
+   case 1:
+      blit_format = RADEON_GMC_DST_8BPP_CI;
+      break;
+   case 2:
+      blit_format = RADEON_GMC_DST_16BPP;
+      break;
+   case 4:
+      blit_format = RADEON_GMC_DST_32BPP;
+      break;
+   default:
+      fprintf( stderr, "radeonUploadRectSubImage: unknown blit_format (texelbytes=%d)\n", 
+      	       texFormat->TexelBytes);
+      return;
+   }
+
+   t->image[0][0].data = texImage->Data;
+
+   /* Currently don't need to cope with small pitches.
+    */
+   width = texImage->Width;   /* the caller's width/height are overwritten and x/y are never read */
+   height = texImage->Height;
+   dstPitch = t->pp_txpitch + 32; /* NOTE(review): assumes pp_txpitch holds (pitch in bytes - 32) -- confirm */
+
+   {	/* FIXME: prefer AGP-texturing if possible */
+      /* Data not in agp memory, or bad pitch.
+       */
+      for (done = 0; done < height ; ) {
+	 struct radeon_dma_region region;
+	 int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch ); /* NOTE(review): 0 if dstPitch > RADEON_BUFFER_SIZE; loop would not advance */
+	 int src_pitch;
+	 char *tex;
+
+         src_pitch = texImage->RowStride * texFormat->TexelBytes;
+
+	 tex = (char *)texImage->Data + done * src_pitch;
+
+	 memset(&region, 0, sizeof(region));
+	 radeonAllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 );
+
+	 /* Copy texdata to dma:
+	  */
+	 if (0) /* disabled debug output */
+	    fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
+		    __FUNCTION__, src_pitch, dstPitch);
+
+	 if (src_pitch == dstPitch) {
+	    memcpy( region.address, tex, lines * src_pitch );
+	 } 
+	 else {
+	    char *buf = region.address;
+	    int i;
+	    for (i = 0 ; i < lines ; i++) {
+	       memcpy( buf, tex, src_pitch ); /* NOTE(review): assumes src_pitch <= dstPitch */
+	       buf += dstPitch;
+	       tex += src_pitch;
+	    }
+	 }
+
+	 radeonEmitWait( rmesa, RADEON_WAIT_3D );
+
+	 /* A RADEON_WAIT_3D is emitted before the blit and a RADEON_WAIT_2D after it. */
+
+	 /* Blit to framebuffer (NOTE(review): the target passed is t->bufAddr -- confirm wording)
+	  */
+	 radeonEmitBlit( rmesa, 
+		       blit_format, 
+ 		       dstPitch, GET_START( &region ),    
+ 		       dstPitch, t->bufAddr, 
+		       0, 0, 
+		       0, done, 
+		       width, lines );
+	 
+	 radeonEmitWait( rmesa, RADEON_WAIT_2D );
+
+	 radeonReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
+	 done += lines;
+      }
+   }
+}
+
+
+/**
+ * Upload the image of texture \a t for cube face \a face at hardware level
+ * \a hwlevel, which is counted from \c t->base.firstLevel.
+ */
+static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t, 
+			    GLint hwlevel,
+			    GLint x, GLint y, GLint width, GLint height,
+			    GLuint face )
+{
+   struct gl_texture_image *texImage = NULL;
+   GLuint offset;
+   GLint imageWidth, imageHeight;
+   GLint ret;
+   drmRadeonTexture tex;
+   drmRadeonTexImage tmp;
+   const int level = hwlevel + t->base.firstLevel;
+
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", 
+	       __FUNCTION__, t, t->base.tObj, level, width, height, face );
+   }
+
+   ASSERT(face < 6);
+
+   /* Ensure we have a valid texture to upload */
+   if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
+      _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
+      return;
+   }
+
+   switch (face) {
+   case 0:
+      texImage = t->base.tObj->Image[level];
+      break;
+   case 1:
+      texImage = t->base.tObj->NegX[level];
+      break;
+   case 2:
+      texImage = t->base.tObj->PosY[level];
+      break;
+   case 3:
+      texImage = t->base.tObj->NegY[level];
+      break;
+   case 4:
+      texImage = t->base.tObj->PosZ[level];
+      break;
+   case 5:
+      texImage = t->base.tObj->NegZ[level];
+      break;
+   }
+
+   if ( !texImage ) {
+      if ( RADEON_DEBUG & DEBUG_TEXTURE )
+	 fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
+      return;
+   }
+   if ( !texImage->Data ) {
+      if ( RADEON_DEBUG & DEBUG_TEXTURE )
+	 fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
+      return;
+   }
+
+   /* Rectangle targets: the only upload path that consumes x/y/width/height */
+   if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+      assert(level == 0);
+      assert(hwlevel == 0);
+      if ( RADEON_DEBUG & DEBUG_TEXTURE )
+	 fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
+      radeonUploadRectSubImage( rmesa, t, texImage, x, y, width, height );
+      return;
+   }
+
+   imageWidth = texImage->Width;
+   imageHeight = texImage->Height;
+
+   offset = t->bufAddr;
+
+   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+      GLint imageX = 0;
+      GLint imageY = 0;
+      GLint blitX = t->image[face][hwlevel].x;
+      GLint blitY = t->image[face][hwlevel].y;
+      GLint blitWidth = t->image[face][hwlevel].width;
+      GLint blitHeight = t->image[face][hwlevel].height;
+      fprintf( stderr, "   upload image: %d,%d at %d,%d\n",
+	       imageWidth, imageHeight, imageX, imageY );
+      fprintf( stderr, "   upload  blit: %d,%d at %d,%d\n",
+	       blitWidth, blitHeight, blitX, blitY );
+      fprintf( stderr, "       blit ofs: 0x%07x level: %d/%d\n",
+	       (GLuint)offset, hwlevel, level );
+   }
+
+   t->image[face][hwlevel].data = texImage->Data;
+
+   /* Init the DRM_RADEON_TEXTURE command / drmRadeonTexture struct.
+    * NOTE: we always use a 1KB-wide blit and I8 texture format.
+    * We used to use 1, 2 and 4-byte texels and used to use the texture
+    * width to dictate the blit width - but that won't work for compressed
+    * textures. (Brian)
+    */
+   tex.offset = offset;
+   tex.pitch = BLIT_WIDTH_BYTES / 64;
+   tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+   if (texImage->TexFormat->TexelBytes) {
+      tex.width = imageWidth * texImage->TexFormat->TexelBytes; /* in bytes */
+      tex.height = imageHeight;
+   }
+   else {
+      tex.width = imageWidth; /* compressed */
+      tex.height = imageHeight;
+      if (tex.height < 4)
+         tex.height = 4;
+   }
+   tex.image = &tmp;
+
+   /* copy (x,y,width,height,data) */
+   memcpy( &tmp, &t->image[face][hwlevel], sizeof(drmRadeonTexImage) );
+
+   LOCK_HARDWARE( rmesa );
+   do {
+      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
+                                 &tex, sizeof(drmRadeonTexture) );
+   } while ( ret && errno == EAGAIN );
+
+   UNLOCK_HARDWARE( rmesa );
+   /* A failed upload is treated as fatal: dump the request and exit. */
+   if ( ret ) {
+      fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
+      fprintf( stderr, "   offset=0x%08x\n",
+	       offset );
+      fprintf( stderr, "   image width=%d height=%d\n",
+	       imageWidth, imageHeight );
+      fprintf( stderr, "    blit width=%d height=%d data=%p\n",
+	       t->image[face][hwlevel].width, t->image[face][hwlevel].height,
+	       t->image[face][hwlevel].data );
+      exit( 1 );
+   }
+}
+
+
+/**
+ * Upload the texture images associated with texture \a t.  This might
+ * require the allocation of texture memory.
+ *
+ * \param rmesa Context pointer
+ * \param t Texture to be uploaded.  NULL is accepted and is a no-op.
+ * \param face Cube map face to be uploaded.  Zero for non-cube maps.
+ * \return 0 on success or when nothing needs uploading, -1 if allocation fails
+ */
+int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint face )
+{
+   int numLevels;
+
+   /* Must come first: everything below, the debug trace included, reads *t */
+   if ( !t )
+      return 0;
+   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+
+   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+      fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
+	       rmesa->glCtx, t->base.tObj, t->base.totalSize,
+	       t->base.firstLevel, t->base.lastLevel );
+   }
+
+   if ( t->base.totalSize == 0 )
+      return 0;
+
+   LOCK_HARDWARE( rmesa );
+
+   if ( t->base.memBlock == NULL ) {
+      int heap;
+
+      heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
+				 (driTextureObject *) t );
+      if ( heap == -1 ) {
+	 UNLOCK_HARDWARE( rmesa );
+	 return -1;
+      }
+
+      /* Set the base offset of the texture image */
+      t->bufAddr = rmesa->radeonScreen->texOffset[heap] 
+	   + t->base.memBlock->ofs;
+      t->pp_txoffset = t->bufAddr;
+
+      /* Mark this texobj as dirty on all units: */
+      t->dirty_state = TEX_ALL;
+   }
+
+   /* Let the world know we've used this memory recently. */
+   driUpdateTextureLRU( (driTextureObject *) t );
+   UNLOCK_HARDWARE( rmesa );
+
+   /* Upload any images that are new */
+   if (t->base.dirty_images[face]) {
+      int i;
+      for ( i = 0 ; i < numLevels ; i++ ) {
+         if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
+            uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
+			    t->image[face][i].height, face );
+         }
+      }
+      t->base.dirty_images[face] = 0;
+   }
+
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
new file mode 100644
index 0000000000..6dccd31180
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -0,0 +1,1628 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c,v 1.6 2002/12/16 16:18:59 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "colormac.h"
+#include "context.h"
+#include "macros.h"
+#include "texformat.h"
+#include "enums.h"
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_swtcl.h"
+#include "radeon_tex.h"
+#include "radeon_tcl.h"
+
+
+#define RADEON_TXFORMAT_AL88      RADEON_TXFORMAT_AI88
+#define RADEON_TXFORMAT_YCBCR     RADEON_TXFORMAT_YVYU422
+#define RADEON_TXFORMAT_YCBCR_REV RADEON_TXFORMAT_VYUY422
+
+#define _COLOR(f) \
+    [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, 0 }
+#define _ALPHA(f) \
+    [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
+#define _YUV(f) \
+   [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, RADEON_YUV_TO_RGB }
+#define _INVALID(f) \
+    [ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 }
+#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_YCBCR_REV) \
+			     && (tx_table[f].format != 0xffffffff) )
+
+/* Indexed by MESA_FORMAT_*: hardware txformat bits plus extra txfilter bits. */
+static const struct {
+   GLuint format, filter;
+} tx_table[] = {
+   _ALPHA(RGBA8888),
+   _ALPHA(ARGB8888),
+   _INVALID(RGB888),
+   _COLOR(RGB565),
+   _ALPHA(ARGB4444),
+   _ALPHA(ARGB1555),
+   _ALPHA(AL88),
+   _INVALID(A8),
+   _INVALID(L8),
+   _COLOR(I8),
+   _INVALID(CI8),
+   _YUV(YCBCR),
+   _YUV(YCBCR_REV),
+};
+
+#undef _COLOR
+#undef _ALPHA
+#undef _YUV
+#undef _INVALID
+
+/**
+ * This function computes the number of bytes of storage needed for
+ * the given texture object (the selected mipmap levels of \c Image[]) and
+ * stores it in \c t->base.totalSize.  The \c image[0][level].x/y/width/height
+ * parameters for upload/blitting are computed here.  \c pp_txfilter,
+ * \c pp_txformat, \c pp_txsize and \c pp_txpitch will be set here too.
+ * Only 1D, 2D and rectangle targets are handled; others return early.
+ *
+ * \param rmesa Context pointer (not referenced by the body)
+ * \param tObj GL texture object whose images are to be posted to hardware state.
+ */
+static void radeonSetTexImages( radeonContextPtr rmesa,
+				struct gl_texture_object *tObj )
+{
+   radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
+   const struct gl_texture_image *baseImage = tObj->Image[tObj->BaseLevel];
+   GLint curOffset;
+   GLint i;
+   GLint firstLevel=0, lastLevel=0, numLevels;
+   GLint log2Width, log2Height, log2Depth;
+
+   /* Set the hardware texture format.  NOTE(review): baseImage is used here
+    * without a NULL check - presumably callers guarantee it exists; confirm.
+    */
+   t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
+		       RADEON_TXFORMAT_ALPHA_IN_MAP);
+   t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
+
+   if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
+      t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
+      t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
+   }
+   else {
+      _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
+      return;
+   }
+
+
+   /* Compute which mipmap levels we really want to send to the hardware.
+    * This depends on the base image size, GL_TEXTURE_MIN_LOD,
+    * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
+    * Yes, this looks overly complicated, but it's all needed.
+    * (log2Depth is assigned in each case but not read in this function.)
+    */
+   switch (tObj->Target) {
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+      firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
+      firstLevel = MAX2(firstLevel, tObj->BaseLevel);
+      lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
+      lastLevel = MAX2(lastLevel, tObj->BaseLevel);
+      lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
+      lastLevel = MIN2(lastLevel, tObj->MaxLevel);
+      lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+      log2Width = tObj->Image[firstLevel]->WidthLog2;
+      log2Height = tObj->Image[firstLevel]->HeightLog2;
+      log2Depth = 0;
+      break;
+   case GL_TEXTURE_RECTANGLE_NV:
+      firstLevel = lastLevel = 0;
+      log2Width = log2Height = 1; /* ? */
+      log2Depth = 0;
+      break;
+   default:
+      return;
+   }
+
+   /* save these values */
+   t->base.firstLevel = firstLevel;
+   t->base.lastLevel = lastLevel;
+
+   numLevels = lastLevel - firstLevel + 1;
+
+   assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
+
+   /* Calculate mipmap offsets and dimensions for blitting (uploading)
+    * The idea is that we lay out the mipmap levels within a block of
+    * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
+    */
+   curOffset = 0;
+
+   for (i = 0; i < numLevels; i++) {
+      const struct gl_texture_image *texImage;
+      GLuint size;
+
+      texImage = tObj->Image[i + firstLevel];
+      if ( !texImage )
+	 break;
+
+      /* find image size in bytes */
+      if (texImage->IsCompressed) {
+         size = texImage->CompressedSize;
+      }
+      else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+      	 size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63)
+      	         & ~63) * texImage->Height;
+      }
+      else {
+         int w = texImage->Width * texImage->TexFormat->TexelBytes;
+         if (w < 32)
+            w = 32;
+         size = w * texImage->Height * texImage->Depth;
+      }
+      assert(size > 0);
+
+      if (curOffset & 0x1f) {
+         /* align to 32-byte offset */
+         curOffset = (curOffset + 0x1f) & ~0x1f;
+      }
+
+      t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+      t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+      t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
+      t->image[0][i].height = size / t->image[0][i].width;
+
+#if 0
+      /* for debugging only and only  applicable to non-rectangle targets */
+      assert(size % t->image[0][i].width == 0);
+      assert(t->image[0][i].x == 0
+             || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
+#endif
+
+      if (0)
+         fprintf(stderr,
+                 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
+                 i, texImage->Width, texImage->Height,
+                 t->image[0][i].x, t->image[0][i].y,
+                 t->image[0][i].width, t->image[0][i].height, size, curOffset);
+
+      curOffset += size;
+
+   }
+
+   /* Align the total size of texture memory block.
+    */
+   t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+
+   /* Hardware state:
+    */
+   t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
+   t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT;
+
+   t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
+		       RADEON_TXFORMAT_HEIGHT_MASK |
+                       RADEON_TXFORMAT_CUBIC_MAP_ENABLE);
+   t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
+		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
+
+   t->pp_txsize = (((tObj->Image[firstLevel]->Width - 1) << 0) |
+                   ((tObj->Image[firstLevel]->Height - 1) << 16));
+
+   /* Only need to round to nearest 32 for textures, but the blitter
+    * requires 64-byte aligned pitches, and we may/may not need the
+    * blitter.   NPOT only!
+    */
+   if (baseImage->IsCompressed)
+      t->pp_txpitch = (tObj->Image[firstLevel]->Width + 63) & ~(63);
+   else
+      t->pp_txpitch = ((tObj->Image[firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63);
+   t->pp_txpitch -= 32;
+
+   t->dirty_state = TEX_ALL;
+
+   /* FYI: radeonUploadTexImages( rmesa, t ); used to be called here */
+}
+
+
+
+/* ================================================================
+ * Texture combine functions (second index of the combine tables)
+ */
+
+#define RADEON_DISABLE		0
+#define RADEON_REPLACE		1
+#define RADEON_MODULATE		2
+#define RADEON_DECAL		3
+#define RADEON_BLEND		4
+#define RADEON_ADD		5
+#define RADEON_MAX_COMBFUNC	6
+
+static GLuint radeon_color_combine[][RADEON_MAX_COMBFUNC] =
+{
+   /* Unit 0 - rows are texture units, entries follow the RADEON_* combine
+    * function order (DISABLE, REPLACE, MODULATE, DECAL, BLEND, ADD). */
+   {
+      /* Disable combiner stage
+       */
+      (RADEON_COLOR_ARG_A_ZERO |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_CURRENT_COLOR |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_REPLACE = 0x00802800
+       */
+      (RADEON_COLOR_ARG_A_ZERO |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_T0_COLOR |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_MODULATE = 0x00800142
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_T0_COLOR |
+       RADEON_COLOR_ARG_C_ZERO |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_DECAL = 0x008c2d42
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_T0_COLOR |
+       RADEON_COLOR_ARG_C_T0_ALPHA |
+       RADEON_BLEND_CTL_BLEND |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_BLEND = 0x008c2902
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_TFACTOR_COLOR |
+       RADEON_COLOR_ARG_C_T0_COLOR |
+       RADEON_BLEND_CTL_BLEND |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_ADD = 0x00812802
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_T0_COLOR |
+       RADEON_COMP_ARG_B |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+   },
+
+   /* Unit 1:
+    */
+   {
+      /* Disable combiner stage
+       */
+      (RADEON_COLOR_ARG_A_ZERO |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_CURRENT_COLOR |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_REPLACE = 0x00803000
+       */
+      (RADEON_COLOR_ARG_A_ZERO |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_T1_COLOR |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_MODULATE = 0x00800182
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_T1_COLOR |
+       RADEON_COLOR_ARG_C_ZERO |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_DECAL = 0x008c3582
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_T1_COLOR |
+       RADEON_COLOR_ARG_C_T1_ALPHA |
+       RADEON_BLEND_CTL_BLEND |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_BLEND = 0x008c3102
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_TFACTOR_COLOR |
+       RADEON_COLOR_ARG_C_T1_COLOR |
+       RADEON_BLEND_CTL_BLEND |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_ADD = 0x00813002
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_T1_COLOR |
+       RADEON_COMP_ARG_B |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+   },
+
+   /* Unit 2:
+    */
+   {
+      /* Disable combiner stage
+       */
+      (RADEON_COLOR_ARG_A_ZERO |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_CURRENT_COLOR |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_REPLACE = 0x00803800
+       */
+      (RADEON_COLOR_ARG_A_ZERO |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_T2_COLOR |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_MODULATE = 0x008001c2
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_T2_COLOR |
+       RADEON_COLOR_ARG_C_ZERO |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_DECAL = 0x008c3dc2
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_T2_COLOR |
+       RADEON_COLOR_ARG_C_T2_ALPHA |
+       RADEON_BLEND_CTL_BLEND |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_BLEND = 0x008c3902
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_TFACTOR_COLOR |
+       RADEON_COLOR_ARG_C_T2_COLOR |
+       RADEON_BLEND_CTL_BLEND |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_ADD = 0x00813802
+       */
+      (RADEON_COLOR_ARG_A_CURRENT_COLOR |
+       RADEON_COLOR_ARG_B_ZERO |
+       RADEON_COLOR_ARG_C_T2_COLOR |
+       RADEON_COMP_ARG_B |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+   }
+};
+
+static GLuint radeon_alpha_combine[][RADEON_MAX_COMBFUNC] =
+{
+   /* Unit 0 - rows are texture units, entries follow the RADEON_* combine
+    * function order (DISABLE, REPLACE, MODULATE, DECAL, BLEND, ADD). */
+   {
+      /* Disable combiner stage
+       */
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_REPLACE = 0x00800500
+       */
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_T0_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_MODULATE = 0x00800051
+       */
+      (RADEON_ALPHA_ARG_A_CURRENT_ALPHA |
+       RADEON_ALPHA_ARG_B_T0_ALPHA |
+       RADEON_ALPHA_ARG_C_ZERO |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_DECAL = 0x00800100
+       */
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_BLEND = 0x00800051 (NOTE(review): same value as quoted for
+       * GL_MODULATE although the flags differ - looks stale; verify) */
+      (RADEON_ALPHA_ARG_A_CURRENT_ALPHA |
+       RADEON_ALPHA_ARG_B_TFACTOR_ALPHA |
+       RADEON_ALPHA_ARG_C_T0_ALPHA |
+       RADEON_BLEND_CTL_BLEND |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_ADD = 0x00800051 (NOTE(review): same value as quoted for
+       * GL_MODULATE although the flags differ - looks stale; verify) */
+      (RADEON_ALPHA_ARG_A_CURRENT_ALPHA |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_T0_ALPHA |
+       RADEON_COMP_ARG_B |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+   },
+
+   /* Unit 1:
+    */
+   {
+      /* Disable combiner stage
+       */
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_REPLACE = 0x00800600
+       */
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_T1_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_MODULATE = 0x00800061
+       */
+      (RADEON_ALPHA_ARG_A_CURRENT_ALPHA |
+       RADEON_ALPHA_ARG_B_T1_ALPHA |
+       RADEON_ALPHA_ARG_C_ZERO |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_DECAL = 0x00800100
+       */
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_BLEND = 0x00800061 (NOTE(review): same value as quoted for
+       * GL_MODULATE although the flags differ - looks stale; verify) */
+      (RADEON_ALPHA_ARG_A_CURRENT_ALPHA |
+       RADEON_ALPHA_ARG_B_TFACTOR_ALPHA |
+       RADEON_ALPHA_ARG_C_T1_ALPHA |
+       RADEON_BLEND_CTL_BLEND |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_ADD = 0x00800061 (NOTE(review): same value as quoted for
+       * GL_MODULATE although the flags differ - looks stale; verify) */
+      (RADEON_ALPHA_ARG_A_CURRENT_ALPHA |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_T1_ALPHA |
+       RADEON_COMP_ARG_B |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+   },
+
+   /* Unit 2:
+    */
+   {
+      /* Disable combiner stage
+       */
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_REPLACE = 0x00800700
+       */
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_T2_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_MODULATE = 0x00800071
+       */
+      (RADEON_ALPHA_ARG_A_CURRENT_ALPHA |
+       RADEON_ALPHA_ARG_B_T2_ALPHA |
+       RADEON_ALPHA_ARG_C_ZERO |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_DECAL = 0x00800100
+       */
+      (RADEON_ALPHA_ARG_A_ZERO |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_BLEND = 0x00800071 (NOTE(review): same value as quoted for
+       * GL_MODULATE although the flags differ - looks stale; verify) */
+      (RADEON_ALPHA_ARG_A_CURRENT_ALPHA |
+       RADEON_ALPHA_ARG_B_TFACTOR_ALPHA |
+       RADEON_ALPHA_ARG_C_T2_ALPHA |
+       RADEON_BLEND_CTL_BLEND |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+
+      /* GL_ADD = 0x00800021 (NOTE(review): quoted value has not been
+       * checked against the flags below; verify) */
+      (RADEON_ALPHA_ARG_A_CURRENT_ALPHA |
+       RADEON_ALPHA_ARG_B_ZERO |
+       RADEON_ALPHA_ARG_C_T2_ALPHA |
+       RADEON_COMP_ARG_B |
+       RADEON_BLEND_CTL_ADD |
+       RADEON_SCALE_1X |
+       RADEON_CLAMP_TX),
+   }
+};
+
+
+/* GL_ARB_texture_env_combine support
+ */
+
+/* The color tables hold slot-A argument encodings, one row per operand:
+ * GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA.
+ */
+static GLuint radeon_texture_color[][RADEON_MAX_TEXTURE_UNITS] =
+{
+   {
+      RADEON_COLOR_ARG_A_T0_COLOR,
+      RADEON_COLOR_ARG_A_T1_COLOR,
+      RADEON_COLOR_ARG_A_T2_COLOR
+   },
+   {
+      RADEON_COLOR_ARG_A_T0_COLOR | RADEON_COMP_ARG_A,
+      RADEON_COLOR_ARG_A_T1_COLOR | RADEON_COMP_ARG_A,
+      RADEON_COLOR_ARG_A_T2_COLOR | RADEON_COMP_ARG_A
+   },
+   {
+      RADEON_COLOR_ARG_A_T0_ALPHA,
+      RADEON_COLOR_ARG_A_T1_ALPHA,
+      RADEON_COLOR_ARG_A_T2_ALPHA
+   },
+   {
+      RADEON_COLOR_ARG_A_T0_ALPHA | RADEON_COMP_ARG_A,
+      RADEON_COLOR_ARG_A_T1_ALPHA | RADEON_COMP_ARG_A,
+      RADEON_COLOR_ARG_A_T2_ALPHA | RADEON_COMP_ARG_A
+   },
+};
+
+static GLuint radeon_tfactor_color[] =
+{
+   RADEON_COLOR_ARG_A_TFACTOR_COLOR,
+   RADEON_COLOR_ARG_A_TFACTOR_COLOR | RADEON_COMP_ARG_A,
+   RADEON_COLOR_ARG_A_TFACTOR_ALPHA,
+   RADEON_COLOR_ARG_A_TFACTOR_ALPHA | RADEON_COMP_ARG_A
+};
+
+static GLuint radeon_primary_color[] =
+{
+   RADEON_COLOR_ARG_A_DIFFUSE_COLOR,
+   RADEON_COLOR_ARG_A_DIFFUSE_COLOR | RADEON_COMP_ARG_A,
+   RADEON_COLOR_ARG_A_DIFFUSE_ALPHA,
+   RADEON_COLOR_ARG_A_DIFFUSE_ALPHA | RADEON_COMP_ARG_A
+};
+
+static GLuint radeon_previous_color[] =
+{
+   RADEON_COLOR_ARG_A_CURRENT_COLOR,
+   RADEON_COLOR_ARG_A_CURRENT_COLOR | RADEON_COMP_ARG_A,
+   RADEON_COLOR_ARG_A_CURRENT_ALPHA,
+   RADEON_COLOR_ARG_A_CURRENT_ALPHA | RADEON_COMP_ARG_A
+};
+
+/* GL_ZERO table - indices 0-3
+ * GL_ONE  table - indices 1-4
+ */
+static GLuint radeon_zero_color[] =
+{
+   RADEON_COLOR_ARG_A_ZERO,
+   RADEON_COLOR_ARG_A_ZERO | RADEON_COMP_ARG_A,
+   RADEON_COLOR_ARG_A_ZERO,
+   RADEON_COLOR_ARG_A_ZERO | RADEON_COMP_ARG_A,
+   RADEON_COLOR_ARG_A_ZERO
+};
+
+
+/* The alpha tables only have GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA
+ * (plain source first, then with RADEON_COMP_ARG_A set). */
+static GLuint radeon_texture_alpha[][RADEON_MAX_TEXTURE_UNITS] =
+{
+   {
+      RADEON_ALPHA_ARG_A_T0_ALPHA,
+      RADEON_ALPHA_ARG_A_T1_ALPHA,
+      RADEON_ALPHA_ARG_A_T2_ALPHA
+   },
+   {
+      RADEON_ALPHA_ARG_A_T0_ALPHA | RADEON_COMP_ARG_A,
+      RADEON_ALPHA_ARG_A_T1_ALPHA | RADEON_COMP_ARG_A,
+      RADEON_ALPHA_ARG_A_T2_ALPHA | RADEON_COMP_ARG_A
+   },
+};
+
+static GLuint radeon_tfactor_alpha[] =
+{
+   RADEON_ALPHA_ARG_A_TFACTOR_ALPHA,
+   RADEON_ALPHA_ARG_A_TFACTOR_ALPHA | RADEON_COMP_ARG_A
+};
+
+static GLuint radeon_primary_alpha[] =
+{
+   RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA,
+   RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA | RADEON_COMP_ARG_A
+};
+
+static GLuint radeon_previous_alpha[] =
+{
+   RADEON_ALPHA_ARG_A_CURRENT_ALPHA,
+   RADEON_ALPHA_ARG_A_CURRENT_ALPHA | RADEON_COMP_ARG_A
+};
+
+/* GL_ZERO table - indices 0-1
+ * GL_ONE  table - indices 1-2
+ */
+static GLuint radeon_zero_alpha[] =
+{
+   RADEON_ALPHA_ARG_A_ZERO,
+   RADEON_ALPHA_ARG_A_ZERO | RADEON_COMP_ARG_A,
+   RADEON_ALPHA_ARG_A_ZERO
+};
+
+
+/* Extract the arg from slot A of color_arg[n] / alpha_arg[n], shift it into
+ * argument slot `arg` of the caller's color_combine / alpha_combine local
+ * and copy its complement bit to that slot (both locals must be in scope). */
+#define RADEON_COLOR_ARG( n, arg )			\
+do {							\
+   color_combine |=					\
+      ((color_arg[n] & RADEON_COLOR_ARG_MASK)		\
+       << RADEON_COLOR_ARG_##arg##_SHIFT);		\
+   color_combine |=					\
+      ((color_arg[n] >> RADEON_COMP_ARG_SHIFT)		\
+       << RADEON_COMP_ARG_##arg##_SHIFT);		\
+} while (0)
+
+#define RADEON_ALPHA_ARG( n, arg )			\
+do {							\
+   alpha_combine |=					\
+      ((alpha_arg[n] & RADEON_ALPHA_ARG_MASK)		\
+       << RADEON_ALPHA_ARG_##arg##_SHIFT);		\
+   alpha_combine |=					\
+      ((alpha_arg[n] >> RADEON_COMP_ARG_SHIFT)		\
+       << RADEON_COMP_ARG_##arg##_SHIFT);		\
+} while (0)
+
+
+/* ================================================================
+ * Texture unit state management
+ */
+
+static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLuint color_combine, alpha_combine;
+
+   /* texUnit->_Current can be NULL if and only if the texture unit is
+    * not actually enabled.
+    */
+   assert( (texUnit->_ReallyEnabled == 0)
+	   || (texUnit->_Current != NULL) );
+
+   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, ctx, unit );
+   }
+
+   /* Set the texture environment state.  Isn't this nice and clean?
+    * The chip will automagically set the texture alpha to 0xff when
+    * the texture format does not include an alpha component.  This
+    * reduces the amount of special-casing we have to do, alpha-only
+    * textures being a notable exception.
+    */
+   if ( !texUnit->_ReallyEnabled ) {
+      /* Don't cache these results.
+       */
+      rmesa->state.texture.unit[unit].format = 0;
+      rmesa->state.texture.unit[unit].envMode = 0;
+      color_combine = radeon_color_combine[unit][RADEON_DISABLE];
+      alpha_combine = radeon_alpha_combine[unit][RADEON_DISABLE];
+   }
+   else {
+      const struct gl_texture_object *tObj = texUnit->_Current;
+      const GLenum format = tObj->Image[tObj->BaseLevel]->Format;
+      GLuint color_arg[3], alpha_arg[3];
+      GLuint i, numColorArgs = 0, numAlphaArgs = 0;
+      GLuint RGBshift = texUnit->CombineScaleShiftRGB;
+      GLuint Ashift = texUnit->CombineScaleShiftA;
+
+      switch ( texUnit->EnvMode ) {
+      case GL_REPLACE:
+         switch ( format ) {
+	 case GL_RGBA:
+         case GL_LUMINANCE_ALPHA:
+         case GL_INTENSITY:
+	    color_combine = radeon_color_combine[unit][RADEON_REPLACE];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_REPLACE];
+	    break;
+	 case GL_ALPHA:
+	    color_combine = radeon_color_combine[unit][RADEON_DISABLE];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_REPLACE];
+	    break;
+	 case GL_LUMINANCE:
+	 case GL_RGB:
+	 case GL_YCBCR_MESA:
+	    color_combine = radeon_color_combine[unit][RADEON_REPLACE];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_DISABLE];
+	    break;
+	 case GL_COLOR_INDEX:
+	 default:
+	    return GL_FALSE;
+	 }
+	 break;
+
+      case GL_MODULATE:
+	 switch ( format ) {
+	 case GL_RGBA:
+	 case GL_LUMINANCE_ALPHA:
+	 case GL_INTENSITY:
+	    color_combine = radeon_color_combine[unit][RADEON_MODULATE];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_MODULATE];
+	    break;
+	 case GL_ALPHA:
+	    color_combine = radeon_color_combine[unit][RADEON_DISABLE];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_MODULATE];
+	    break;
+	 case GL_RGB:
+	 case GL_LUMINANCE:
+	 case GL_YCBCR_MESA:
+	    color_combine = radeon_color_combine[unit][RADEON_MODULATE];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_DISABLE];
+	    break;
+	 case GL_COLOR_INDEX:
+	 default:
+	    return GL_FALSE;
+	 }
+	 break;
+
+      case GL_DECAL:
+	 switch ( format ) {
+	 case GL_RGBA:
+	 case GL_RGB:
+	 case GL_YCBCR_MESA:
+	    color_combine = radeon_color_combine[unit][RADEON_DECAL];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_DISABLE];
+	    break;
+	 case GL_ALPHA:
+	 case GL_LUMINANCE:
+	 case GL_LUMINANCE_ALPHA:
+	 case GL_INTENSITY:
+	    color_combine = radeon_color_combine[unit][RADEON_DISABLE];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_DISABLE];
+	    break;
+	 case GL_COLOR_INDEX:
+	 default:
+	    return GL_FALSE;
+	 }
+	 break;
+
+      case GL_BLEND:
+	 switch ( format ) {
+	 case GL_RGBA:
+	 case GL_RGB:
+	 case GL_LUMINANCE:
+	 case GL_LUMINANCE_ALPHA:
+	 case GL_YCBCR_MESA:
+	    color_combine = radeon_color_combine[unit][RADEON_BLEND];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_MODULATE];
+	    break;
+	 case GL_ALPHA:
+	    color_combine = radeon_color_combine[unit][RADEON_DISABLE];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_MODULATE];
+	    break;
+	 case GL_INTENSITY:
+	    color_combine = radeon_color_combine[unit][RADEON_BLEND];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_BLEND];
+	    break;
+	 case GL_COLOR_INDEX:
+	 default:
+	    return GL_FALSE;
+	 }
+	 break;
+
+      case GL_ADD:
+	 switch ( format ) {
+	 case GL_RGBA:
+	 case GL_RGB:
+	 case GL_LUMINANCE:
+	 case GL_LUMINANCE_ALPHA:
+	 case GL_YCBCR_MESA:
+	    color_combine = radeon_color_combine[unit][RADEON_ADD];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_MODULATE];
+	    break;
+	 case GL_ALPHA:
+	    color_combine = radeon_color_combine[unit][RADEON_DISABLE];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_MODULATE];
+	    break;
+	 case GL_INTENSITY:
+	    color_combine = radeon_color_combine[unit][RADEON_ADD];
+	    alpha_combine = radeon_alpha_combine[unit][RADEON_ADD];
+	    break;
+	 case GL_COLOR_INDEX:
+	 default:
+	    return GL_FALSE;
+	 }
+	 break;
+
+      case GL_COMBINE:
+	 /* Don't cache these results.
+	  */
+	 rmesa->state.texture.unit[unit].format = 0;
+	 rmesa->state.texture.unit[unit].envMode = 0;
+
+	 /* Step 0:
+	  * Calculate how many arguments we need to process.
+	  */
+	 switch ( texUnit->CombineModeRGB ) {
+	 case GL_REPLACE:
+	    numColorArgs = 1;
+	    break;
+	 case GL_MODULATE:
+	 case GL_ADD:
+	 case GL_ADD_SIGNED:
+	 case GL_SUBTRACT:
+	 case GL_DOT3_RGB:
+	 case GL_DOT3_RGBA:
+	 case GL_DOT3_RGB_EXT:
+	 case GL_DOT3_RGBA_EXT:
+	    numColorArgs = 2;
+	    break;
+	 case GL_INTERPOLATE:
+	 case GL_MODULATE_ADD_ATI:
+	 case GL_MODULATE_SIGNED_ADD_ATI:
+	 case GL_MODULATE_SUBTRACT_ATI:
+	    numColorArgs = 3;
+	    break;
+	 default:
+	    return GL_FALSE;
+	 }
+
+	 switch ( texUnit->CombineModeA ) {
+	 case GL_REPLACE:
+	    numAlphaArgs = 1;
+	    break;
+	 case GL_MODULATE:
+	 case GL_ADD:
+	 case GL_ADD_SIGNED:
+	 case GL_SUBTRACT:
+	    numAlphaArgs = 2;
+	    break;
+	 case GL_INTERPOLATE:
+	 case GL_MODULATE_ADD_ATI:
+	 case GL_MODULATE_SIGNED_ADD_ATI:
+	 case GL_MODULATE_SUBTRACT_ATI:
+	    numAlphaArgs = 3;
+	    break;
+	 default:
+	    return GL_FALSE;
+	 }
+
+	 /* Step 1:
+	  * Extract the color and alpha combine function arguments.
+	  */
+	 for ( i = 0 ; i < numColorArgs ; i++ ) {
+	    const GLuint op = texUnit->CombineOperandRGB[i] - GL_SRC_COLOR;
+	    assert(op >= 0);
+	    assert(op <= 3);
+	    switch ( texUnit->CombineSourceRGB[i] ) {
+	    case GL_TEXTURE:
+	       color_arg[i] = radeon_texture_color[op][unit];
+	       break;
+	    case GL_CONSTANT:
+	       color_arg[i] = radeon_tfactor_color[op];
+	       break;
+	    case GL_PRIMARY_COLOR:
+	       color_arg[i] = radeon_primary_color[op];
+	       break;
+	    case GL_PREVIOUS:
+	       color_arg[i] = radeon_previous_color[op];
+	       break;
+	    case GL_ZERO:
+	       color_arg[i] = radeon_zero_color[op];
+	       break;
+	    case GL_ONE:
+	       color_arg[i] = radeon_zero_color[op+1];
+	       break;
+	    default:
+	       return GL_FALSE;
+	    }
+	 }
+
+	 for ( i = 0 ; i < numAlphaArgs ; i++ ) {
+	    const GLuint op = texUnit->CombineOperandA[i] - GL_SRC_ALPHA;
+	    assert(op >= 0);
+	    assert(op <= 1);
+	    switch ( texUnit->CombineSourceA[i] ) {
+	    case GL_TEXTURE:
+	       alpha_arg[i] = radeon_texture_alpha[op][unit];
+	       break;
+	    case GL_CONSTANT:
+	       alpha_arg[i] = radeon_tfactor_alpha[op];
+	       break;
+	    case GL_PRIMARY_COLOR:
+	       alpha_arg[i] = radeon_primary_alpha[op];
+	       break;
+	    case GL_PREVIOUS:
+	       alpha_arg[i] = radeon_previous_alpha[op];
+	       break;
+	    case GL_ZERO:
+	       alpha_arg[i] = radeon_zero_alpha[op];
+	       break;
+	    case GL_ONE:
+	       alpha_arg[i] = radeon_zero_alpha[op+1];
+	       break;
+	    default:
+	       return GL_FALSE;
+	    }
+	 }
+
+	 /* Step 2:
+	  * Build up the color and alpha combine functions.
+	  */
+	 switch ( texUnit->CombineModeRGB ) {
+	 case GL_REPLACE:
+	    color_combine = (RADEON_COLOR_ARG_A_ZERO |
+			     RADEON_COLOR_ARG_B_ZERO |
+			     RADEON_BLEND_CTL_ADD |
+			     RADEON_CLAMP_TX);
+	    RADEON_COLOR_ARG( 0, C );
+	    break;
+	 case GL_MODULATE:
+	    color_combine = (RADEON_COLOR_ARG_C_ZERO |
+			     RADEON_BLEND_CTL_ADD |
+			     RADEON_CLAMP_TX);
+	    RADEON_COLOR_ARG( 0, A );
+	    RADEON_COLOR_ARG( 1, B );
+	    break;
+	 case GL_ADD:
+	    color_combine = (RADEON_COLOR_ARG_B_ZERO |
+			     RADEON_COMP_ARG_B |
+			     RADEON_BLEND_CTL_ADD |
+			     RADEON_CLAMP_TX);
+	    RADEON_COLOR_ARG( 0, A );
+	    RADEON_COLOR_ARG( 1, C );
+	    break;
+	 case GL_ADD_SIGNED:
+	    color_combine = (RADEON_COLOR_ARG_B_ZERO |
+			     RADEON_COMP_ARG_B |
+			     RADEON_BLEND_CTL_ADDSIGNED |
+			     RADEON_CLAMP_TX);
+	    RADEON_COLOR_ARG( 0, A );
+	    RADEON_COLOR_ARG( 1, C );
+	    break;
+	 case GL_SUBTRACT:
+	    color_combine = (RADEON_COLOR_ARG_B_ZERO |
+			     RADEON_COMP_ARG_B |
+			     RADEON_BLEND_CTL_SUBTRACT |
+			     RADEON_CLAMP_TX);
+	    RADEON_COLOR_ARG( 0, A );
+	    RADEON_COLOR_ARG( 1, C );
+	    break;
+	 case GL_INTERPOLATE:
+	    color_combine = (RADEON_BLEND_CTL_BLEND |
+			     RADEON_CLAMP_TX);
+	    RADEON_COLOR_ARG( 0, B );
+	    RADEON_COLOR_ARG( 1, A );
+	    RADEON_COLOR_ARG( 2, C );
+	    break;
+
+	 case GL_DOT3_RGB_EXT:
+	 case GL_DOT3_RGBA_EXT:
+	    /* The EXT version of the DOT3 extension does not support the
+	     * scale factor, but the ARB version (and the version in OpenGL
+	     * 1.3) does.
+	     */
+	    RGBshift = 0;
+	    Ashift = 0;
+	    /* FALLTHROUGH */
+
+	 case GL_DOT3_RGB:
+	 case GL_DOT3_RGBA:
+	    /* The R100 / RV200 only support a 1X multiplier in hardware
+	     * w/the ARB version.
+	     */
+	    if ( RGBshift != (RADEON_SCALE_1X >> RADEON_SCALE_SHIFT) ) {
+	       return GL_FALSE;
+	    }
+
+	    RGBshift += 2;
+	    Ashift = RGBshift;
+
+	    color_combine = (RADEON_COLOR_ARG_C_ZERO |
+			     RADEON_BLEND_CTL_DOT3 |
+			     RADEON_CLAMP_TX);
+	    RADEON_COLOR_ARG( 0, A );
+	    RADEON_COLOR_ARG( 1, B );
+	    break;
+
+	 case GL_MODULATE_ADD_ATI:
+	    color_combine = (RADEON_BLEND_CTL_ADD |
+			     RADEON_CLAMP_TX);
+	    RADEON_COLOR_ARG( 0, A );
+	    RADEON_COLOR_ARG( 1, C );
+	    RADEON_COLOR_ARG( 2, B );
+	    break;
+	 case GL_MODULATE_SIGNED_ADD_ATI:
+	    color_combine = (RADEON_BLEND_CTL_ADDSIGNED |
+			     RADEON_CLAMP_TX);
+	    RADEON_COLOR_ARG( 0, A );
+	    RADEON_COLOR_ARG( 1, C );
+	    RADEON_COLOR_ARG( 2, B );
+	    break;
+	 case GL_MODULATE_SUBTRACT_ATI:
+	    color_combine = (RADEON_BLEND_CTL_SUBTRACT |
+			     RADEON_CLAMP_TX);
+	    RADEON_COLOR_ARG( 0, A );
+	    RADEON_COLOR_ARG( 1, C );
+	    RADEON_COLOR_ARG( 2, B );
+	    break;
+	 default:
+	    return GL_FALSE;
+	 }
+
+	 switch ( texUnit->CombineModeA ) {
+	 case GL_REPLACE:
+	    alpha_combine = (RADEON_ALPHA_ARG_A_ZERO |
+			     RADEON_ALPHA_ARG_B_ZERO |
+			     RADEON_BLEND_CTL_ADD |
+			     RADEON_CLAMP_TX);
+	    RADEON_ALPHA_ARG( 0, C );
+	    break;
+	 case GL_MODULATE:
+	    alpha_combine = (RADEON_ALPHA_ARG_C_ZERO |
+			     RADEON_BLEND_CTL_ADD |
+			     RADEON_CLAMP_TX);
+	    RADEON_ALPHA_ARG( 0, A );
+	    RADEON_ALPHA_ARG( 1, B );
+	    break;
+	 case GL_ADD:
+	    alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
+			     RADEON_COMP_ARG_B |
+			     RADEON_BLEND_CTL_ADD |
+			     RADEON_CLAMP_TX);
+	    RADEON_ALPHA_ARG( 0, A );
+	    RADEON_ALPHA_ARG( 1, C );
+	    break;
+	 case GL_ADD_SIGNED:
+	    alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
+			     RADEON_COMP_ARG_B |
+			     RADEON_BLEND_CTL_ADDSIGNED |
+			     RADEON_CLAMP_TX);
+	    RADEON_ALPHA_ARG( 0, A );
+	    RADEON_ALPHA_ARG( 1, C );
+	    break;
+	 case GL_SUBTRACT:
+	    alpha_combine = (RADEON_COLOR_ARG_B_ZERO |
+			     RADEON_COMP_ARG_B |
+			     RADEON_BLEND_CTL_SUBTRACT |
+			     RADEON_CLAMP_TX);
+	    RADEON_ALPHA_ARG( 0, A );
+	    RADEON_ALPHA_ARG( 1, C );
+	    break;
+	 case GL_INTERPOLATE:
+	    alpha_combine = (RADEON_BLEND_CTL_BLEND |
+			     RADEON_CLAMP_TX);
+	    RADEON_ALPHA_ARG( 0, B );
+	    RADEON_ALPHA_ARG( 1, A );
+	    RADEON_ALPHA_ARG( 2, C );
+	    break;
+
+	 case GL_MODULATE_ADD_ATI:
+	    alpha_combine = (RADEON_BLEND_CTL_ADD |
+			     RADEON_CLAMP_TX);
+	    RADEON_ALPHA_ARG( 0, A );
+	    RADEON_ALPHA_ARG( 1, C );
+	    RADEON_ALPHA_ARG( 2, B );
+	    break;
+	 case GL_MODULATE_SIGNED_ADD_ATI:
+	    alpha_combine = (RADEON_BLEND_CTL_ADDSIGNED |
+			     RADEON_CLAMP_TX);
+	    RADEON_ALPHA_ARG( 0, A );
+	    RADEON_ALPHA_ARG( 1, C );
+	    RADEON_ALPHA_ARG( 2, B );
+	    break;
+	 case GL_MODULATE_SUBTRACT_ATI:
+	    alpha_combine = (RADEON_BLEND_CTL_SUBTRACT |
+			     RADEON_CLAMP_TX);
+	    RADEON_ALPHA_ARG( 0, A );
+	    RADEON_ALPHA_ARG( 1, C );
+	    RADEON_ALPHA_ARG( 2, B );
+	    break;
+	 default:
+	    return GL_FALSE;
+	 }
+
+	 if ( (texUnit->CombineModeRGB == GL_DOT3_RGB_EXT)
+	      || (texUnit->CombineModeRGB == GL_DOT3_RGB) ) {
+	    alpha_combine |= RADEON_DOT_ALPHA_DONT_REPLICATE;
+	 }
+
+	 /* Step 3:
+	  * Apply the scale factor.
+	  */
+	 color_combine |= (RGBshift << RADEON_SCALE_SHIFT);
+	 alpha_combine |= (Ashift   << RADEON_SCALE_SHIFT);
+
+	 /* All done!
+	  */
+	 break;
+
+      default:
+	 return GL_FALSE;
+      }
+   }
+
+   if ( rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] != color_combine ||
+	rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] != alpha_combine ) {
+      RADEON_STATECHANGE( rmesa, tex[unit] );
+      rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] = color_combine;
+      rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] = alpha_combine;
+   }
+
+   return GL_TRUE;
+}
+
+/* PP_TXFILTER bits that import_tex_obj_state() takes from the texture object. */
+#define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK |	\
+			      RADEON_MIN_FILTER_MASK | 		\
+			      RADEON_MAG_FILTER_MASK |		\
+			      RADEON_MAX_ANISO_MASK |		\
+			      RADEON_YUV_TO_RGB |		\
+			      RADEON_YUV_TEMPERATURE_MASK |	\
+			      RADEON_CLAMP_S_MASK | 		\
+			      RADEON_CLAMP_T_MASK | 		\
+			      RADEON_BORDER_MODE_D3D )
+/* PP_TXFORMAT bits that import_tex_obj_state() takes from the texture object. */
+#define TEXOBJ_TXFORMAT_MASK (RADEON_TXFORMAT_WIDTH_MASK |	\
+			      RADEON_TXFORMAT_HEIGHT_MASK |	\
+			      RADEON_TXFORMAT_FORMAT_MASK |	\
+                              RADEON_TXFORMAT_F5_WIDTH_MASK |	\
+                              RADEON_TXFORMAT_F5_HEIGHT_MASK |	\
+			      RADEON_TXFORMAT_ALPHA_IN_MAP |	\
+			      RADEON_TXFORMAT_CUBIC_MAP_ENABLE |	\
+                              RADEON_TXFORMAT_NON_POWER2)
+
+/* Copy the register values cached in 'texobj' into the hw.tex[unit] state
+ * (and hw.txr[unit] for rectangles), then clear its dirty bit for 'unit'. */
+static void import_tex_obj_state( radeonContextPtr rmesa, int unit,
+				  radeonTexObjPtr texobj )
+{
+   GLuint *cmd = RADEON_DB_STATE( tex[unit] );
+   /* Replace only the bits of TXFILTER / TXFORMAT owned by the object. */
+   cmd[TEX_PP_TXFILTER] = ((cmd[TEX_PP_TXFILTER] & ~TEXOBJ_TXFILTER_MASK) |
+			   (texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK));
+   cmd[TEX_PP_TXFORMAT] = ((cmd[TEX_PP_TXFORMAT] & ~TEXOBJ_TXFORMAT_MASK) |
+			   (texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK));
+   cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
+   cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
+
+   if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+      GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
+      txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
+      txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
+      RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.txr[unit] );
+   }
+   texobj->dirty_state &= ~(1<<unit);
+}
+
+/* Load the S and T texgen planes into rows 0 and 1 of TexGenMatrix[unit] and
+ * enable it, unless both planes are already the matching identity rows.
+ */
+static void set_texgen_matrix( radeonContextPtr rmesa, 
+			       GLuint unit,
+			       const GLfloat *s_plane,
+			       const GLfloat *t_plane )
+{
+   static const GLfloat identity_rows[2][4] = { { 1,0,0,0 }, { 0,1,0,0 } };
+
+   if (!TEST_EQ_4V( s_plane, identity_rows[0]) ||
+       !TEST_EQ_4V( t_plane, identity_rows[1])) {
+      rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE<<unit;
+      rmesa->TexGenMatrix[unit].m[0]  = s_plane[0];
+      rmesa->TexGenMatrix[unit].m[4]  = s_plane[1];
+      rmesa->TexGenMatrix[unit].m[8]  = s_plane[2];
+      rmesa->TexGenMatrix[unit].m[12] = s_plane[3];
+
+      rmesa->TexGenMatrix[unit].m[1]  = t_plane[0];
+      rmesa->TexGenMatrix[unit].m[5]  = t_plane[1];
+      rmesa->TexGenMatrix[unit].m[9]  = t_plane[2];
+      rmesa->TexGenMatrix[unit].m[13] = t_plane[3];
+      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+   }
+}
+
+/* Recompute rmesa->TexGenEnabled and TexGenNeedNormals for 'unit' from the
+ * GL texgen state.  The Q texcoord is ignored for now (enabling it is a
+ * fallback).  Returns GL_FALSE if fallback required.
+ */
+static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
+{  
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+   GLuint tmp = rmesa->TexGenEnabled;
+   /* Clear this unit's texgen bits; the code below sets them again. */
+   rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
+   rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
+   rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
+   rmesa->TexGenNeedNormals[unit] = 0;
+
+   if ((texUnit->TexGenEnabled & (S_BIT|T_BIT)) == 0) {
+      /* Disabled, no fallback:
+       */
+      rmesa->TexGenEnabled |= 
+	 (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+      return GL_TRUE;
+   }
+   else if (texUnit->TexGenEnabled & Q_BIT) {
+      /* Very easy to do this, in fact would remove a fallback case
+       * elsewhere, but I haven't done it yet...  Fallback: 
+       */
+      fprintf(stderr, "fallback Q_BIT\n");
+      return GL_FALSE;
+   }
+   else if ((texUnit->TexGenEnabled & (S_BIT|T_BIT)) != (S_BIT|T_BIT) ||
+	    texUnit->GenModeS != texUnit->GenModeT) {
+      /* Mixed modes, fallback:
+       */
+      /* fprintf(stderr, "fallback mixed texgen\n"); */
+      return GL_FALSE;
+   }
+   else
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
+
+   switch (texUnit->GenModeS) {
+   case GL_OBJECT_LINEAR:
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_OBJ << inputshift;
+      set_texgen_matrix( rmesa, unit, 
+			 texUnit->ObjectPlaneS,
+			 texUnit->ObjectPlaneT);
+      break;
+
+   case GL_EYE_LINEAR:
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE << inputshift;
+      set_texgen_matrix( rmesa, unit, 
+			 texUnit->EyePlaneS,
+			 texUnit->EyePlaneT);
+      break;
+
+   case GL_REFLECTION_MAP_NV:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_REFLECT<<inputshift;
+      break;
+
+   case GL_NORMAL_MAP_NV:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_NORMAL<<inputshift;
+      break;
+
+   case GL_SPHERE_MAP:
+   default:
+      /* Unsupported mode, fallback:
+       */
+      /*  fprintf(stderr, "fallback unsupported texgen\n"); */
+      return GL_FALSE;
+   }
+   /* Flag _NEW_TEXTURE_MATRIX when the enable bits differ from on entry. */
+   if (tmp != rmesa->TexGenEnabled) {
+      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+   }
+
+   return GL_TRUE;
+}
+
+/* Turn texture unit 'unit' off in the hardware state if it is enabled there. */
+static void disable_tex( GLcontext *ctx, int unit )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) {
+      /* Texture unit disabled */
+      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
+	 /* The old texture is no longer bound to this texture unit.
+	  * Mark it as such.
+	  */
+
+	 rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
+	 rmesa->state.texture.unit[unit].texobj = NULL;
+      }
+
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= 
+	  ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit);
+
+      RADEON_STATECHANGE( rmesa, tcl );
+      switch (unit) {
+      case 0:
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_TCL_VTX_ST0 |
+						   RADEON_TCL_VTX_Q0);
+	    break;
+      case 1:
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_TCL_VTX_ST1 |
+						   RADEON_TCL_VTX_Q1);
+	 break;
+      default:
+	 break;
+      }
+
+      /* Drop a texgen TCL fallback that is currently raised for this unit. */
+      if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
+	 TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
+	 rmesa->recheck_texgen[unit] = GL_TRUE;
+      }
+
+
+      /* Reset the unit's texgen bits so its input is TEXCOORD_0+unit again. */
+      {
+	 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+	 GLuint tmp = rmesa->TexGenEnabled;
+
+	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
+	 rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
+	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
+	 rmesa->TexGenNeedNormals[unit] = 0;
+	 rmesa->TexGenEnabled |= 
+	     (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+
+	 if (tmp != rmesa->TexGenEnabled) {
+	    rmesa->recheck_texgen[unit] = GL_TRUE;
+	    rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+	 }
+      }
+   }
+}
+
+/* Load the 1D/2D images associated with 'unit': clear the NON_POWER2
+ * format flag if it is set, then re-set and upload the images when they
+ * are dirty.  Returns GL_FALSE if the object has no memBlock afterwards.
+ */
+static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+
+   /* Changing the flag sets every bit of dirty_images[0]. */
+   if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
+      t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
+      t->base.dirty_images[0] = ~0;
+   }
+
+   ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
+
+   if ( !t->base.dirty_images[0] )
+      return GL_TRUE;
+
+   RADEON_FIREVERTICES( rmesa );
+   radeonSetTexImages( rmesa, tObj );
+   radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
+   return t->base.memBlock ? GL_TRUE : GL_FALSE;
+}
+
+/* Rectangle-texture counterpart of enable_tex_2d(): set the NON_POWER2 flag
+ * if missing, upload dirty images, GL_FALSE (plus a log line) on failure. */
+static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+
+   if ((t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) == 0) {
+      t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
+      t->base.dirty_images[0] = ~0;
+   }
+
+   ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+
+   if ( !t->base.dirty_images[0] )
+      return GL_TRUE;
+
+   RADEON_FIREVERTICES( rmesa );
+   radeonSetTexImages( rmesa, tObj );
+   radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
+   if ( t->base.memBlock /* || rmesa->prefer_agp_client_texturing  FIXME */ )
+      return GL_TRUE;
+   fprintf(stderr, "%s: upload failed\n", __FUNCTION__);
+   return GL_FALSE;
+}
+
+/* Per-unit validation shared by the 2D and rectangle paths; GL_FALSE means fallback. */
+static GLboolean update_tex_common( GLcontext *ctx, int unit )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   struct gl_texture_object *tObj = texUnit->_Current;
+   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+   GLenum format;
+
+   /* Fallback if there's a texture border */
+   if ( tObj->Image[tObj->BaseLevel]->Border > 0 ) {
+      fprintf(stderr, "%s: border\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   /* Update state if this is a different texture object to last
+    * time.
+    */
+   if ( rmesa->state.texture.unit[unit].texobj != t ) {
+      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
+	 /* The old texture is no longer bound to this texture unit.
+	  * Mark it as such.
+	  */
+
+	 rmesa->state.texture.unit[unit].texobj->base.bound &= 
+	     ~(1UL << unit);
+      }
+
+      rmesa->state.texture.unit[unit].texobj = t;
+      t->base.bound |= (1UL << unit);
+      t->dirty_state |= 1<<unit;
+      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
+   }
+
+
+   /* Newly enabled?
+    */
+   if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) {
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= 
+	  (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
+
+      RADEON_STATECHANGE( rmesa, tcl );
+
+      if (unit == 0)
+	  rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_ST0;
+      else 
+	  rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_ST1;
+
+      rmesa->recheck_texgen[unit] = GL_TRUE;
+   }
+
+   if (t->dirty_state & (1<<unit)) {
+      import_tex_obj_state( rmesa, unit, t );
+   }
+
+   if (rmesa->recheck_texgen[unit]) {
+      GLboolean fallback = !radeon_validate_texgen( ctx, unit );
+      TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
+      rmesa->recheck_texgen[unit] = 0;
+      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+   }
+   /* Redo the texture-env setup only when the cached format/env mode differ. */
+   format = tObj->Image[tObj->BaseLevel]->Format;
+   if ( rmesa->state.texture.unit[unit].format != format ||
+	rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
+      rmesa->state.texture.unit[unit].format = format;
+      rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
+      if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
+	 return GL_FALSE;
+      }
+   }
+
+   FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
+   return !t->border_fallback;
+}
+
+/* Bring the hardware state of one texture unit in line with the GL state.
+ * Returns GL_FALSE when the unit cannot be handled by this path.
+ */
+static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
+{
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+   /* The rectangle TCL fallback is cleared here and re-raised below. */
+   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_TEXRECT_0 << unit, 0 );
+
+   if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) {
+      TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_TEXRECT_0 << unit, 1 );
+      return (enable_tex_rect( ctx, unit ) &&
+	      update_tex_common( ctx, unit ));
+   }
+
+   if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) )
+      return (enable_tex_2d( ctx, unit ) &&
+	      update_tex_common( ctx, unit ));
+
+   if ( texUnit->_ReallyEnabled )
+      return GL_FALSE;
+
+   disable_tex( ctx, unit );
+   return GL_TRUE;
+}
+
+/* Validate texture units 0 and 1, then set RADEON_FALLBACK_TEXTURE to match. */
+void radeonUpdateTextureState( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLboolean ok = radeonUpdateTextureUnit( ctx, 0 );
+   /* Unit 1 is only visited when unit 0 succeeded (short-circuit). */
+   if (ok)
+      ok = radeonUpdateTextureUnit( ctx, 1 );
+
+   FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
+   if (rmesa->TclFallback)
+      radeonChooseVertexState( ctx );
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_vtxfmt.c b/src/mesa/drivers/dri/radeon/radeon_vtxfmt.c
new file mode 100644
index 0000000000..b613e9eb43
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_vtxfmt.c
@@ -0,0 +1,1089 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c,v 1.5 2002/12/16 16:18:59 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+#include "glheader.h"
+#include "imports.h"
+#include "api_noop.h"
+#include "api_arrayelt.h"
+#include "context.h"
+#include "mtypes.h"
+#include "enums.h"
+#include "glapi.h"
+#include "colormac.h"
+#include "light.h"
+#include "state.h"
+#include "vtxfmt.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_array_api.h"
+
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_tex.h"
+#include "radeon_tcl.h"
+#include "radeon_swtcl.h"
+#include "radeon_vtxfmt.h"
+
+static void radeonVtxfmtFlushVertices( GLcontext *, GLuint );
+
+static void count_func( const char *name,  struct dynfn *l )
+{
+   struct dynfn *entry;		/* cursor for the foreach() walk */
+   int total = 0;		/* number of entries found on list 'l' */
+   foreach (entry, l) total++;
+   if (total != 0) fprintf(stderr, "%s: %d\n", name, total );	/* empty lists stay silent */
+}
+/* Debug helper: run count_func() on every list in rmesa->vb.dfn_cache. */
+static void count_funcs( radeonContextPtr rmesa )
+{
+   count_func( "Vertex2f", &rmesa->vb.dfn_cache.Vertex2f );
+   count_func( "Vertex2fv", &rmesa->vb.dfn_cache.Vertex2fv );
+   count_func( "Vertex3f", &rmesa->vb.dfn_cache.Vertex3f );
+   count_func( "Vertex3fv", &rmesa->vb.dfn_cache.Vertex3fv );
+   count_func( "Color4ub", &rmesa->vb.dfn_cache.Color4ub );
+   count_func( "Color4ubv", &rmesa->vb.dfn_cache.Color4ubv );
+   count_func( "Color3ub", &rmesa->vb.dfn_cache.Color3ub );
+   count_func( "Color3ubv", &rmesa->vb.dfn_cache.Color3ubv );
+   count_func( "Color4f", &rmesa->vb.dfn_cache.Color4f );
+   count_func( "Color4fv", &rmesa->vb.dfn_cache.Color4fv );
+   count_func( "Color3f", &rmesa->vb.dfn_cache.Color3f );
+   count_func( "Color3fv", &rmesa->vb.dfn_cache.Color3fv );
+   count_func( "SecondaryColor3f", &rmesa->vb.dfn_cache.SecondaryColor3fEXT );
+   count_func( "SecondaryColor3fv", &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
+   count_func( "SecondaryColor3ub", &rmesa->vb.dfn_cache.SecondaryColor3ubEXT );
+   count_func( "SecondaryColor3ubv", &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
+   count_func( "Normal3f", &rmesa->vb.dfn_cache.Normal3f );
+   count_func( "Normal3fv", &rmesa->vb.dfn_cache.Normal3fv );
+   count_func( "TexCoord2f", &rmesa->vb.dfn_cache.TexCoord2f );
+   count_func( "TexCoord2fv", &rmesa->vb.dfn_cache.TexCoord2fv );
+   count_func( "TexCoord1f", &rmesa->vb.dfn_cache.TexCoord1f );
+   count_func( "TexCoord1fv", &rmesa->vb.dfn_cache.TexCoord1fv );
+   count_func( "MultiTexCoord2fARB", &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+   count_func( "MultiTexCoord2fvARB", &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+   count_func( "MultiTexCoord1fARB", &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
+   count_func( "MultiTexCoord1fvARB", &rmesa->vb.dfn_cache.MultiTexCoord1fvARB );
+}
+
+/* Copy the attributes selected by rmesa->vb.vertex_format into ctx->Current. */
+void radeon_copy_to_current( GLcontext *ctx ) 
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   assert(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT);
+
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_N0) {
+      ctx->Current.Attrib[VERT_ATTRIB_NORMAL][0] = rmesa->vb.normalptr[0];
+      ctx->Current.Attrib[VERT_ATTRIB_NORMAL][1] = rmesa->vb.normalptr[1];
+      ctx->Current.Attrib[VERT_ATTRIB_NORMAL][2] = rmesa->vb.normalptr[2];
+   }
+
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_PKCOLOR) {
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0] = UBYTE_TO_FLOAT( rmesa->vb.colorptr->red );
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1] = UBYTE_TO_FLOAT( rmesa->vb.colorptr->green );
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2] = UBYTE_TO_FLOAT( rmesa->vb.colorptr->blue );
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = UBYTE_TO_FLOAT( rmesa->vb.colorptr->alpha );
+   } 
+   /* Float colors: RGB and alpha are guarded by separate format bits. */
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_FPCOLOR) {
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0] = rmesa->vb.floatcolorptr[0];
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1] = rmesa->vb.floatcolorptr[1];
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2] = rmesa->vb.floatcolorptr[2];
+   }
+
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_FPALPHA)
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = rmesa->vb.floatcolorptr[3];
+      
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_PKSPEC) {
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR1][0] = UBYTE_TO_FLOAT( rmesa->vb.specptr->red );
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR1][1] = UBYTE_TO_FLOAT( rmesa->vb.specptr->green );
+      ctx->Current.Attrib[VERT_ATTRIB_COLOR1][2] = UBYTE_TO_FLOAT( rmesa->vb.specptr->blue );
+   } 
+
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_ST0) {
+      ctx->Current.Attrib[VERT_ATTRIB_TEX0][0] = rmesa->vb.texcoordptr[0][0];
+      ctx->Current.Attrib[VERT_ATTRIB_TEX0][1] = rmesa->vb.texcoordptr[0][1];
+      ctx->Current.Attrib[VERT_ATTRIB_TEX0][2] = 0.0F;
+      ctx->Current.Attrib[VERT_ATTRIB_TEX0][3] = 1.0F;
+   }
+
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_ST1) {
+      ctx->Current.Attrib[VERT_ATTRIB_TEX1][0] = rmesa->vb.texcoordptr[1][0];
+      ctx->Current.Attrib[VERT_ATTRIB_TEX1][1] = rmesa->vb.texcoordptr[1][1];
+      ctx->Current.Attrib[VERT_ATTRIB_TEX1][2] = 0.0F;
+      ctx->Current.Attrib[VERT_ATTRIB_TEX1][3] = 1.0F;
+   }
+
+   ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
+}
+
+static GLboolean discreet_gl_prim[GL_POLYGON+1] = {	/* 1: adjacent runs may be merged in flush_prims() */
+   1,				/* 0 points */
+   1,				/* 1 lines */
+   0,				/* 2 line_strip */
+   0,				/* 3 line_loop */
+   1,				/* 4 tris */
+   0,				/* 5 tri_fan */
+   0,				/* 6 tri_strip */
+   1,				/* 7 quads */
+   0,				/* 8 quadstrip */
+   0,				/* 9 poly */
+};
+/* Emit every entry queued in rmesa->vb.primlist, then empty the list. */
+static void flush_prims( radeonContextPtr rmesa )
+{
+   int i,j;
+   struct radeon_dma_region tmp = rmesa->dma.current;
+   /* 'tmp' holds its own reference; it is released at the end of the function. */
+   tmp.buf->refcount++;
+   tmp.aos_size = rmesa->vb.vertex_size;
+   tmp.aos_stride = rmesa->vb.vertex_size;
+   tmp.aos_start = GET_START(&tmp);
+   /* Move dma.current past the vertices used so far (vertex_size presumably in dwords). */
+   rmesa->dma.current.ptr = rmesa->dma.current.start += 
+      (rmesa->vb.initial_counter - rmesa->vb.counter) * rmesa->vb.vertex_size * 4; 
+
+   rmesa->tcl.vertex_format = rmesa->vb.vertex_format;
+   rmesa->tcl.aos_components[0] = &tmp;
+   rmesa->tcl.nr_aos_components = 1;
+   rmesa->dma.flush = 0;
+
+   /* Optimize the primitive list: merge an entry into its predecessor when
+    * both have the same discreet_gl_prim type and are contiguous. */
+   if (rmesa->vb.nrprims > 1) {
+      for (j = 0, i = 1 ; i < rmesa->vb.nrprims; i++) {
+	 int pj = rmesa->vb.primlist[j].prim & 0xf;
+	 int pi = rmesa->vb.primlist[i].prim & 0xf;
+      
+	 if (pj == pi && discreet_gl_prim[pj] &&
+	     rmesa->vb.primlist[i].start == rmesa->vb.primlist[j].end) {
+	    rmesa->vb.primlist[j].end = rmesa->vb.primlist[i].end;
+	 }
+	 else {
+	    j++;
+	    if (j != i) rmesa->vb.primlist[j] = rmesa->vb.primlist[i];
+	 }
+      }
+      rmesa->vb.nrprims = j+1;
+   }
+
+   for (i = 0 ; i < rmesa->vb.nrprims; i++) {
+      if (RADEON_DEBUG & DEBUG_PRIMS)
+	 fprintf(stderr, "vtxfmt prim %d: %s %d..%d\n", i,
+		 _mesa_lookup_enum_by_nr( rmesa->vb.primlist[i].prim & 
+					  PRIM_MODE_MASK ),
+		 rmesa->vb.primlist[i].start,
+		 rmesa->vb.primlist[i].end);
+
+      radeonEmitPrimitive( rmesa->glCtx,
+			   rmesa->vb.primlist[i].start,
+			   rmesa->vb.primlist[i].end,
+			   rmesa->vb.primlist[i].prim );
+   }
+
+   rmesa->vb.nrprims = 0;
+   radeonReleaseDmaRegion( rmesa, &tmp, __FUNCTION__ );
+}
+
+/* Record the start vertex and mode of the primlist entry at index nrprims. */
+static void start_prim( radeonContextPtr rmesa, GLuint mode )
+{
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, rmesa->vb.initial_counter - rmesa->vb.counter);
+
+   rmesa->vb.primlist[rmesa->vb.nrprims].start = rmesa->vb.initial_counter - rmesa->vb.counter;
+   rmesa->vb.primlist[rmesa->vb.nrprims].prim = mode;
+}
+/* Close the primlist entry at nrprims, OR in 'flags', flush when the list is full. */
+static void note_last_prim( radeonContextPtr rmesa, GLuint flags )
+{
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, rmesa->vb.initial_counter - rmesa->vb.counter);
+   /* GL_POLYGON+1 presumably marks "no primitive open" -- TODO confirm. */
+   if (rmesa->vb.prim[0] != GL_POLYGON+1) {
+      rmesa->vb.primlist[rmesa->vb.nrprims].prim |= flags;
+      rmesa->vb.primlist[rmesa->vb.nrprims].end = rmesa->vb.initial_counter - rmesa->vb.counter;
+
+      if (++(rmesa->vb.nrprims) == RADEON_MAX_PRIMS)
+	 flush_prims( rmesa );
+   }
+}
+
+/* Copy vertex 'n' of the primlist entry at nrprims (vertex_size floats) to 'dst'. */
+static void copy_vertex( radeonContextPtr rmesa, GLuint n, GLfloat *dst )
+{
+   GLuint i;
+   GLfloat *src = (GLfloat *)(rmesa->dma.current.address + 
+			      rmesa->dma.current.ptr + 
+			      (rmesa->vb.primlist[rmesa->vb.nrprims].start + n) * 
+			      rmesa->vb.vertex_size * 4);
+
+   if (RADEON_DEBUG & DEBUG_VFMT) 
+      fprintf(stderr, "copy_vertex %d\n", rmesa->vb.primlist[rmesa->vb.nrprims].start + n);
+
+   for (i = 0 ; i < rmesa->vb.vertex_size; i++) {
+      dst[i] = src[i];
+   }
+}
+
+/* Copy into 'tmp' the vertices of the current primitive selected by its
+ * type and return how many were copied.  NOTE: This actually reads them
+ * back from uncached memory.  Could also use the counter/notify mechanism
+ * to populate tmp on the fly as vertices are generated. */
+static GLuint copy_dma_verts( radeonContextPtr rmesa, GLfloat (*tmp)[15] )
+{
+   GLuint ovf, i;
+   GLuint nr = (rmesa->vb.initial_counter - rmesa->vb.counter) - rmesa->vb.primlist[rmesa->vb.nrprims].start;
+
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s %d verts\n", __FUNCTION__, nr);
+
+   switch( rmesa->vb.prim[0] )
+   {
+   case GL_POINTS:
+      return 0;
+   case GL_LINES:
+      ovf = nr&1;
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_TRIANGLES:
+      ovf = nr%3;
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_QUADS:
+      ovf = nr&3;
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_LINE_STRIP:
+      if (nr == 0) 
+	 return 0;
+      copy_vertex( rmesa, nr-1, tmp[0] );
+      return 1;
+   case GL_LINE_LOOP:
+   case GL_TRIANGLE_FAN:
+   case GL_POLYGON:
+      if (nr == 0) 
+	 return 0;
+      else if (nr == 1) {
+	 copy_vertex( rmesa, 0, tmp[0] );
+	 return 1;
+      } else {
+	 copy_vertex( rmesa, 0, tmp[0] );
+	 copy_vertex( rmesa, nr-1, tmp[1] );
+	 return 2;
+      }
+   case GL_TRIANGLE_STRIP:
+      ovf = MIN2(nr, 2);
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   case GL_QUAD_STRIP:
+      switch (nr) {
+      case 0: ovf = 0; break;
+      case 1: ovf = 1; break;
+      default: ovf = 2 + (nr&1); break;
+      }
+      for (i = 0 ; i < ovf ; i++)
+	 copy_vertex( rmesa, nr-ovf+i, tmp[i] );
+      return i;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+/* Leave the driver vtxfmt path while no begin/end pair is open; 'caller' is only used in the debug message. */
+static void VFMT_FALLBACK_OUTSIDE_BEGIN_END( const char *caller )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+   /* Flush through this path's own hook before ctx->Driver.FlushVertices is replaced below. */
+   if (ctx->Driver.NeedFlush) 
+      radeonVtxfmtFlushVertices( ctx, ctx->Driver.NeedFlush );
+
+   if (ctx->NewState)
+      _mesa_update_state( ctx ); /* clear state so fell_back sticks */
+   /* Switch to the tnl exec path and restore the driver's regular flush hook. */
+   _tnl_wakeup_exec( ctx );
+   ctx->Driver.FlushVertices = radeonFlushVertices;
+   /* No dma flush callback may still be pending at this point. */
+   assert( rmesa->dma.flush == 0 );
+   rmesa->vb.fell_back = GL_TRUE;
+   rmesa->vb.installed = GL_FALSE;
+}
+
+/* Leave the driver vtxfmt path, possibly inside begin/end: finish and flush the open prim, then replay its saved vertices and the current attributes through the GL entrypoints. */
+static void VFMT_FALLBACK( const char *caller )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLfloat tmp[3][15]; /* copy_dma_verts() saves at most 3 vertices, 15 floats of room each */
+   GLuint i, prim;
+   GLuint ind = rmesa->vb.vertex_format;
+   GLuint nrverts;
+   GLfloat alpha = 1.0;
+
+   if (RADEON_DEBUG & (DEBUG_FALLBACKS|DEBUG_VFMT))
+      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+   /* Outside begin/end there is no open prim to carry over; use the simpler fallback. */
+   if (rmesa->vb.prim[0] == GL_POLYGON+1) {
+      VFMT_FALLBACK_OUTSIDE_BEGIN_END( __FUNCTION__ );
+      return;
+   }
+
+   /* Copy vertices out of dma:
+    */
+   nrverts = copy_dma_verts( rmesa, tmp );
+
+   /* Finish the prim at this point:
+    */
+   note_last_prim( rmesa, 0 );
+   flush_prims( rmesa );
+
+   /* Update ctx->Driver.CurrentExecPrimitive and swap in swtnl. 
+    */
+   prim = rmesa->vb.prim[0]; /* vb.prim points at CurrentExecPrimitive, so save it before the reset */
+   ctx->Driver.CurrentExecPrimitive = GL_POLYGON+1;
+   _tnl_wakeup_exec( ctx );
+   ctx->Driver.FlushVertices = radeonFlushVertices;
+
+   assert(rmesa->dma.flush == 0);
+   rmesa->vb.fell_back = GL_TRUE;
+   rmesa->vb.installed = GL_FALSE;
+   glBegin( prim ); /* reopen the same primitive on the path that was just swapped in */
+   
+   if (rmesa->vb.installed_color_3f_sz == 4)
+      alpha = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3];
+
+   /* Replay saved vertices
+    */
+   for (i = 0 ; i < nrverts; i++) {
+      GLuint offset = 3; /* floats 0..2 are the position, sent last via glVertex3fv */
+      if (ind & RADEON_CP_VC_FRMT_N0) {
+	 glNormal3fv( &tmp[i][offset] ); 
+	 offset += 3;
+      }
+
+      if (ind & RADEON_CP_VC_FRMT_PKCOLOR) {
+	 radeon_color_t *col = (radeon_color_t *)&tmp[i][offset];
+	 glColor4ub( col->red, col->green, col->blue, col->alpha );
+	 offset++;
+      }
+      else if (ind & RADEON_CP_VC_FRMT_FPALPHA) {
+	 glColor4fv( &tmp[i][offset] ); 
+	 offset+=4;
+      } 
+      else if (ind & RADEON_CP_VC_FRMT_FPCOLOR) {
+	 glColor3fv( &tmp[i][offset] ); 
+	 offset+=3;
+      }
+
+      if (ind & RADEON_CP_VC_FRMT_PKSPEC) {
+	 radeon_color_t *spec = (radeon_color_t *)&tmp[i][offset];
+	 _glapi_Dispatch->SecondaryColor3ubEXT( spec->red, spec->green, spec->blue );
+	 offset++;
+      }
+
+      if (ind & RADEON_CP_VC_FRMT_ST0) {
+	 glTexCoord2fv( &tmp[i][offset] ); 
+	 offset += 2;
+      }
+
+      if (ind & RADEON_CP_VC_FRMT_ST1) {
+	 glMultiTexCoord2fvARB( GL_TEXTURE1_ARB, &tmp[i][offset] );
+	 offset += 2;
+      }
+      glVertex3fv( &tmp[i][0] );
+   }
+
+   /* Replay current vertex: only attributes are sent (no glVertex call), taken from the vb.*ptr pointers
+    */
+   if (ind & RADEON_CP_VC_FRMT_N0) 
+      glNormal3fv( rmesa->vb.normalptr );
+
+   if (ind & RADEON_CP_VC_FRMT_PKCOLOR)
+      glColor4ub( rmesa->vb.colorptr->red, rmesa->vb.colorptr->green, rmesa->vb.colorptr->blue, rmesa->vb.colorptr->alpha );
+   else if (ind & RADEON_CP_VC_FRMT_FPALPHA)
+      glColor4fv( rmesa->vb.floatcolorptr );
+   else if (ind & RADEON_CP_VC_FRMT_FPCOLOR) {
+      if (rmesa->vb.installed_color_3f_sz == 4 && alpha != 1.0)
+	 glColor4f( rmesa->vb.floatcolorptr[0],
+		    rmesa->vb.floatcolorptr[1],
+		    rmesa->vb.floatcolorptr[2],
+		    alpha );
+      else
+	 glColor3fv( rmesa->vb.floatcolorptr );
+   }
+
+   if (ind & RADEON_CP_VC_FRMT_PKSPEC) 
+      _glapi_Dispatch->SecondaryColor3ubEXT( rmesa->vb.specptr->red, rmesa->vb.specptr->green, rmesa->vb.specptr->blue ); 
+
+   if (ind & RADEON_CP_VC_FRMT_ST0) 
+      glTexCoord2fv( rmesa->vb.texcoordptr[0] );
+
+   if (ind & RADEON_CP_VC_FRMT_ST1) 
+      glMultiTexCoord2fvARB( GL_TEXTURE1_ARB, rmesa->vb.texcoordptr[1] );
+}
+
+
+/* Installed as vb.notify: finish and fire what is buffered, get a fresh dma region, then restart the open prim with its saved vertices. */
+static void wrap_buffer( void )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLfloat tmp[3][15]; /* copy_dma_verts() saves at most 3 vertices, 15 floats of room each */
+   GLuint i, nrverts;
+
+   if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_PRIMS))
+      fprintf(stderr, "%s %d\n", __FUNCTION__, rmesa->vb.initial_counter - rmesa->vb.counter);
+
+   /* Don't deal with parity: on an odd vertex count in the open prim, bump both counters and return without wrapping.
+    */
+   if ((((rmesa->vb.initial_counter - rmesa->vb.counter) -  
+	 rmesa->vb.primlist[rmesa->vb.nrprims].start) & 1)) {
+      rmesa->vb.counter++;
+      rmesa->vb.initial_counter++;
+      return;
+   }
+
+   /* Copy vertices out of dma (nothing to save when outside begin/end):
+    */
+   if (rmesa->vb.prim[0] == GL_POLYGON+1) 
+      nrverts = 0;
+   else {
+      nrverts = copy_dma_verts( rmesa, tmp );
+
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%d vertices to copy\n", nrverts);
+   
+      /* Finish the prim at this point:
+       */
+      note_last_prim( rmesa, 0 );
+   }
+
+   /* Fire any buffered primitives
+    */
+   flush_prims( rmesa );
+
+   /* Get new buffer
+    */
+   radeonRefillCurrentDmaRegion( rmesa );
+
+   /* Reset counter, dmaptr: counter = whole vertices that fit in the new region, minus one
+    */
+   rmesa->vb.dmaptr = (int *)(rmesa->dma.current.ptr + rmesa->dma.current.address);
+   rmesa->vb.counter = (rmesa->dma.current.end - rmesa->dma.current.ptr) / 
+      (rmesa->vb.vertex_size * 4);
+   rmesa->vb.counter--;
+   rmesa->vb.initial_counter = rmesa->vb.counter;
+   rmesa->vb.notify = wrap_buffer;
+
+   rmesa->dma.flush = flush_prims;
+
+   /* Restart wrapped primitive:
+    */
+   if (rmesa->vb.prim[0] != GL_POLYGON+1)
+      start_prim( rmesa, rmesa->vb.prim[0] );
+
+   /* Reemit saved vertices: each copy advances dmaptr by one vertex and uses up one counter slot
+    */
+   for (i = 0 ; i < nrverts; i++) {
+      if (RADEON_DEBUG & DEBUG_VERTS) {
+	 int j;
+	 fprintf(stderr, "re-emit vertex %d to %p\n", i, rmesa->vb.dmaptr);
+	 if (RADEON_DEBUG & DEBUG_VERBOSE)
+	    for (j = 0 ; j < rmesa->vb.vertex_size; j++) 
+	       fprintf(stderr, "\t%08x/%f\n", *(int*)&tmp[i][j], tmp[i][j]);
+      }
+
+      memcpy( rmesa->vb.dmaptr, tmp[i], rmesa->vb.vertex_size * 4 );
+      rmesa->vb.dmaptr += rmesa->vb.vertex_size;
+      rmesa->vb.counter--;
+   }
+}
+
+
+/* Pick the hardware vertex format for the current GL state and lay out vb.vertex to match; returns GL_FALSE when this path cannot be used. */
+static GLboolean check_vtx_fmt( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint ind = RADEON_CP_VC_FRMT_Z;
+   /* Refuse during a TCL fallback, after a vtxfmt fallback, or while ctx->CompileFlag is set. */
+   if (rmesa->TclFallback || rmesa->vb.fell_back || ctx->CompileFlag)
+      return GL_FALSE;
+
+   if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) 
+      ctx->Driver.FlushVertices( ctx, FLUSH_UPDATE_CURRENT );
+   
+   /* Make all this event-driven:
+    */
+   if (ctx->Light.Enabled) {
+      ind |= RADEON_CP_VC_FRMT_N0;
+
+      /* TODO: make this data driven: If we receive only ubytes, send
+       * color as ubytes.  Also check if converting (with free
+       * checking for overflow) is cheaper than sending floats
+       * directly.
+       */
+      if (ctx->Light.ColorMaterialEnabled) {
+	 ind |= (RADEON_CP_VC_FRMT_FPCOLOR |
+		 RADEON_CP_VC_FRMT_FPALPHA);
+      }
+      else
+	 ind |= RADEON_CP_VC_FRMT_PKCOLOR; /* for alpha? */
+   }
+   else {
+      /* TODO: make this data driven?
+       */
+      ind |= RADEON_CP_VC_FRMT_PKCOLOR;
+	 
+      if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
+	 ind |= RADEON_CP_VC_FRMT_PKSPEC;
+      }
+   }
+   /* Unit 0: with texgen only normals may be needed; otherwise send s,t and refuse if current r != 0 or q != 1. */
+   if (ctx->Texture.Unit[0]._ReallyEnabled) {
+      if (ctx->Texture.Unit[0].TexGenEnabled) {
+	 if (rmesa->TexGenNeedNormals[0]) {
+	    ind |= RADEON_CP_VC_FRMT_N0;
+	 }
+      } else {
+	 if (ctx->Current.Attrib[VERT_ATTRIB_TEX0][2] != 0.0F ||
+	     ctx->Current.Attrib[VERT_ATTRIB_TEX0][3] != 1.0) {
+	    if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+	       fprintf(stderr, "%s: rq0\n", __FUNCTION__);
+	    return GL_FALSE;
+	 }
+	 ind |= RADEON_CP_VC_FRMT_ST0;
+      }
+   }
+   /* Unit 1: same rules as unit 0. */
+   if (ctx->Texture.Unit[1]._ReallyEnabled) {
+      if (ctx->Texture.Unit[1].TexGenEnabled) {
+	 if (rmesa->TexGenNeedNormals[1]) {
+	    ind |= RADEON_CP_VC_FRMT_N0;
+	 }
+      } else {
+	 if (ctx->Current.Attrib[VERT_ATTRIB_TEX1][2] != 0.0F ||
+	     ctx->Current.Attrib[VERT_ATTRIB_TEX1][3] != 1.0) {
+	    if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+	       fprintf(stderr, "%s: rq1\n", __FUNCTION__);
+	    return GL_FALSE;
+	 }
+	 ind |= RADEON_CP_VC_FRMT_ST1;
+      }
+   }
+
+   if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_STATE))
+      fprintf(stderr, "%s: format: 0x%x\n", __FUNCTION__, ind );
+
+   RADEON_NEWPRIM(rmesa);
+   rmesa->vb.vertex_format = ind;
+   rmesa->vb.vertex_size = 3; /* the position takes the first three slots */
+   rmesa->vb.prim = &ctx->Driver.CurrentExecPrimitive;
+   /* Default every attribute pointer to ctx->Current; those present in the format are redirected into vb.vertex below. */
+   rmesa->vb.normalptr = ctx->Current.Attrib[VERT_ATTRIB_NORMAL];
+   rmesa->vb.colorptr = NULL;
+   rmesa->vb.floatcolorptr = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
+   rmesa->vb.specptr = NULL;
+   rmesa->vb.floatspecptr = ctx->Current.Attrib[VERT_ATTRIB_COLOR1];
+   rmesa->vb.texcoordptr[0] = ctx->Current.Attrib[VERT_ATTRIB_TEX0];
+   rmesa->vb.texcoordptr[1] = ctx->Current.Attrib[VERT_ATTRIB_TEX1];
+
+   /* Run through and initialize the vertex components in the order
+    * the hardware understands:
+    */
+   if (ind & RADEON_CP_VC_FRMT_N0) {
+      rmesa->vb.normalptr = &rmesa->vb.vertex[rmesa->vb.vertex_size].f;
+      rmesa->vb.vertex_size += 3;
+      rmesa->vb.normalptr[0] = ctx->Current.Attrib[VERT_ATTRIB_NORMAL][0];
+      rmesa->vb.normalptr[1] = ctx->Current.Attrib[VERT_ATTRIB_NORMAL][1];
+      rmesa->vb.normalptr[2] = ctx->Current.Attrib[VERT_ATTRIB_NORMAL][2];
+   }
+
+   if (ind & RADEON_CP_VC_FRMT_PKCOLOR) {
+      rmesa->vb.colorptr = &rmesa->vb.vertex[rmesa->vb.vertex_size].color;
+      rmesa->vb.vertex_size += 1;
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.colorptr->red,   ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0] );
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.colorptr->green, ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1] );
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.colorptr->blue,  ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2] );
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.colorptr->alpha, ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] );
+   }
+
+   if (ind & RADEON_CP_VC_FRMT_FPCOLOR) {
+      assert(!(ind & RADEON_CP_VC_FRMT_PKCOLOR));
+      rmesa->vb.floatcolorptr = &rmesa->vb.vertex[rmesa->vb.vertex_size].f;
+      rmesa->vb.vertex_size += 3;
+      rmesa->vb.floatcolorptr[0] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][0];
+      rmesa->vb.floatcolorptr[1] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][1];
+      rmesa->vb.floatcolorptr[2] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][2];
+      /* FPALPHA adds a fourth float directly after the rgb triple. */
+      if (ind & RADEON_CP_VC_FRMT_FPALPHA) {
+	 rmesa->vb.vertex_size += 1;
+	 rmesa->vb.floatcolorptr[3] = ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3];
+      }
+   }
+   
+   if (ind & RADEON_CP_VC_FRMT_PKSPEC) {
+      rmesa->vb.specptr = &rmesa->vb.vertex[rmesa->vb.vertex_size].color;
+      rmesa->vb.vertex_size += 1;
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.specptr->red,   ctx->Current.Attrib[VERT_ATTRIB_COLOR1][0] );
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.specptr->green, ctx->Current.Attrib[VERT_ATTRIB_COLOR1][1] );
+      UNCLAMPED_FLOAT_TO_CHAN( rmesa->vb.specptr->blue,  ctx->Current.Attrib[VERT_ATTRIB_COLOR1][2] );
+   }
+
+   if (ind & RADEON_CP_VC_FRMT_ST0) {
+      rmesa->vb.texcoordptr[0] = &rmesa->vb.vertex[rmesa->vb.vertex_size].f;
+      rmesa->vb.vertex_size += 2;
+      rmesa->vb.texcoordptr[0][0] = ctx->Current.Attrib[VERT_ATTRIB_TEX0][0];
+      rmesa->vb.texcoordptr[0][1] = ctx->Current.Attrib[VERT_ATTRIB_TEX0][1];   
+   } 
+
+   if (ind & RADEON_CP_VC_FRMT_ST1) {
+      rmesa->vb.texcoordptr[1] = &rmesa->vb.vertex[rmesa->vb.vertex_size].f;
+      rmesa->vb.vertex_size += 2;
+      rmesa->vb.texcoordptr[1][0] = ctx->Current.Attrib[VERT_ATTRIB_TEX1][0];
+      rmesa->vb.texcoordptr[1][1] = ctx->Current.Attrib[VERT_ATTRIB_TEX1][1];
+   } 
+   /* Reinstall the vtxfmt whenever the chosen format differs from the one last installed. */
+   if (rmesa->vb.installed_vertex_format != rmesa->vb.vertex_format) {
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "reinstall on vertex_format change\n");
+      _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );
+      rmesa->vb.installed_vertex_format = rmesa->vb.vertex_format;
+   }
+
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s -- success\n", __FUNCTION__);
+   
+   return GL_TRUE;
+}
+/* Ask for the vertex format to be rechecked at the next begin, and clear the fell_back latch so the path may be retried. */
+void radeonVtxfmtInvalidate( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   rmesa->vb.recheck = GL_TRUE;
+   rmesa->vb.fell_back = GL_FALSE;
+}
+
+/* ctx->Driver.NewList hook used while this path is installed: fall back; none of the three arguments is used. */
+static void radeonNewList( GLcontext *ctx, GLuint list, GLenum mode )
+{
+   VFMT_FALLBACK_OUTSIDE_BEGIN_END( __FUNCTION__ );
+}
+
+/* Re-run check_vtx_fmt() and install or remove the driver vtxfmt according to its verdict. */
+static void radeonVtxfmtValidate( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   /* Flush through whichever FlushVertices hook is current before anything is swapped. */
+   if (ctx->Driver.NeedFlush)
+      ctx->Driver.FlushVertices( ctx, ctx->Driver.NeedFlush );
+
+   rmesa->vb.recheck = GL_FALSE;
+
+   if (check_vtx_fmt( ctx )) {
+      if (!rmesa->vb.installed) {
+	 if (RADEON_DEBUG & DEBUG_VFMT)
+	    fprintf(stderr, "reinstall (new install)\n");
+	 /* Take over dispatch plus the FlushVertices and NewList driver hooks. */
+	 _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );
+	 ctx->Driver.FlushVertices = radeonVtxfmtFlushVertices;
+	 ctx->Driver.NewList = radeonNewList;
+	 rmesa->vb.installed = GL_TRUE;
+      }
+      else if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%s: already installed\n", __FUNCTION__);
+   } 
+   else {
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%s: failed\n", __FUNCTION__);
+      /* The format is unusable: if this path is installed, flush pending dma and hand back to tnl. */
+      if (rmesa->vb.installed) {
+	 if (rmesa->dma.flush)
+	    rmesa->dma.flush( rmesa );
+	 _tnl_wakeup_exec( ctx );
+	 ctx->Driver.FlushVertices = radeonFlushVertices;
+	 rmesa->vb.installed = GL_FALSE;
+      }
+   }      
+}
+
+
+
+/* Materials: inside begin/end, fall back and re-dispatch through glMaterialfv();
+ * outside, store via _mesa_noop_Materialfv() and call radeonUpdateMaterial(). */
+static void radeon_Materialfv( GLenum face, GLenum pname, 
+			       const GLfloat *params )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (rmesa->vb.prim[0] != GL_POLYGON+1) {
+      VFMT_FALLBACK( __FUNCTION__ );
+      glMaterialfv( face, pname, params );
+      return;
+   }
+   _mesa_noop_Materialfv( face, pname, params );
+   radeonUpdateMaterial( ctx );
+}
+
+
+/* Begin/End: radeon_Begin() validates state, re-dispatches to glBegin() if this
+ * path is not installed afterwards, else readies the dma buffer and opens a prim. */
+static void radeon_Begin( GLenum mode )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s( %s )\n", __FUNCTION__,
+	      _mesa_lookup_enum_by_nr( mode ));
+
+   if (mode > GL_POLYGON) {
+      _mesa_error( ctx, GL_INVALID_ENUM, "glBegin" );
+      return;
+   }
+   /* A begin while a primitive is already open is an error. */
+   if (rmesa->vb.prim[0] != GL_POLYGON+1) {
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" );
+      return;
+   }
+   
+   if (ctx->NewState) 
+      _mesa_update_state( ctx );
+
+   if (rmesa->NewGLState)
+      radeonValidateState( ctx );
+
+   if (rmesa->vb.recheck) 
+      radeonVtxfmtValidate( ctx );
+   /* Validation may have uninstalled this path; if so let the current dispatch handle the begin. */
+   if (!rmesa->vb.installed) {
+      glBegin( mode );
+      return;
+   }
+
+   /* A buffer is being filled but vb.counter shows room for fewer than 12 vertices: fire it now. */
+   if (rmesa->dma.flush && rmesa->vb.counter < 12) {
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%s: flush almost-empty buffers\n", __FUNCTION__);
+      flush_prims( rmesa );
+   }
+
+   /* Need to arrange to save vertices here?  Or always copy from dma (yuk)?
+    */
+   if (!rmesa->dma.flush) {
+      if (rmesa->dma.current.ptr + 12*rmesa->vb.vertex_size*4 > 
+	  rmesa->dma.current.end) {
+	 RADEON_NEWPRIM( rmesa );
+	 radeonRefillCurrentDmaRegion( rmesa );
+      }
+      /* Same counter setup as wrap_buffer(): whole vertices that fit in the region, minus one. */
+      rmesa->vb.dmaptr = (int *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
+      rmesa->vb.counter = (rmesa->dma.current.end - rmesa->dma.current.ptr) / 
+	 (rmesa->vb.vertex_size * 4);
+      rmesa->vb.counter--;
+      rmesa->vb.initial_counter = rmesa->vb.counter;
+      rmesa->vb.notify = wrap_buffer;
+      rmesa->dma.flush = flush_prims;
+      ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+   }
+   
+   
+   rmesa->vb.prim[0] = mode;
+   start_prim( rmesa, mode | PRIM_BEGIN );
+}
+
+
+/* End the open primitive: record its end with PRIM_END and mark the context as outside begin/end again. */
+static void radeon_End( void )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   /* An end without a matching begin is an error. */
+   if (rmesa->vb.prim[0] == GL_POLYGON+1) {
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" );
+      return;
+   }
+	  
+   note_last_prim( rmesa, PRIM_END );
+   rmesa->vb.prim[0] = GL_POLYGON+1;
+}
+
+
+/* Fallback on difficult entrypoints: PRE_LOOPBACK and TAG parameterize vtxfmt_tmp.h,
+ * which presumably emits the radeon_fallback_* functions used in radeonVtxfmtInit(). */
+#define PRE_LOOPBACK( FUNC )			\
+do {						\
+   if (RADEON_DEBUG & DEBUG_VFMT) 		\
+      fprintf(stderr, "%s\n", __FUNCTION__);	\
+   VFMT_FALLBACK( __FUNCTION__ );		\
+} while (0)
+#define TAG(x) radeon_fallback_##x
+#include "vtxfmt_tmp.h"
+
+
+/* TNL Driver.NotifyBegin hook: try to (re)install this path and begin primitive 'p' on it; GL_FALSE means it was not installed. */
+static GLboolean radeonNotifyBegin( GLcontext *ctx, GLenum p )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   /* This hook is only expected while the driver vtxfmt is not installed. */
+   assert(!rmesa->vb.installed);
+
+   if (ctx->NewState) 
+      _mesa_update_state( ctx );
+
+   if (rmesa->NewGLState)
+      radeonValidateState( ctx );
+
+   if (ctx->Driver.NeedFlush)
+      ctx->Driver.FlushVertices( ctx, ctx->Driver.NeedFlush );
+
+   if (rmesa->vb.recheck) 
+      radeonVtxfmtValidate( ctx );
+
+   if (!rmesa->vb.installed) {
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "%s -- failed\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   radeon_Begin( p );
+   return GL_TRUE;
+}
+/* ctx->Driver.FlushVertices hook while the driver vtxfmt is installed; handles the two FLUSH_* flag bits separately. */
+static void radeonVtxfmtFlushVertices( GLcontext *ctx, GLuint flags )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   if (RADEON_DEBUG & DEBUG_VFMT)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert(rmesa->vb.installed);
+
+   if (flags & FLUSH_UPDATE_CURRENT) {
+      radeon_copy_to_current( ctx );
+      if (RADEON_DEBUG & DEBUG_VFMT)
+	 fprintf(stderr, "reinstall on update_current\n");
+      _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );
+      ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
+   }
+
+   if (flags & FLUSH_STORED_VERTICES) {
+      /* On this path the dma flush callback is either unset or flush_prims. */
+      assert (rmesa->dma.flush == 0 ||
+	      rmesa->dma.flush == flush_prims);
+      if (rmesa->dma.flush == flush_prims)
+	 flush_prims( rmesa );
+      ctx->Driver.NeedFlush &= ~FLUSH_STORED_VERTICES;
+   }
+}
+
+
+
+/* At this point, don't expect very many versions of each function to
+ * be generated, so not concerned about freeing them?
+ */
+
+/* Build the driver's GLvertexformat table, hook NotifyBegin, and initialise the codegen caches for this context. */
+void radeonVtxfmtInit( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   GLvertexformat *vfmt = &(rmesa->vb.vtxfmt);
+
+   MEMSET( vfmt, 0, sizeof(GLvertexformat) );
+
+   /* Hook in chooser functions for codegen, etc:
+    */
+   radeonVtxfmtInitChoosers( vfmt );
+
+   /* Handled fully in supported states, but no codegen:
+    */
+   vfmt->Materialfv = radeon_Materialfv;
+   vfmt->ArrayElement = _ae_loopback_array_elt;	        /* generic helper */
+   vfmt->Rectf = _mesa_noop_Rectf;			/* generic helper */
+   vfmt->Begin = radeon_Begin;
+   vfmt->End = radeon_End;
+
+   /* Fallback for performance reasons:  (Fix with cva/elt path here and
+    * dmatmp2.h style primitive-merging)
+    *
+    * These should call NotifyBegin(), as should _tnl_EvalMesh, to allow
+    * a driver-hook.
+    */
+   vfmt->DrawArrays = radeon_fallback_DrawArrays;
+   vfmt->DrawElements = radeon_fallback_DrawElements;
+   vfmt->DrawRangeElements = radeon_fallback_DrawRangeElements; 
+
+
+   /* Not active in supported states; just keep ctx->Current uptodate:
+    */
+   vfmt->FogCoordfvEXT = _mesa_noop_FogCoordfvEXT;
+   vfmt->FogCoordfEXT = _mesa_noop_FogCoordfEXT;
+   vfmt->EdgeFlag = _mesa_noop_EdgeFlag;
+   vfmt->EdgeFlagv = _mesa_noop_EdgeFlagv;
+   vfmt->Indexi = _mesa_noop_Indexi;
+   vfmt->Indexiv = _mesa_noop_Indexiv;
+
+
+   /* Active but unsupported -- fallback if we receive these:
+    */
+   vfmt->CallList = radeon_fallback_CallList;
+   vfmt->EvalCoord1f = radeon_fallback_EvalCoord1f;
+   vfmt->EvalCoord1fv = radeon_fallback_EvalCoord1fv;
+   vfmt->EvalCoord2f = radeon_fallback_EvalCoord2f;
+   vfmt->EvalCoord2fv = radeon_fallback_EvalCoord2fv;
+   vfmt->EvalMesh1 = radeon_fallback_EvalMesh1;
+   vfmt->EvalMesh2 = radeon_fallback_EvalMesh2;
+   vfmt->EvalPoint1 = radeon_fallback_EvalPoint1;
+   vfmt->EvalPoint2 = radeon_fallback_EvalPoint2;
+   vfmt->TexCoord3f = radeon_fallback_TexCoord3f;
+   vfmt->TexCoord3fv = radeon_fallback_TexCoord3fv;
+   vfmt->TexCoord4f = radeon_fallback_TexCoord4f;
+   vfmt->TexCoord4fv = radeon_fallback_TexCoord4fv;
+   vfmt->MultiTexCoord3fARB = radeon_fallback_MultiTexCoord3fARB;
+   vfmt->MultiTexCoord3fvARB = radeon_fallback_MultiTexCoord3fvARB;
+   vfmt->MultiTexCoord4fARB = radeon_fallback_MultiTexCoord4fARB;
+   vfmt->MultiTexCoord4fvARB = radeon_fallback_MultiTexCoord4fvARB;
+   vfmt->Vertex4f = radeon_fallback_Vertex4f;
+   vfmt->Vertex4fv = radeon_fallback_Vertex4fv;
+   /* Reference the symbol without using it -- presumably to silence an unused-variable warning. */
+   (void)radeon_fallback_vtxfmt;
+
+   TNL_CONTEXT(ctx)->Driver.NotifyBegin = radeonNotifyBegin;
+
+   rmesa->vb.enabled = 1;
+   rmesa->vb.prim = &ctx->Driver.CurrentExecPrimitive; /* vb.prim[0] aliases the core's current-primitive field */
+   rmesa->vb.primflags = 0;
+   /* One initially empty cache list per code-generated entrypoint; radeonVtxfmtDestroy() frees the same set. */
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex2f );
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex2fv );
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex3f );
+   make_empty_list( &rmesa->vb.dfn_cache.Vertex3fv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4ub );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4ubv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3ub );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3ubv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4f );
+   make_empty_list( &rmesa->vb.dfn_cache.Color4fv );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3f );
+   make_empty_list( &rmesa->vb.dfn_cache.Color3fv );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3fEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3ubEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
+   make_empty_list( &rmesa->vb.dfn_cache.Normal3f );
+   make_empty_list( &rmesa->vb.dfn_cache.Normal3fv );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord2f );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord2fv );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord1f );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord1fv );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord1fvARB );
+
+   radeonInitCodegen( &rmesa->vb.codegen );
+}
+/* Unlink every dynfn on list 'l' and free both its code buffer and the node itself. */
+static void free_funcs( struct dynfn *l )
+{
+   struct dynfn *f, *tmp;
+   foreach_s (f, tmp, l) {
+      remove_from_list( f );
+      ALIGN_FREE( f->code );
+      FREE( f );
+   }
+}
+
+
+/* Currently a no-op; ctx is unused. */
+void radeonVtxfmtMakeCurrent( GLcontext *ctx )
+{
+}
+
+/* Free every generated function held in the context's dfn caches (count_funcs() runs first; its effect is not visible here). */
+void radeonVtxfmtDestroy( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+
+   count_funcs( rmesa );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex2f );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex2fv );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex3f );
+   free_funcs( &rmesa->vb.dfn_cache.Vertex3fv );
+   free_funcs( &rmesa->vb.dfn_cache.Color4ub );
+   free_funcs( &rmesa->vb.dfn_cache.Color4ubv );
+   free_funcs( &rmesa->vb.dfn_cache.Color3ub );
+   free_funcs( &rmesa->vb.dfn_cache.Color3ubv );
+   free_funcs( &rmesa->vb.dfn_cache.Color4f );
+   free_funcs( &rmesa->vb.dfn_cache.Color4fv );
+   free_funcs( &rmesa->vb.dfn_cache.Color3f );
+   free_funcs( &rmesa->vb.dfn_cache.Color3fv );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3ubEXT );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3fEXT );
+   free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
+   free_funcs( &rmesa->vb.dfn_cache.Normal3f );
+   free_funcs( &rmesa->vb.dfn_cache.Normal3fv );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord2f );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord2fv );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord1f );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord1fv );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord1fvARB );
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_vtxfmt.h b/src/mesa/drivers/dri/radeon/radeon_vtxfmt.h
new file mode 100644
index 0000000000..9792fcbb78
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_vtxfmt.h
@@ -0,0 +1,124 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.h,v 1.3 2002/12/21 17:02:16 dawes Exp $ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __RADEON_VTXFMT_H__
+#define __RADEON_VTXFMT_H__
+
+#ifdef GLX_DIRECT_RENDERING
+
+#include "radeon_context.h"
+
+
+extern void radeonVtxfmtUpdate( GLcontext *ctx );
+extern void radeonVtxfmtInit( GLcontext *ctx );
+extern void radeonVtxfmtInvalidate( GLcontext *ctx );
+extern void radeonVtxfmtDestroy( GLcontext *ctx );
+extern void radeonVtxfmtInitChoosers( GLvertexformat *vfmt );
+
+extern void radeonVtxfmtMakeCurrent( GLcontext *ctx );
+extern void radeonVtxfmtUnbindContext( GLcontext *ctx );
+
+extern void radeon_copy_to_current( GLcontext *ctx );
+/* Link 'dfn' at the head of CACHE and give it a private copy of the code between FUNC and FUNC_end; expects 'dfn' and 'key' in the caller's scope.  NOTE(review): the ALIGN_MALLOC result is not checked before memcpy. */
+#define DFN( FUNC, CACHE)				\
+do {							\
+   char *start = (char *)&FUNC;				\
+   char *end = (char *)&FUNC##_end;			\
+   insert_at_head( &CACHE, dfn );			\
+   dfn->key = key;					\
+   dfn->code = ALIGN_MALLOC( end - start, 16 );		\
+   memcpy (dfn->code, start, end - start);		\
+}							\
+while ( 0 )
+/* Patch the int at byte offset OFFSET of CODE: assert it currently holds CHECKVAL, then store NEWVAL (cast to int). All arguments are parenthesized so expression arguments expand safely. */
+#define FIXUP( CODE, OFFSET, CHECKVAL, NEWVAL )	\
+do {						\
+   int *icode = (int *)((CODE)+(OFFSET));	\
+   assert (*icode == (CHECKVAL));		\
+   *icode = (int)(NEWVAL);			\
+} while (0)
+
+
+/* Useful for figuring out the offsets: scans forward from OFFSET (an int lvalue) for
+ * CHECKVAL, logs where it was found, stores NEWVAL there and steps OFFSET past it. */
+#define FIXUP2( CODE, OFFSET, CHECKVAL, NEWVAL )		\
+do {								\
+   while (*(int *)((CODE)+(OFFSET)) != (CHECKVAL)) (OFFSET)++;	\
+   fprintf(stderr, "%s/%d CVAL %x OFFSET %d VAL %x\n", __FUNCTION__,	\
+	   __LINE__, (CHECKVAL), (OFFSET), (int)(NEWVAL));		\
+   *(int *)((CODE)+(OFFSET)) = (int)(NEWVAL);			\
+   (OFFSET) += 4;						\
+} while (0)
+
+/* 
+ */
+void radeonInitCodegen( struct dfn_generators *gen );
+void radeonInitX86Codegen( struct dfn_generators *gen );
+void radeonInitSSECodegen( struct dfn_generators *gen );
+
+
+
+/* Defined in radeon_vtxfmt_x86.c
+ */
+struct dynfn *radeon_makeX86Vertex2f( GLcontext *, int );
+struct dynfn *radeon_makeX86Vertex2fv( GLcontext *, int );
+struct dynfn *radeon_makeX86Vertex3f( GLcontext *, int );
+struct dynfn *radeon_makeX86Vertex3fv( GLcontext *, int );
+struct dynfn *radeon_makeX86Color4ub( GLcontext *, int );
+struct dynfn *radeon_makeX86Color4ubv( GLcontext *, int );
+struct dynfn *radeon_makeX86Color3ub( GLcontext *, int );
+struct dynfn *radeon_makeX86Color3ubv( GLcontext *, int );
+struct dynfn *radeon_makeX86Color4f( GLcontext *, int );
+struct dynfn *radeon_makeX86Color4fv( GLcontext *, int );
+struct dynfn *radeon_makeX86Color3f( GLcontext *, int );
+struct dynfn *radeon_makeX86Color3fv( GLcontext *, int );
+struct dynfn *radeon_makeX86SecondaryColor3ubEXT( GLcontext *, int );
+struct dynfn *radeon_makeX86SecondaryColor3ubvEXT( GLcontext *, int );
+struct dynfn *radeon_makeX86SecondaryColor3fEXT( GLcontext *, int );
+struct dynfn *radeon_makeX86SecondaryColor3fvEXT( GLcontext *, int );
+struct dynfn *radeon_makeX86Normal3f( GLcontext *, int );
+struct dynfn *radeon_makeX86Normal3fv( GLcontext *, int );
+struct dynfn *radeon_makeX86TexCoord2f( GLcontext *, int );
+struct dynfn *radeon_makeX86TexCoord2fv( GLcontext *, int );
+struct dynfn *radeon_makeX86TexCoord1f( GLcontext *, int );
+struct dynfn *radeon_makeX86TexCoord1fv( GLcontext *, int );
+struct dynfn *radeon_makeX86MultiTexCoord2fARB( GLcontext *, int );
+struct dynfn *radeon_makeX86MultiTexCoord2fvARB( GLcontext *, int );
+struct dynfn *radeon_makeX86MultiTexCoord1fARB( GLcontext *, int );
+struct dynfn *radeon_makeX86MultiTexCoord1fvARB( GLcontext *, int );
+
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_vtxfmt_c.c b/src/mesa/drivers/dri/radeon/radeon_vtxfmt_c.c
new file mode 100644
index 0000000000..188e34a420
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_vtxfmt_c.c
@@ -0,0 +1,905 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+#include "glheader.h"
+#include "mtypes.h"
+#include "colormac.h"
+#include "simple_list.h"
+#include "api_noop.h"
+#include "vtxfmt.h"
+
+#include "radeon_vtxfmt.h"
+
+/* Fallback versions of all the entrypoints for situations where
+ * codegen isn't available.  This is still a lot faster than the
+ * vb/pipeline implementation in Mesa.
+ */
+/* Append x, y, z and vb.vertex[3..] to vb.dmaptr; notify on counter == 0. */
+static void radeon_Vertex3f( GLfloat x, GLfloat y, GLfloat z )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   int slot = 3;
+
+   *rmesa->vb.dmaptr++ = *(int *)&x;
+   *rmesa->vb.dmaptr++ = *(int *)&y;
+   *rmesa->vb.dmaptr++ = *(int *)&z;
+   for ( ; slot < rmesa->vb.vertex_size; slot++)
+      *rmesa->vb.dmaptr++ = rmesa->vb.vertex[slot].i;
+
+   if (--rmesa->vb.counter == 0)
+      rmesa->vb.notify();
+}
+
+
+/* Append v[0..2] followed by vb.vertex[3..vertex_size-1] to vb.dmaptr and
+ * call vb.notify() once vb.counter has been decremented to zero. */
+static void radeon_Vertex3fv( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   int n;
+
+   for (n = 0; n < 3; n++)
+      *rmesa->vb.dmaptr++ = *(int *)&v[n];
+   for (n = 3; n < rmesa->vb.vertex_size; n++)
+      *rmesa->vb.dmaptr++ = rmesa->vb.vertex[n].i;
+
+   if (--rmesa->vb.counter == 0)
+      rmesa->vb.notify();
+}
+
+
+/* Append x, y, a zero third (z) word and vb.vertex[3..] to vb.dmaptr. */
+static void radeon_Vertex2f( GLfloat x, GLfloat y )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   int i;
+
+   *rmesa->vb.dmaptr++ = *(int *)&x;
+   *rmesa->vb.dmaptr++ = *(int *)&y;
+   *rmesa->vb.dmaptr++ = 0;
+   for (i = 3; i < rmesa->vb.vertex_size; i++)
+      *rmesa->vb.dmaptr++ = rmesa->vb.vertex[i].i;  /* .i, as in siblings */
+
+   if (--rmesa->vb.counter == 0)
+      rmesa->vb.notify();
+}
+
+
+/* Append v[0], v[1], a zero third word and vb.vertex[3..] to vb.dmaptr;
+ * vb.notify() is called once vb.counter has been decremented to zero. */
+static void radeon_Vertex2fv( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   int attr = 3;
+
+   *rmesa->vb.dmaptr++ = *(int *)&v[0];
+   *rmesa->vb.dmaptr++ = *(int *)&v[1];
+   *rmesa->vb.dmaptr++ = 0;
+   while (attr < rmesa->vb.vertex_size)
+      *rmesa->vb.dmaptr++ = rmesa->vb.vertex[attr++].i;
+   if (--rmesa->vb.counter == 0)
+      rmesa->vb.notify();
+}
+
+
+
+/* Color for ubyte (packed) color formats:
+ */
+/* Write r, g, b and an alpha of 0xff to the packed color at vb.colorptr. */
+static void radeon_Color3ub_ub( GLubyte r, GLubyte g, GLubyte b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeon_color_t *rgba = RADEON_CONTEXT(ctx)->vb.colorptr;
+   rgba->alpha = 0xff;
+   rgba->blue  = b;
+   rgba->green = g;
+   rgba->red   = r;
+}
+
+/* Array form: v[0], v[1], v[2] go to red, green, blue; alpha becomes 0xff. */
+static void radeon_Color3ubv_ub( const GLubyte *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeon_color_t *packed = RADEON_CONTEXT(ctx)->vb.colorptr;
+   packed->red   = v[0];
+   packed->green = v[1];
+   packed->blue  = v[2];
+   packed->alpha = 0xff;
+}
+
+/* Write r, g, b, a unchanged into the packed color at vb.colorptr. */
+static void radeon_Color4ub_ub( GLubyte r, GLubyte g, GLubyte b, GLubyte a )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeon_color_t *rgba = RADEON_CONTEXT(ctx)->vb.colorptr;
+   rgba->alpha = a;
+   rgba->blue  = b;
+   rgba->green = g;
+   rgba->red   = r;
+}
+
+/* Copy v[0..3] as one GLuint, passed through LE32_TO_CPU, to vb.colorptr. */
+static void radeon_Color4ubv_ub( const GLubyte *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   *(GLuint *)RADEON_CONTEXT(ctx)->vb.colorptr = LE32_TO_CPU(*(GLuint *)v);
+}
+
+
+/* Float r, g, b -> packed ubytes at vb.colorptr; alpha is set to 255. */
+static void radeon_Color3f_ub( GLfloat r, GLfloat g, GLfloat b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeon_color_t *rgba = RADEON_CONTEXT(ctx)->vb.colorptr;
+   rgba->alpha = 255;
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->red,   r );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->green, g );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->blue,  b );
+}
+
+/* Float v[0..2] -> packed ubytes at vb.colorptr; alpha is set to 255. */
+static void radeon_Color3fv_ub( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeon_color_t *packed = RADEON_CONTEXT(ctx)->vb.colorptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->red,   v[0] );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->green, v[1] );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->blue,  v[2] );
+   packed->alpha = 255;
+}
+
+/* Float r, g, b, a -> packed ubytes at vb.colorptr. */
+static void radeon_Color4f_ub( GLfloat r, GLfloat g, GLfloat b, GLfloat a )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeon_color_t *rgba = RADEON_CONTEXT(ctx)->vb.colorptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->red,   r );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->green, g );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->blue,  b );
+   UNCLAMPED_FLOAT_TO_UBYTE( rgba->alpha, a );
+}
+
+/* Float v[0..3] -> packed ubytes at vb.colorptr. */
+static void radeon_Color4fv_ub( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeon_color_t *packed = RADEON_CONTEXT(ctx)->vb.colorptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->red,   v[0] );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->green, v[1] );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->blue,  v[2] );
+   UNCLAMPED_FLOAT_TO_UBYTE( packed->alpha, v[3] );
+}
+
+
+/* Color for float color+alpha formats:
+ */
+/* UBYTE_TO_FLOAT of r, g, b goes to vb.floatcolorptr[0..2]; [3] is 1.0. */
+static void radeon_Color3ub_4f( GLubyte r, GLubyte g, GLubyte b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgba = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgba[3] = 1.0;
+   rgba[2] = UBYTE_TO_FLOAT(b);
+   rgba[1] = UBYTE_TO_FLOAT(g);
+   rgba[0] = UBYTE_TO_FLOAT(r);
+}
+
+/* UBYTE_TO_FLOAT of v[0..2] goes to vb.floatcolorptr[0..2]; [3] is 1.0. */
+static void radeon_Color3ubv_4f( const GLubyte *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgba = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgba[0] = UBYTE_TO_FLOAT(v[0]);
+   rgba[1] = UBYTE_TO_FLOAT(v[1]);
+   rgba[2] = UBYTE_TO_FLOAT(v[2]);
+   rgba[3] = 1.0;
+}
+
+/* UBYTE_TO_FLOAT of r, g, b, a goes to vb.floatcolorptr[0..3]. */
+static void radeon_Color4ub_4f( GLubyte r, GLubyte g, GLubyte b, GLubyte a )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgba = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgba[3] = UBYTE_TO_FLOAT(a);
+   rgba[2] = UBYTE_TO_FLOAT(b);
+   rgba[1] = UBYTE_TO_FLOAT(g);
+   rgba[0] = UBYTE_TO_FLOAT(r);
+}
+
+/* UBYTE_TO_FLOAT of v[0..3] goes to vb.floatcolorptr[0..3]. */
+static void radeon_Color4ubv_4f( const GLubyte *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgba = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgba[0] = UBYTE_TO_FLOAT(v[0]);
+   rgba[1] = UBYTE_TO_FLOAT(v[1]);
+   rgba[2] = UBYTE_TO_FLOAT(v[2]);
+   rgba[3] = UBYTE_TO_FLOAT(v[3]);
+}
+
+
+/* Store r, g, b and an alpha of 1.0 at vb.floatcolorptr[0..3]. */
+static void radeon_Color3f_4f( GLfloat r, GLfloat g, GLfloat b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgba = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgba[3] = 1.0;
+   rgba[2] = b;
+   rgba[1] = g;
+   rgba[0] = r;
+}
+
+/* Store v[0..2] and an alpha of 1.0 at vb.floatcolorptr[0..3]. */
+static void radeon_Color3fv_4f( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgba = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgba[0] = v[0];
+   rgba[1] = v[1];
+   rgba[2] = v[2];
+   rgba[3] = 1.0;
+}
+
+/* Store r, g, b, a at vb.floatcolorptr[0..3]. */
+static void radeon_Color4f_4f( GLfloat r, GLfloat g, GLfloat b, GLfloat a )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgba = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgba[3] = a;
+   rgba[2] = b;
+   rgba[1] = g;
+   rgba[0] = r;
+}
+
+/* Store v[0..3] at vb.floatcolorptr[0..3]. */
+static void radeon_Color4fv_4f( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgba = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgba[0] = v[0];
+   rgba[1] = v[1];
+   rgba[2] = v[2];
+   rgba[3] = v[3];
+}
+
+
+/* Color for float color formats:
+ */
+/* UBYTE_TO_FLOAT of r, g, b goes to vb.floatcolorptr[0..2]; no alpha. */
+static void radeon_Color3ub_3f( GLubyte r, GLubyte g, GLubyte b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgb = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgb[2] = UBYTE_TO_FLOAT(b);
+   rgb[1] = UBYTE_TO_FLOAT(g);
+   rgb[0] = UBYTE_TO_FLOAT(r);
+}
+
+/* UBYTE_TO_FLOAT of v[0..2] goes to vb.floatcolorptr[0..2]; no alpha. */
+static void radeon_Color3ubv_3f( const GLubyte *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgb = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgb[0] = UBYTE_TO_FLOAT(v[0]);
+   rgb[1] = UBYTE_TO_FLOAT(v[1]);
+   rgb[2] = UBYTE_TO_FLOAT(v[2]);
+}
+
+/* r, g, b go to vb.floatcolorptr; alpha goes to ctx->Current COLOR0[3]. */
+static void radeon_Color4ub_3f( GLubyte r, GLubyte g, GLubyte b, GLubyte a )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgb = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgb[0] = UBYTE_TO_FLOAT(r);
+   rgb[1] = UBYTE_TO_FLOAT(g);
+   rgb[2] = UBYTE_TO_FLOAT(b);
+   ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = UBYTE_TO_FLOAT(a);
+}
+
+/* v[0..2] go to vb.floatcolorptr; v[3] goes to ctx->Current COLOR0[3]. */
+static void radeon_Color4ubv_3f( const GLubyte *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgb = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgb[0] = UBYTE_TO_FLOAT(v[0]);
+   rgb[1] = UBYTE_TO_FLOAT(v[1]);
+   rgb[2] = UBYTE_TO_FLOAT(v[2]);
+   ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = UBYTE_TO_FLOAT(v[3]);
+}
+
+
+/* Store r, g, b at vb.floatcolorptr[0..2]; alpha is not touched. */
+static void radeon_Color3f_3f( GLfloat r, GLfloat g, GLfloat b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgb = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgb[2] = b;
+   rgb[1] = g;
+   rgb[0] = r;
+}
+
+/* Store v[0..2] at vb.floatcolorptr[0..2]; alpha is not touched. */
+static void radeon_Color3fv_3f( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgb = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgb[0] = v[0];
+   rgb[1] = v[1];
+   rgb[2] = v[2];
+}
+
+/* r, g, b go to vb.floatcolorptr; a goes to ctx->Current COLOR0[3]. */
+static void radeon_Color4f_3f( GLfloat r, GLfloat g, GLfloat b, GLfloat a )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgb = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgb[0] = r;
+   rgb[1] = g;
+   rgb[2] = b;
+   ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = a;
+}
+
+/* v[0..2] go to vb.floatcolorptr; v[3] goes to ctx->Current COLOR0[3]. */
+static void radeon_Color4fv_3f( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *rgb = RADEON_CONTEXT(ctx)->vb.floatcolorptr;
+   rgb[0] = v[0];
+   rgb[1] = v[1];
+   rgb[2] = v[2];
+   ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = v[3];
+}
+
+
+/* Secondary Color:
+ */
+/* Write r, g, b and an alpha of 0xff to the packed color at vb.specptr. */
+static void radeon_SecondaryColor3ubEXT_ub( GLubyte r, GLubyte g, GLubyte b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeon_color_t *spec = RADEON_CONTEXT(ctx)->vb.specptr;
+   spec->alpha = 0xff;
+   spec->blue  = b;
+   spec->green = g;
+   spec->red   = r;
+}
+
+/* Write v[0..2] and an alpha of 0xff to the packed color at vb.specptr. */
+static void radeon_SecondaryColor3ubvEXT_ub( const GLubyte *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeon_color_t *spec = RADEON_CONTEXT(ctx)->vb.specptr;
+   spec->red   = v[0];
+   spec->green = v[1];
+   spec->blue  = v[2];
+   spec->alpha = 0xff;
+}
+
+/* Float r, g, b -> packed ubytes at vb.specptr; alpha is set to 255. */
+static void radeon_SecondaryColor3fEXT_ub( GLfloat r, GLfloat g, GLfloat b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeon_color_t *spec = RADEON_CONTEXT(ctx)->vb.specptr;
+   spec->alpha = 255;
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->red,   r );
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->green, g );
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->blue,  b );
+}
+
+/* Float v[0..2] -> packed ubytes at vb.specptr; alpha is set to 255. */
+static void radeon_SecondaryColor3fvEXT_ub( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   radeon_color_t *spec = RADEON_CONTEXT(ctx)->vb.specptr;
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->red,   v[0] );
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->green, v[1] );
+   UNCLAMPED_FLOAT_TO_UBYTE( spec->blue,  v[2] );
+   spec->alpha = 255;
+}
+
+/* UBYTE_TO_FLOAT of r, g, b goes to vb.floatspecptr[0..2]; [3] is 1.0. */
+static void radeon_SecondaryColor3ubEXT_3f( GLubyte r, GLubyte g, GLubyte b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *spec = RADEON_CONTEXT(ctx)->vb.floatspecptr;
+   spec[3] = 1.0;
+   spec[2] = UBYTE_TO_FLOAT(b);
+   spec[1] = UBYTE_TO_FLOAT(g);
+   spec[0] = UBYTE_TO_FLOAT(r);
+}
+
+/* UBYTE_TO_FLOAT of v[0..2] goes to vb.floatspecptr[0..2]; [3] is 1.0. */
+static void radeon_SecondaryColor3ubvEXT_3f( const GLubyte *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *spec = RADEON_CONTEXT(ctx)->vb.floatspecptr;
+   spec[0] = UBYTE_TO_FLOAT(v[0]);
+   spec[1] = UBYTE_TO_FLOAT(v[1]);
+   spec[2] = UBYTE_TO_FLOAT(v[2]);
+   spec[3] = 1.0;
+}
+
+/* Store r, g, b and a fourth value of 1.0 at vb.floatspecptr[0..3]. */
+static void radeon_SecondaryColor3fEXT_3f( GLfloat r, GLfloat g, GLfloat b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *spec = RADEON_CONTEXT(ctx)->vb.floatspecptr;
+   spec[3] = 1.0;
+   spec[2] = b;
+   spec[1] = g;
+   spec[0] = r;
+}
+
+/* Store v[0..2] and a fourth value of 1.0 at vb.floatspecptr[0..3]. */
+static void radeon_SecondaryColor3fvEXT_3f( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *spec = RADEON_CONTEXT(ctx)->vb.floatspecptr;
+   spec[0] = v[0];
+   spec[1] = v[1];
+   spec[2] = v[2];
+   spec[3] = 1.0;
+}
+
+
+/* Normal
+ */
+/* Store the three normal components at vb.normalptr[0..2]. */
+static void radeon_Normal3f( GLfloat n0, GLfloat n1, GLfloat n2 )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *normal = RADEON_CONTEXT(ctx)->vb.normalptr;
+   normal[2] = n2;
+   normal[1] = n1;
+   normal[0] = n0;
+}
+
+/* Store v[0..2] at vb.normalptr[0..2]. */
+static void radeon_Normal3fv( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *normal = RADEON_CONTEXT(ctx)->vb.normalptr;
+   normal[0] = v[0];
+   normal[1] = v[1];
+   normal[2] = v[2];
+}
+
+
+/* TexCoord
+ */
+/* Store s and a second coordinate of 0 at vb.texcoordptr[0]. */
+static void radeon_TexCoord1f( GLfloat s )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *st = RADEON_CONTEXT(ctx)->vb.texcoordptr[0];
+   st[1] = 0;
+   st[0] = s;
+}
+
+/* Store v[0] and a second coordinate of 0 at vb.texcoordptr[0]. */
+static void radeon_TexCoord1fv( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *st = RADEON_CONTEXT(ctx)->vb.texcoordptr[0];
+   st[0] = v[0];
+   st[1] = 0;
+}
+
+/* Store s and t at vb.texcoordptr[0]. */
+static void radeon_TexCoord2f( GLfloat s, GLfloat t )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *st = RADEON_CONTEXT(ctx)->vb.texcoordptr[0];
+   st[1] = t;
+   st[0] = s;
+}
+
+/* Store v[0] and v[1] at vb.texcoordptr[0]. */
+static void radeon_TexCoord2fv( const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *st = RADEON_CONTEXT(ctx)->vb.texcoordptr[0];
+   st[0] = v[0];
+   st[1] = v[1];
+}
+
+
+/* MultiTexcoord
+ * 
+ * Technically speaking, these functions should subtract GL_TEXTURE0 from
+ * \c target before masking and using it.  The value of GL_TEXTURE0 is 0x84C0,
+ * which has the low-order 5 bits 0.  For all possible valid values of
+ * \c target, subtracting GL_TEXTURE0 has the net effect of masking \c target
+ * with 0x1F.  Masking with 0x1F and then masking with 0x01 is redundant, so
+ * the subtraction has been omitted.
+ */
+
+/* Store s and a second coordinate of 0 at vb.texcoordptr[target & 1]. */
+static void radeon_MultiTexCoord1fARB( GLenum target, GLfloat s  )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *st = RADEON_CONTEXT(ctx)->vb.texcoordptr[target & 1];
+   st[1] = 0;
+   st[0] = s;
+}
+
+/* Store v[0] and a second coordinate of 0 at vb.texcoordptr[target & 1]. */
+static void radeon_MultiTexCoord1fvARB( GLenum target, const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *st = RADEON_CONTEXT(ctx)->vb.texcoordptr[target & 1];
+   st[0] = v[0];
+   st[1] = 0;
+}
+
+/* Store s and t at vb.texcoordptr[target & 1]. */
+static void radeon_MultiTexCoord2fARB( GLenum target, GLfloat s, GLfloat t )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *st = RADEON_CONTEXT(ctx)->vb.texcoordptr[target & 1];
+   st[1] = t;
+   st[0] = s;
+}
+
+/* Store v[0] and v[1] at vb.texcoordptr[target & 1]. */
+static void radeon_MultiTexCoord2fvARB( GLenum target, const GLfloat *v )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLfloat *st = RADEON_CONTEXT(ctx)->vb.texcoordptr[target & 1];
+   st[0] = v[0];
+   st[1] = v[1];
+}
+
+/* Return the entry of list l whose key equals key, or a null pointer. */
+static struct dynfn *lookup( struct dynfn *l, int key )
+{
+   struct dynfn *entry;
+
+   foreach( entry, l )
+      if (entry->key == key)
+         return entry;
+
+   return (struct dynfn *) 0;
+}
+
+/* Can't use the loopback template for this.  choose_##FN picks the cached
+ * or generated dynfn for the vertex-format key (else radeon_##FN), installs
+ * it in ctx->Exec->FN and forwards the call. */
+#define CHOOSE(FN, FNTYPE, MASK, ACTIVE, ARGS1, ARGS2 )			\
+static void choose_##FN ARGS1						\
+{									\
+   GET_CURRENT_CONTEXT(ctx);						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
+   int key = rmesa->vb.vertex_format & (MASK|ACTIVE);			\
+   struct dynfn *dfn;							\
+									\
+   dfn = lookup( &rmesa->vb.dfn_cache.FN, key );			\
+   if (dfn == 0)							\
+      dfn = rmesa->vb.codegen.FN( ctx, key );				\
+   else if (RADEON_DEBUG & DEBUG_CODEGEN)				\
+      fprintf(stderr, "%s -- cached codegen\n", __FUNCTION__ );		\
+									\
+   if (dfn)								\
+      ctx->Exec->FN = (FNTYPE)(dfn->code);				\
+   else {								\
+      if (RADEON_DEBUG & DEBUG_CODEGEN)					\
+	 fprintf(stderr, "%s -- generic version\n", __FUNCTION__ );	\
+      ctx->Exec->FN = radeon_##FN;					\
+   }									\
+									\
+   ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;			\
+   ctx->Exec->FN ARGS2;							\
+}
+
+/* Chooser for the primary color entrypoints.  The 'c' fallback is _ub with
+ * ACTIVE_PKCOLOR, _3f with FPCOLOR but no FPALPHA, otherwise _4f.
+ *
+ * For the _3f case, only allow one color function to be hooked in at
+ * a time.  Eventually, use a similar mechanism to allow selecting the
+ * color component of the vertex format based on client behaviour.
+ * Note:  Perform these actions even if there is a codegen or cached
+ * codegen version of the chosen function.
+ */
+#define CHOOSE_COLOR(FN, FNTYPE, NR, MASK, ACTIVE, ARGS1, ARGS2 )	\
+static void choose_##FN ARGS1						\
+{									\
+   GET_CURRENT_CONTEXT(ctx); \
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
+   int key = rmesa->vb.vertex_format & (MASK|ACTIVE);			\
+   struct dynfn *dfn;							\
+									\
+   if (rmesa->vb.vertex_format & ACTIVE_PKCOLOR) {			\
+      ctx->Exec->FN = radeon_##FN##_ub;					\
+   }									\
+   else if ((rmesa->vb.vertex_format &					\
+            (ACTIVE_FPCOLOR|ACTIVE_FPALPHA)) == ACTIVE_FPCOLOR) {	\
+									\
+      if (rmesa->vb.installed_color_3f_sz != NR) {			\
+         rmesa->vb.installed_color_3f_sz = NR;				\
+         if (NR == 3) ctx->Current.Attrib[VERT_ATTRIB_COLOR0][3] = 1.0;	\
+         if (ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) {		\
+            radeon_copy_to_current( ctx );				\
+            _mesa_install_exec_vtxfmt( ctx, &rmesa->vb.vtxfmt );	\
+            ctx->Exec->FN ARGS2;					\
+            return;							\
+         }								\
+      }									\
+									\
+      ctx->Exec->FN = radeon_##FN##_3f;					\
+   }									\
+   else {								\
+      ctx->Exec->FN = radeon_##FN##_4f;					\
+   }									\
+									\
+									\
+   dfn = lookup( &rmesa->vb.dfn_cache.FN, key );			\
+   if (!dfn) dfn = rmesa->vb.codegen.FN( ctx, key );			\
+									\
+   if (dfn) {								\
+      if (RADEON_DEBUG & DEBUG_CODEGEN)					\
+         fprintf(stderr, "%s -- codegen version\n", __FUNCTION__ );	\
+      ctx->Exec->FN = (FNTYPE)dfn->code;				\
+   }									\
+   else if (RADEON_DEBUG & DEBUG_CODEGEN)				\
+         fprintf(stderr, "%s -- 'c' version\n", __FUNCTION__ );		\
+									\
+   ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;			\
+   ctx->Exec->FN ARGS2;							\
+}
+
+/* Chooser for the secondary color entrypoints; the generic fallback is the
+ * _ub version when ACTIVE_PKSPEC is set in the vertex format, else _3f.
+ * Right now there are both _ub and _3f versions of the secondary color
+ * functions.  Currently, we only set-up the hardware to use the _ub versions.
+ * The _3f versions are needed for the cases where secondary color isn't used
+ * in the vertex format, but it still needs to be stored in the context
+ * state vector.
+ */
+#define CHOOSE_SECONDARY_COLOR(FN, FNTYPE, MASK, ACTIVE, ARGS1, ARGS2 )	\
+static void choose_##FN ARGS1						\
+{									\
+   GET_CURRENT_CONTEXT(ctx);						\
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
+   int key = rmesa->vb.vertex_format & (MASK|ACTIVE);			\
+   struct dynfn *dfn = lookup( &rmesa->vb.dfn_cache.FN, key );		\
+									\
+   if (dfn == 0)							\
+      dfn = rmesa->vb.codegen.FN( ctx, key );				\
+   else  if (RADEON_DEBUG & DEBUG_CODEGEN)				\
+      fprintf(stderr, "%s -- cached version\n", __FUNCTION__ );		\
+									\
+   if (dfn)								\
+      ctx->Exec->FN = (FNTYPE)(dfn->code);				\
+   else {								\
+      if (RADEON_DEBUG & DEBUG_CODEGEN)					\
+         fprintf(stderr, "%s -- generic version\n", __FUNCTION__ );	\
+      ctx->Exec->FN = ((rmesa->vb.vertex_format & ACTIVE_PKSPEC) != 0)	\
+	  ? radeon_##FN##_ub : radeon_##FN##_3f;			\
+   }									\
+									\
+   ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;			\
+   ctx->Exec->FN ARGS2;							\
+}
+
+
+
+
+
+/* Shorthands for the RADEON_CP_VC_FRMT_* vertex-format bits used below.
+ */
+#define ACTIVE_XYZW (RADEON_CP_VC_FRMT_W0|RADEON_CP_VC_FRMT_Z)
+#define ACTIVE_NORM RADEON_CP_VC_FRMT_N0
+
+#define ACTIVE_PKCOLOR RADEON_CP_VC_FRMT_PKCOLOR
+#define ACTIVE_FPCOLOR RADEON_CP_VC_FRMT_FPCOLOR
+#define ACTIVE_FPALPHA RADEON_CP_VC_FRMT_FPALPHA
+#define ACTIVE_COLOR (ACTIVE_FPCOLOR|ACTIVE_PKCOLOR)
+
+#define ACTIVE_PKSPEC RADEON_CP_VC_FRMT_PKSPEC
+#define ACTIVE_FPSPEC RADEON_CP_VC_FRMT_FPSPEC
+#define ACTIVE_SPEC   (ACTIVE_FPSPEC|ACTIVE_PKSPEC)
+
+#define ACTIVE_ST0 RADEON_CP_VC_FRMT_ST0
+#define ACTIVE_ST1 RADEON_CP_VC_FRMT_ST1
+#define ACTIVE_ST_ALL (RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST0)
+
+/* Each codegen function should be fully specified by a subset of
+ * rmesa->vb.vertex_format: the choosers key on vertex_format & (MASK|ACTIVE),
+ * where each MASK accumulates the ACTIVE bits of the entries before it. */
+#define MASK_NORM    (ACTIVE_XYZW)
+#define MASK_COLOR   (MASK_NORM|ACTIVE_NORM)
+#define MASK_SPEC    (MASK_COLOR|ACTIVE_COLOR)
+#define MASK_ST0     (MASK_SPEC|ACTIVE_SPEC)
+#define MASK_ST1     (MASK_ST0|ACTIVE_ST0)
+#define MASK_ST_ALL  (MASK_ST1|ACTIVE_ST1)
+#define MASK_VERTEX  (MASK_ST_ALL|ACTIVE_FPALPHA) 
+
+/* Entrypoint signatures passed as the FNTYPE argument of the CHOOSE macros. */
+typedef void (*p4f)( GLfloat, GLfloat, GLfloat, GLfloat );
+typedef void (*p3f)( GLfloat, GLfloat, GLfloat );
+typedef void (*p2f)( GLfloat, GLfloat );
+typedef void (*p1f)( GLfloat );
+typedef void (*pe2f)( GLenum, GLfloat, GLfloat );
+typedef void (*pe1f)( GLenum, GLfloat );
+typedef void (*p4ub)( GLubyte, GLubyte, GLubyte, GLubyte );
+typedef void (*p3ub)( GLubyte, GLubyte, GLubyte );
+typedef void (*pfv)( const GLfloat * );
+typedef void (*pefv)( GLenum, const GLfloat * );
+typedef void (*pubv)( const GLubyte * );
+
+/* Instantiate one choose_<FN> function per entrypoint handled in this file. */
+CHOOSE(Normal3f, p3f, MASK_NORM, ACTIVE_NORM, 
+       (GLfloat a,GLfloat b,GLfloat c), (a,b,c))
+CHOOSE(Normal3fv, pfv, MASK_NORM, ACTIVE_NORM, 
+       (const GLfloat *v), (v))
+
+CHOOSE_COLOR(Color4ub, p4ub, 4, MASK_COLOR, ACTIVE_COLOR,
+	(GLubyte a,GLubyte b, GLubyte c, GLubyte d), (a,b,c,d))
+CHOOSE_COLOR(Color4ubv, pubv, 4, MASK_COLOR, ACTIVE_COLOR, 
+	(const GLubyte *v), (v))
+CHOOSE_COLOR(Color3ub, p3ub, 3, MASK_COLOR, ACTIVE_COLOR, 
+	(GLubyte a,GLubyte b, GLubyte c), (a,b,c))
+CHOOSE_COLOR(Color3ubv, pubv, 3, MASK_COLOR, ACTIVE_COLOR, 
+	(const GLubyte *v), (v))
+
+CHOOSE_COLOR(Color4f, p4f, 4, MASK_COLOR, ACTIVE_COLOR, 
+	(GLfloat a,GLfloat b, GLfloat c, GLfloat d), (a,b,c,d))
+CHOOSE_COLOR(Color4fv, pfv, 4, MASK_COLOR, ACTIVE_COLOR, 
+	(const GLfloat *v), (v))
+CHOOSE_COLOR(Color3f, p3f, 3, MASK_COLOR, ACTIVE_COLOR,
+	(GLfloat a,GLfloat b, GLfloat c), (a,b,c))
+CHOOSE_COLOR(Color3fv, pfv, 3, MASK_COLOR, ACTIVE_COLOR,
+	(const GLfloat *v), (v))
+
+
+CHOOSE_SECONDARY_COLOR(SecondaryColor3ubEXT, p3ub, MASK_SPEC, ACTIVE_SPEC,
+	(GLubyte a,GLubyte b, GLubyte c), (a,b,c))
+CHOOSE_SECONDARY_COLOR(SecondaryColor3ubvEXT, pubv, MASK_SPEC, ACTIVE_SPEC,
+	(const GLubyte *v), (v))
+CHOOSE_SECONDARY_COLOR(SecondaryColor3fEXT, p3f, MASK_SPEC, ACTIVE_SPEC,
+	(GLfloat a,GLfloat b, GLfloat c), (a,b,c))
+CHOOSE_SECONDARY_COLOR(SecondaryColor3fvEXT, pfv, MASK_SPEC, ACTIVE_SPEC,
+	(const GLfloat *v), (v))
+
+CHOOSE(TexCoord2f, p2f, MASK_ST0, ACTIVE_ST0, 
+       (GLfloat a,GLfloat b), (a,b))
+CHOOSE(TexCoord2fv, pfv, MASK_ST0, ACTIVE_ST0, 
+       (const GLfloat *v), (v))
+CHOOSE(TexCoord1f, p1f, MASK_ST0, ACTIVE_ST0, 
+       (GLfloat a), (a))
+CHOOSE(TexCoord1fv, pfv, MASK_ST0, ACTIVE_ST0, 
+       (const GLfloat *v), (v))
+
+CHOOSE(MultiTexCoord2fARB, pe2f, MASK_ST_ALL, ACTIVE_ST_ALL,
+	 (GLenum u,GLfloat a,GLfloat b), (u,a,b))
+CHOOSE(MultiTexCoord2fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL,
+	(GLenum u,const GLfloat *v), (u,v))
+CHOOSE(MultiTexCoord1fARB, pe1f, MASK_ST_ALL, ACTIVE_ST_ALL,
+	 (GLenum u,GLfloat a), (u,a))
+CHOOSE(MultiTexCoord1fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL,
+	(GLenum u,const GLfloat *v), (u,v))
+
+CHOOSE(Vertex3f, p3f, MASK_VERTEX, MASK_VERTEX, 
+       (GLfloat a,GLfloat b,GLfloat c), (a,b,c))
+CHOOSE(Vertex3fv, pfv, MASK_VERTEX, MASK_VERTEX, 
+       (const GLfloat *v), (v))
+CHOOSE(Vertex2f, p2f, MASK_VERTEX, MASK_VERTEX, 
+       (GLfloat a,GLfloat b), (a,b))
+CHOOSE(Vertex2fv, pfv, MASK_VERTEX, MASK_VERTEX, 
+       (const GLfloat *v), (v))
+
+
+
+
+/* Point every entrypoint of vfmt handled here at its choose_* function. */
+void radeonVtxfmtInitChoosers( GLvertexformat *vfmt )
+{
+   vfmt->Vertex2f = choose_Vertex2f;
+   vfmt->Vertex2fv = choose_Vertex2fv;
+   vfmt->Vertex3f = choose_Vertex3f;
+   vfmt->Vertex3fv = choose_Vertex3fv;
+   vfmt->Normal3f = choose_Normal3f;
+   vfmt->Normal3fv = choose_Normal3fv;
+   vfmt->Color3ub = choose_Color3ub;
+   vfmt->Color3ubv = choose_Color3ubv;
+   vfmt->Color4ub = choose_Color4ub;
+   vfmt->Color4ubv = choose_Color4ubv;
+   vfmt->Color3f = choose_Color3f;
+   vfmt->Color3fv = choose_Color3fv;
+   vfmt->Color4f = choose_Color4f;
+   vfmt->Color4fv = choose_Color4fv;
+   vfmt->SecondaryColor3ubEXT = choose_SecondaryColor3ubEXT;
+   vfmt->SecondaryColor3ubvEXT = choose_SecondaryColor3ubvEXT;
+   vfmt->SecondaryColor3fEXT = choose_SecondaryColor3fEXT;
+   vfmt->SecondaryColor3fvEXT = choose_SecondaryColor3fvEXT;
+   vfmt->TexCoord1f = choose_TexCoord1f;
+   vfmt->TexCoord1fv = choose_TexCoord1fv;
+   vfmt->TexCoord2f = choose_TexCoord2f;
+   vfmt->TexCoord2fv = choose_TexCoord2fv;
+   vfmt->MultiTexCoord1fARB = choose_MultiTexCoord1fARB;
+   vfmt->MultiTexCoord1fvARB = choose_MultiTexCoord1fvARB;
+   vfmt->MultiTexCoord2fARB = choose_MultiTexCoord2fARB;
+   vfmt->MultiTexCoord2fvARB = choose_MultiTexCoord2fvARB;
+}
+
+/* Generator stub: ignores both arguments and returns a null dynfn pointer. */
+static struct dynfn *codegen_noop( GLcontext *ctx, int key )
+{
+   (void) key; (void) ctx;   /* both parameters are deliberately unused */
+   return (struct dynfn *) 0;
+}
+
+/* Default every generator to codegen_noop, then call the x86/SSE setup. */
+void radeonInitCodegen( struct dfn_generators *gen )
+{
+   gen->Vertex2f = codegen_noop;
+   gen->Vertex2fv = codegen_noop;
+   gen->Vertex3f = codegen_noop;
+   gen->Vertex3fv = codegen_noop;
+   gen->Normal3f = codegen_noop;
+   gen->Normal3fv = codegen_noop;
+   gen->Color3ub = codegen_noop;
+   gen->Color3ubv = codegen_noop;
+   gen->Color4ub = codegen_noop;
+   gen->Color4ubv = codegen_noop;
+   gen->Color3f = codegen_noop;
+   gen->Color3fv = codegen_noop;
+   gen->Color4f = codegen_noop;
+   gen->Color4fv = codegen_noop;
+   gen->SecondaryColor3ubEXT = codegen_noop;
+   gen->SecondaryColor3ubvEXT = codegen_noop;
+   gen->SecondaryColor3fEXT = codegen_noop;
+   gen->SecondaryColor3fvEXT = codegen_noop;
+   gen->TexCoord1f = codegen_noop;
+   gen->TexCoord1fv = codegen_noop;
+   gen->TexCoord2f = codegen_noop;
+   gen->TexCoord2fv = codegen_noop;
+   gen->MultiTexCoord1fARB = codegen_noop;
+   gen->MultiTexCoord1fvARB = codegen_noop;
+   gen->MultiTexCoord2fARB = codegen_noop;
+   gen->MultiTexCoord2fvARB = codegen_noop;
+
+   if (getenv("RADEON_NO_CODEGEN"))
+      return;   /* environment asks for the noop generators only */
+#if defined(USE_X86_ASM)
+   radeonInitX86Codegen( gen );
+#endif
+#if defined(USE_SSE_ASM)
+   radeonInitSSECodegen( gen );
+#endif
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_vtxfmt_sse.c b/src/mesa/drivers/dri/radeon/radeon_vtxfmt_sse.c
new file mode 100644
index 0000000000..0f2c82bd87
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_vtxfmt_sse.c
@@ -0,0 +1,232 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "simple_list.h" 
+#include "radeon_vtxfmt.h"
+
+#if defined(USE_SSE_ASM)
+#include "X86/common_x86_asm.h"
+/* Declares FUNC and FUNC_end, defined outside this file; presumably the bounds of an asm template consumed by DFN (confirm). */
+#define EXTERN( FUNC )		\
+extern const char *FUNC;	\
+extern const char *FUNC##_end
+
+EXTERN( _sse_Attribute2fv );
+EXTERN( _sse_Attribute2f );
+EXTERN( _sse_Attribute3fv );
+EXTERN( _sse_Attribute3f );
+EXTERN( _sse_MultiTexCoord2fv );
+EXTERN( _sse_MultiTexCoord2f );
+EXTERN( _sse_MultiTexCoord2fv_2 );
+EXTERN( _sse_MultiTexCoord2f_2 );
+
+/* Build specialized versions of the immediate calls on the fly for
+ * the current state.
+ */
+/* Build a dynfn from the _sse_Attribute2fv template, patched with 'dest'; returns 0 if the allocation fails. */
+static struct dynfn *radeon_makeSSEAttribute2fv( struct dynfn * cache, int key,
+					       const char * name, void * dest)
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key );   /* callers in this file pass __FUNCTION__ as 'name' */
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   DFN ( _sse_Attribute2fv, (*cache) );    /* macro defined outside this file; it picks up the local 'dfn' by name */
+   FIXUP(dfn->code, 10, 0x0, (int)dest);   /* offset and expected value are tied to the asm template; keep in sync */
+   return dfn;
+}
+/* Build a dynfn from the _sse_Attribute2f template, patched with 'dest'; returns 0 if the allocation fails. */
+static struct dynfn *radeon_makeSSEAttribute2f( struct dynfn * cache, int key,
+					      const char * name, void * dest )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key );   /* callers in this file pass __FUNCTION__ as 'name' */
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   DFN ( _sse_Attribute2f, (*cache) );    /* macro defined outside this file; it picks up the local 'dfn' by name */
+   FIXUP(dfn->code, 8, 0x0, (int)dest);   /* offset and expected value are tied to the asm template; keep in sync */
+   return dfn;
+}
+/* Build a dynfn from the _sse_Attribute3fv template, patched with 'dest' and 'dest'+8; returns 0 if the allocation fails. */
+static struct dynfn *radeon_makeSSEAttribute3fv( struct dynfn * cache, int key,
+					       const char * name, void * dest)
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key );   /* callers in this file pass __FUNCTION__ as 'name' */
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   DFN ( _sse_Attribute3fv, (*cache) );      /* macro defined outside this file; it picks up the local 'dfn' by name */
+   FIXUP(dfn->code, 13, 0x0, (int)dest);     /* first patch site: dest */
+   FIXUP(dfn->code, 18, 0x8, 8+(int)dest);   /* second patch site: dest + 8 bytes */
+   return dfn;
+}
+/* Build a dynfn from the _sse_Attribute3f template, patched with 'dest' and 'dest'+8; returns 0 if the allocation fails. */
+static struct dynfn *radeon_makeSSEAttribute3f( struct dynfn * cache, int key,
+					      const char * name, void * dest )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key );   /* callers in this file pass __FUNCTION__ as 'name' */
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   DFN ( _sse_Attribute3f, (*cache) );       /* macro defined outside this file; it picks up the local 'dfn' by name */
+   FIXUP(dfn->code, 12, 0x0, (int)dest);     /* first patch site: dest */
+   FIXUP(dfn->code, 17, 0x8, 8+(int)dest);   /* second patch site: dest + 8 bytes */
+   return dfn;
+}
+/* Normal3fv generator: radeon_makeSSEAttribute3fv applied to the Normal3fv cache and vb.normalptr. */
+static struct dynfn * radeon_makeSSENormal3fv( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.Normal3fv;
+
+   return radeon_makeSSEAttribute3fv( cache, key, __FUNCTION__, rmesa->vb.normalptr );
+}
+/* Normal3f generator: radeon_makeSSEAttribute3f applied to the Normal3f cache and vb.normalptr. */
+static struct dynfn *radeon_makeSSENormal3f( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.Normal3f;
+
+   return radeon_makeSSEAttribute3f( cache, key, __FUNCTION__, rmesa->vb.normalptr );
+}
+/* Color3fv generator; declines (returns 0) when 'key' has PKCOLOR or FPALPHA set. */
+static struct dynfn *radeon_makeSSEColor3fv( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa;
+
+   /* Either format bit rules this template out. */
+   if ((key & (RADEON_CP_VC_FRMT_PKCOLOR|RADEON_CP_VC_FRMT_FPALPHA)) != 0)
+      return 0;
+
+   rmesa = RADEON_CONTEXT(ctx);
+   return radeon_makeSSEAttribute3fv( & rmesa->vb.dfn_cache.Color3fv, key,
+				      __FUNCTION__, rmesa->vb.floatcolorptr );
+}
+/* Color3f generator; declines (returns 0) when 'key' has PKCOLOR or FPALPHA set. */
+static struct dynfn *radeon_makeSSEColor3f( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa;
+
+   /* Either format bit rules this template out. */
+   if ((key & (RADEON_CP_VC_FRMT_PKCOLOR|RADEON_CP_VC_FRMT_FPALPHA)) != 0)
+      return 0;
+
+   rmesa = RADEON_CONTEXT(ctx);
+   return radeon_makeSSEAttribute3f( & rmesa->vb.dfn_cache.Color3f, key,
+				     __FUNCTION__, rmesa->vb.floatcolorptr );
+}
+/* TexCoord2fv generator: radeon_makeSSEAttribute2fv applied to the TexCoord2fv cache and vb.texcoordptr[0]. */
+static struct dynfn *radeon_makeSSETexCoord2fv( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.TexCoord2fv;
+
+   return radeon_makeSSEAttribute2fv( cache, key, __FUNCTION__, rmesa->vb.texcoordptr[0] );
+}
+/* TexCoord2f generator: radeon_makeSSEAttribute2f applied to the TexCoord2f cache and vb.texcoordptr[0]. */
+static struct dynfn *radeon_makeSSETexCoord2f( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.TexCoord2f;
+
+   return radeon_makeSSEAttribute2f( cache, key, __FUNCTION__, rmesa->vb.texcoordptr[0] );
+}
+/* MultiTexCoord2fv generator: picks one of two SSE templates from the ST0/ST1 bits of 'key'; returns 0 if the allocation fails. */
+static struct dynfn *radeon_makeSSEMultiTexCoord2fv( GLcontext *ctx, int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   if ((key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) ==
+      (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) {
+      DFN ( _sse_MultiTexCoord2fv, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );   /* both ST0 and ST1 set in key */
+      FIXUP(dfn->code, 18, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);	/* patched with the value of texcoordptr[0] */
+   } else {
+      DFN ( _sse_MultiTexCoord2fv_2, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+      FIXUP(dfn->code, 14, 0x0, (int)rmesa->vb.texcoordptr);   /* patched with the address of the texcoordptr array itself */
+   }
+   return dfn;
+}
+/* MultiTexCoord2f generator: picks one of two SSE templates from the ST0/ST1 bits of 'key'; returns 0 if the allocation fails. */
+static struct dynfn *radeon_makeSSEMultiTexCoord2f( GLcontext *ctx, int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   if ((key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) ==
+      (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) {
+      DFN ( _sse_MultiTexCoord2f, rmesa->vb.dfn_cache.MultiTexCoord2fARB );   /* both ST0 and ST1 set in key */
+      FIXUP(dfn->code, 16, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);	/* patched with the value of texcoordptr[0] */
+   } else {
+      DFN ( _sse_MultiTexCoord2f_2, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+      FIXUP(dfn->code, 15, 0x0, (int)rmesa->vb.texcoordptr);   /* patched with the address of the texcoordptr array itself */
+   }
+   return dfn;
+}
+/* Install the SSE generators into 'gen' when cpu_has_xmm is non-zero (presumably the SSE capability test; see X86/common_x86_asm.h). */
+void radeonInitSSECodegen( struct dfn_generators *gen )
+{
+   if ( cpu_has_xmm ) {
+      /* Assigned without (void *) casts so the compiler checks each prototype against its slot. */
+      gen->Normal3fv = radeon_makeSSENormal3fv;
+      gen->Normal3f = radeon_makeSSENormal3f;
+      gen->Color3fv = radeon_makeSSEColor3fv;
+      gen->Color3f = radeon_makeSSEColor3f;
+      gen->TexCoord2fv = radeon_makeSSETexCoord2fv;
+      gen->TexCoord2f = radeon_makeSSETexCoord2f;
+      gen->MultiTexCoord2fvARB = radeon_makeSSEMultiTexCoord2fv;
+      gen->MultiTexCoord2fARB = radeon_makeSSEMultiTexCoord2f; }
+}
+
+#else 
+/* Built without USE_SSE_ASM: nothing to install, 'gen' is left untouched. */
+void radeonInitSSECodegen( struct dfn_generators *gen )
+{
+   (void) gen;
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_vtxfmt_x86.c b/src/mesa/drivers/dri/radeon/radeon_vtxfmt_x86.c
new file mode 100644
index 0000000000..92941ca5f8
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_vtxfmt_x86.c
@@ -0,0 +1,437 @@
+/* $XFree86$ */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "imports.h"
+#include "simple_list.h" 
+#include "radeon_vtxfmt.h"
+
+#if defined(USE_X86_ASM)
+/* Declares FUNC and FUNC_end, defined outside this file; presumably the bounds of an asm template consumed by DFN (confirm). */
+#define EXTERN( FUNC )		\
+extern const char *FUNC;	\
+extern const char *FUNC##_end
+
+EXTERN ( _x86_Attribute2fv );
+EXTERN ( _x86_Attribute2f );
+EXTERN ( _x86_Attribute3fv );
+EXTERN ( _x86_Attribute3f );
+EXTERN ( _x86_Vertex3fv_6 );
+EXTERN ( _x86_Vertex3fv_8 );
+EXTERN ( _x86_Vertex3fv );
+EXTERN ( _x86_Vertex3f_4 );
+EXTERN ( _x86_Vertex3f_6 );
+EXTERN ( _x86_Vertex3f );
+EXTERN ( _x86_Color4ubv_ub );
+EXTERN ( _x86_Color4ubv_4f );
+EXTERN ( _x86_Color4ub_ub );
+EXTERN ( _x86_MultiTexCoord2fv );
+EXTERN ( _x86_MultiTexCoord2fv_2 );
+EXTERN ( _x86_MultiTexCoord2f );
+EXTERN ( _x86_MultiTexCoord2f_2 );
+
+
+/* Build specialized versions of the immediate calls on the fly for
+ * the current state.  Generic x86 versions.
+ */
+/* Vertex3f generator: picks a template by vb.vertex_size (4, 6, or generic) and patches it; returns 0 if the allocation fails. */
+struct dynfn *radeon_makeX86Vertex3f( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x %d\n", __FUNCTION__, key, rmesa->vb.vertex_size );
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   switch (rmesa->vb.vertex_size) {
+   case 4: {
+      /* vertex_size == 4: the patch sites below belong to the _x86_Vertex3f_4 template. */
+      DFN ( _x86_Vertex3f_4, rmesa->vb.dfn_cache.Vertex3f );
+      FIXUP(dfn->code, 2, 0x0, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 25, 0x0, (int)&rmesa->vb.vertex[3]);
+      FIXUP(dfn->code, 36, 0x0, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 46, 0x0, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 51, 0x0, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 60, 0x0, (int)&rmesa->vb.notify);
+      break;
+   }
+   case 6: {
+      /* vertex_size == 6: the patch sites below belong to the _x86_Vertex3f_6 template. */
+      DFN ( _x86_Vertex3f_6, rmesa->vb.dfn_cache.Vertex3f );
+      FIXUP(dfn->code, 3, 0x0, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 28, 0x0, (int)&rmesa->vb.vertex[3]);
+      FIXUP(dfn->code, 34, 0x0, (int)&rmesa->vb.vertex[4]);
+      FIXUP(dfn->code, 40, 0x0, (int)&rmesa->vb.vertex[5]);
+      FIXUP(dfn->code, 57, 0x0, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 63, 0x0, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 70, 0x0, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 79, 0x0, (int)&rmesa->vb.notify);
+      break;
+   }
+   default: {
+      /* Any other size: generic template; vertex_size-3 is patched in as a value, not an address. */
+      DFN ( _x86_Vertex3f, rmesa->vb.dfn_cache.Vertex3f );
+      FIXUP(dfn->code, 3, 0x0, (int)&rmesa->vb.vertex[3]);
+      FIXUP(dfn->code, 9, 0x0, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 37, 0x0, rmesa->vb.vertex_size-3);
+      FIXUP(dfn->code, 44, 0x0, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 50, 0x0, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 56, 0x0, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 67, 0x0, (int)&rmesa->vb.notify);
+      break;
+   }
+   }
+
+   return dfn;
+}
+
+
+/* Vertex3fv generator: picks a template by vb.vertex_size (6, 8, or generic) and patches it; returns 0 if the allocation fails. */
+struct dynfn *radeon_makeX86Vertex3fv( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x %d\n", __FUNCTION__, key, rmesa->vb.vertex_size );
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   switch (rmesa->vb.vertex_size) {
+   case 6: {
+      /* vertex_size == 6: the patch sites below belong to the _x86_Vertex3fv_6 template. */
+      DFN ( _x86_Vertex3fv_6, rmesa->vb.dfn_cache.Vertex3fv );
+      FIXUP(dfn->code, 1, 0x00000000, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 27, 0x0000001c, (int)&rmesa->vb.vertex[3]);
+      FIXUP(dfn->code, 33, 0x00000020, (int)&rmesa->vb.vertex[4]);
+      FIXUP(dfn->code, 45, 0x00000024, (int)&rmesa->vb.vertex[5]);
+      FIXUP(dfn->code, 56, 0x00000000, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 61, 0x00000004, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 67, 0x00000004, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 76, 0x00000008, (int)&rmesa->vb.notify);
+      break;
+   }
+
+
+   case 8: {
+      /* vertex_size == 8: the patch sites below belong to the _x86_Vertex3fv_8 template. */
+      DFN ( _x86_Vertex3fv_8, rmesa->vb.dfn_cache.Vertex3fv );
+      FIXUP(dfn->code, 1, 0x00000000, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 27, 0x0000001c, (int)&rmesa->vb.vertex[3]);
+      FIXUP(dfn->code, 33, 0x00000020, (int)&rmesa->vb.vertex[4]);
+      FIXUP(dfn->code, 45, 0x0000001c, (int)&rmesa->vb.vertex[5]);
+      FIXUP(dfn->code, 51, 0x00000020, (int)&rmesa->vb.vertex[6]);
+      FIXUP(dfn->code, 63, 0x00000024, (int)&rmesa->vb.vertex[7]);
+      FIXUP(dfn->code, 74, 0x00000000, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 79, 0x00000004, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 85, 0x00000004, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 94, 0x00000008, (int)&rmesa->vb.notify);
+      break;
+   }
+
+
+
+   default: {
+      /* Any other size: generic template; vertex_size-3 is patched in as a value, not an address. */
+      DFN ( _x86_Vertex3fv, rmesa->vb.dfn_cache.Vertex3fv );
+      FIXUP(dfn->code, 8, 0x01010101, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 32, 0x00000006, rmesa->vb.vertex_size-3);
+      FIXUP(dfn->code, 37, 0x00000058, (int)&rmesa->vb.vertex[3]);
+      FIXUP(dfn->code, 45, 0x01010101, (int)&rmesa->vb.dmaptr);
+      FIXUP(dfn->code, 50, 0x02020202, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 58, 0x02020202, (int)&rmesa->vb.counter);
+      FIXUP(dfn->code, 67, 0x0, (int)&rmesa->vb.notify);
+      break;
+   }
+   }
+
+   return dfn;
+}
+/* Build a dynfn from the _x86_Attribute2fv template, patched with 'dest' and 'dest'+4; returns 0 if the allocation fails. */
+static struct dynfn *
+radeon_makeX86Attribute2fv( struct dynfn * cache, int key,
+			    const char * name, void * dest )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key );   /* callers in this file pass __FUNCTION__ as 'name' */
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   DFN ( _x86_Attribute2fv, (*cache) );      /* macro defined outside this file; it picks up the local 'dfn' by name */
+   FIXUP(dfn->code, 11, 0x0, (int)dest);     /* first patch site: dest */
+   FIXUP(dfn->code, 16, 0x4, 4+(int)dest);   /* second patch site: dest + 4 bytes */
+
+   return dfn;
+}
+/* Build a dynfn from the _x86_Attribute2f template, patched with 'dest'; returns 0 if the allocation fails. */
+static struct dynfn *
+radeon_makeX86Attribute2f( struct dynfn * cache, int key,
+			   const char * name, void * dest )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key );   /* callers in this file pass __FUNCTION__ as 'name' */
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   DFN ( _x86_Attribute2f, (*cache) );    /* macro defined outside this file; it picks up the local 'dfn' by name */
+   FIXUP(dfn->code, 1, 0x0, (int)dest);   /* the only patch site in this template: dest */
+
+   return dfn;
+}
+
+/* Build a dynfn from the _x86_Attribute3fv template, patched with 'dest', +4 and +8; returns 0 if the allocation fails. */
+static struct dynfn *
+radeon_makeX86Attribute3fv( struct dynfn * cache, int key,
+			    const char * name, void * dest )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key );   /* callers in this file pass __FUNCTION__ as 'name' */
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   DFN ( _x86_Attribute3fv, (*cache) );      /* macro defined outside this file; it picks up the local 'dfn' by name */
+   FIXUP(dfn->code, 14, 0x0, (int)dest);     /* first patch site: dest */
+   FIXUP(dfn->code, 20, 0x4, 4+(int)dest);   /* second patch site: dest + 4 bytes */
+   FIXUP(dfn->code, 25, 0x8, 8+(int)dest);   /* third patch site: dest + 8 bytes */
+
+   return dfn;
+}
+/* Build a dynfn from the _x86_Attribute3f template, patched with 'dest', +4 and +8; returns 0 if the allocation fails. */
+static struct dynfn *
+radeon_makeX86Attribute3f( struct dynfn * cache, int key,
+			 const char * name, void * dest )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", name, key );   /* callers in this file pass __FUNCTION__ as 'name' */
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   DFN ( _x86_Attribute3f, (*cache) );       /* macro defined outside this file; it picks up the local 'dfn' by name */
+   FIXUP(dfn->code, 14, 0x0, (int)dest);     /* NOTE(review): offsets equal the 3fv variant's; confirm against _x86_Attribute3f */
+   FIXUP(dfn->code, 20, 0x4, 4+(int)dest);   /* second patch site: dest + 4 bytes */
+   FIXUP(dfn->code, 25, 0x8, 8+(int)dest);   /* third patch site: dest + 8 bytes */
+
+   return dfn;
+}
+/* Normal3fv generator: radeon_makeX86Attribute3fv applied to the Normal3fv cache and vb.normalptr. */
+struct dynfn *radeon_makeX86Normal3fv( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.Normal3fv;
+
+   return radeon_makeX86Attribute3fv( cache, key, __FUNCTION__, rmesa->vb.normalptr );
+}
+/* Normal3f generator: radeon_makeX86Attribute3f applied to the Normal3f cache and vb.normalptr. */
+struct dynfn *radeon_makeX86Normal3f( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.Normal3f;
+
+   return radeon_makeX86Attribute3f( cache, key, __FUNCTION__, rmesa->vb.normalptr );
+}
+/* Color4ubv generator: packed-colour template when PKCOLOR is set in 'key', ubyte-to-float template otherwise; 0 if malloc fails. */
+struct dynfn *radeon_makeX86Color4ubv( GLcontext *ctx, int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   if (key & RADEON_CP_VC_FRMT_PKCOLOR) {
+      DFN ( _x86_Color4ubv_ub, rmesa->vb.dfn_cache.Color4ubv);
+      FIXUP(dfn->code, 5, 0x12345678, (int)rmesa->vb.colorptr); 
+      return dfn;
+   } 
+   else {
+      /* Patched with the conversion table and four addresses 4 bytes apart starting at floatcolorptr. */
+      DFN ( _x86_Color4ubv_4f, rmesa->vb.dfn_cache.Color4ubv);
+      FIXUP(dfn->code, 2, 0x00000000, (int)_mesa_ubyte_to_float_color_tab); 
+      FIXUP(dfn->code, 27, 0xdeadbeaf, (int)rmesa->vb.floatcolorptr); 
+      FIXUP(dfn->code, 33, 0xdeadbeaf, (int)rmesa->vb.floatcolorptr+4); 
+      FIXUP(dfn->code, 55, 0xdeadbeaf, (int)rmesa->vb.floatcolorptr+8); 
+      FIXUP(dfn->code, 61, 0xdeadbeaf, (int)rmesa->vb.floatcolorptr+12); 
+      return dfn;
+   }
+}
+/* Color4ub generator: only for keys with PKCOLOR set; returns 0 otherwise, or if the allocation fails. */
+struct dynfn *radeon_makeX86Color4ub( GLcontext *ctx, int key )
+{
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+
+   if (key & RADEON_CP_VC_FRMT_PKCOLOR) {
+      struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      if (!dfn) return 0;   /* same "no code" result as the non-PKCOLOR path; avoids dereferencing NULL below */
+      DFN ( _x86_Color4ub_ub, rmesa->vb.dfn_cache.Color4ub );
+      FIXUP(dfn->code, 18, 0x0, (int)rmesa->vb.colorptr);     /* four patch sites, one byte apart from colorptr */
+      FIXUP(dfn->code, 24, 0x0, (int)rmesa->vb.colorptr+1); 
+      FIXUP(dfn->code, 30, 0x0, (int)rmesa->vb.colorptr+2); 
+      FIXUP(dfn->code, 36, 0x0, (int)rmesa->vb.colorptr+3); 
+      return dfn;
+   }
+   else
+      return 0;
+}
+
+/* Color3fv generator; declines (returns 0) when 'key' has PKCOLOR or FPALPHA set. */
+struct dynfn *radeon_makeX86Color3fv( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa;
+
+   /* Either format bit rules this template out. */
+   if ((key & (RADEON_CP_VC_FRMT_PKCOLOR|RADEON_CP_VC_FRMT_FPALPHA)) != 0)
+      return 0;
+
+   rmesa = RADEON_CONTEXT(ctx);
+   return radeon_makeX86Attribute3fv( & rmesa->vb.dfn_cache.Color3fv, key,
+				      __FUNCTION__, rmesa->vb.floatcolorptr );
+}
+/* Color3f generator; declines (returns 0) when 'key' has PKCOLOR or FPALPHA set. */
+struct dynfn *radeon_makeX86Color3f( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa;
+
+   /* Either format bit rules this template out. */
+   if ((key & (RADEON_CP_VC_FRMT_PKCOLOR|RADEON_CP_VC_FRMT_FPALPHA)) != 0)
+      return 0;
+
+   rmesa = RADEON_CONTEXT(ctx);
+   return radeon_makeX86Attribute3f( & rmesa->vb.dfn_cache.Color3f, key,
+				     __FUNCTION__, rmesa->vb.floatcolorptr );
+}
+
+
+/* TexCoord2fv generator: radeon_makeX86Attribute2fv applied to the TexCoord2fv cache and vb.texcoordptr[0]. */
+struct dynfn *radeon_makeX86TexCoord2fv( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.TexCoord2fv;
+
+   return radeon_makeX86Attribute2fv( cache, key, __FUNCTION__, rmesa->vb.texcoordptr[0] );
+}
+/* TexCoord2f generator: radeon_makeX86Attribute2f applied to the TexCoord2f cache and vb.texcoordptr[0]. */
+struct dynfn *radeon_makeX86TexCoord2f( GLcontext *ctx, int key )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct dynfn *cache = &rmesa->vb.dfn_cache.TexCoord2f;
+
+   return radeon_makeX86Attribute2f( cache, key, __FUNCTION__, rmesa->vb.texcoordptr[0] );
+}
+/* MultiTexCoord2fvARB generator: picks one of two x86 templates from the ST0/ST1 bits of 'key'; returns 0 if the allocation fails. */
+struct dynfn *radeon_makeX86MultiTexCoord2fvARB( GLcontext *ctx, int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   if ((key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) ==
+      (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) {
+      DFN ( _x86_MultiTexCoord2fv, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );   /* both ST0 and ST1 set in key */
+      FIXUP(dfn->code, 21, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);
+      FIXUP(dfn->code, 27, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]+4);
+   } else {
+      DFN ( _x86_MultiTexCoord2fv_2, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+      FIXUP(dfn->code, 14, 0x0, (int)rmesa->vb.texcoordptr);   /* patched with the address of the texcoordptr array itself */
+   }
+   return dfn;
+}
+/* MultiTexCoord2fARB generator: picks one of two x86 templates from the ST0/ST1 bits of 'key'; returns 0 if the allocation fails. */
+struct dynfn *radeon_makeX86MultiTexCoord2fARB( GLcontext *ctx, 
+						int key )
+{
+   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   if (RADEON_DEBUG & DEBUG_CODEGEN)
+      fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
+   if (!dfn) return 0;   /* 0 is the generators' "no code" result; avoids dereferencing NULL below */
+   if ((key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) ==
+       (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) {
+      DFN ( _x86_MultiTexCoord2f, rmesa->vb.dfn_cache.MultiTexCoord2fARB );   /* both ST0 and ST1 set in key */
+      FIXUP(dfn->code, 20, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);
+      FIXUP(dfn->code, 26, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]+4); 
+   }
+   else {
+      /* Note: this might get generated multiple times, even though the
+       * actual emitted code is the same.
+       */
+      DFN ( _x86_MultiTexCoord2f_2, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+      FIXUP(dfn->code, 18, 0x0, (int)rmesa->vb.texcoordptr);   /* patched with the address of the texcoordptr array itself */
+   }
+   return dfn;
+}
+
+/* Install the generic x86 generators into 'gen'; entries not assigned here are left as the caller set them. */
+void radeonInitX86Codegen( struct dfn_generators *gen )
+{
+   gen->Vertex3f = radeon_makeX86Vertex3f;
+   gen->Vertex3fv = radeon_makeX86Vertex3fv;
+   gen->Color4ub = radeon_makeX86Color4ub; /* PKCOLOR only; returns 0 for other keys */
+   gen->Color4ubv = radeon_makeX86Color4ubv; /* PKCOLOR and non-PKCOLOR (ubyte-to-float) keys */
+   gen->Normal3f = radeon_makeX86Normal3f;
+   gen->Normal3fv = radeon_makeX86Normal3fv;
+   gen->TexCoord2f = radeon_makeX86TexCoord2f;
+   gen->TexCoord2fv = radeon_makeX86TexCoord2fv;
+   gen->MultiTexCoord2fARB = radeon_makeX86MultiTexCoord2fARB;
+   gen->MultiTexCoord2fvARB = radeon_makeX86MultiTexCoord2fvARB;
+   gen->Color3f = radeon_makeX86Color3f; /* returns 0 when PKCOLOR or FPALPHA is set */
+   gen->Color3fv = radeon_makeX86Color3fv; /* returns 0 when PKCOLOR or FPALPHA is set */
+
+   /* Not done: no x86 generator exists in this file for the entries below,
+    * so they are deliberately not assigned here. */
+/*     gen->Vertex2f = radeon_makeX86Vertex2f; */
+/*     gen->Vertex2fv = radeon_makeX86Vertex2fv; */
+/*     gen->Color3ub = radeon_makeX86Color3ub; */
+/*     gen->Color3ubv = radeon_makeX86Color3ubv; */
+/*     gen->Color4f = radeon_makeX86Color4f; */
+/*     gen->Color4fv = radeon_makeX86Color4fv; */
+/*     gen->TexCoord1f = radeon_makeX86TexCoord1f; */
+/*     gen->TexCoord1fv = radeon_makeX86TexCoord1fv; */
+/*     gen->MultiTexCoord1fARB = radeon_makeX86MultiTexCoord1fARB; */
+/*     gen->MultiTexCoord1fvARB = radeon_makeX86MultiTexCoord1fvARB; */
+}
+
+
+#else 
+/* Built without USE_X86_ASM: nothing to install, 'gen' is left untouched. */
+void radeonInitX86Codegen( struct dfn_generators *gen )
+{
+   (void) gen;
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/server/radeon_common.h b/src/mesa/drivers/dri/radeon/server/radeon_common.h
index c26ccd3cc2..0792b5c2e0 100644
--- a/src/mesa/drivers/dri/radeon/server/radeon_common.h
+++ b/src/mesa/drivers/dri/radeon/server/radeon_common.h
@@ -1,19 +1,5 @@
-/**
- * \file server/radeon_common.h 
- * \brief Common header definitions for Radeon 2D/3D/DRM driver suite.
+/* radeon_common.h -- common header definitions for Radeon 2D/3D/DRM suite
  *
- * \note Some of these structures are meant for backward compatibility and
- * aren't used by the subset driver.
- *
- * \author Gareth Hughes <gareth@valinux.com>
- * \author Kevin E. Martin <martin@valinux.com>
- * \author Keith Whitwell <keith@tungstengraphics.com>
- * 
- * \author Converted to common header format by
- * Jens Owen <jens@tungstengraphics.com>
- */
-
-/*
  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
  * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
@@ -36,10 +22,19 @@
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
+ *
+ * Author:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ * Converted to common header format:
+ *   Jens Owen <jens@tungstengraphics.com>
+ *
+ * $XFree86: xc/programs/Xserver/hw/xfree86/os-support/xf86drmRadeon.h,v 1.6 2001/04/16 15:02:13 tsi Exp $
+ *
  */
 
-/* $XFree86: xc/programs/Xserver/hw/xfree86/os-support/xf86drmRadeon.h,v 1.6 2001/04/16 15:02:13 tsi Exp $ */
-
 #ifndef _RADEON_COMMON_H_
 #define _RADEON_COMMON_H_
 
@@ -90,42 +85,33 @@
 #define RADEON_CLEAR_DEPTH     4
 
 
-/**
- * \brief DRM_RADEON_CP_INIT ioctl argument type.
- */
 typedef struct {
    enum {
-      DRM_RADEON_INIT_CP    = 0x01,   /**< \brief initialize CP */
-      DRM_RADEON_CLEANUP_CP = 0x02,   /**< \brief clean up CP */
-      DRM_RADEON_INIT_R200_CP = 0x03  /**< \brief initialize R200 CP */
-   } func;                            /**< \brief request */
-   unsigned long sarea_priv_offset;   /**< \brief SAREA private offset */
-   int is_pci;                        /**< \brief is current card a PCI card? */
-   int cp_mode;                       /**< \brief CP mode */
-   int agp_size;                      /**< \brief AGP space size */
-   int ring_size;                     /**< \brief CP ring buffer size */
-   int usec_timeout;                  /**< \brief timeout for DRM operations in usecs */
-
-   unsigned int fb_bpp;               
-   unsigned int front_offset;         /**< \brief front color buffer offset */
-   unsigned int front_pitch;          /**< \brief front color buffer pitch */
-   unsigned int back_offset;          /**< \brief back color buffer offset */
-   unsigned int back_pitch;           /**< \brief back color buffer pitch*/
-   unsigned int depth_bpp;            /**< \brief depth buffer bits-per-pixel */
-   unsigned int depth_offset;         /**< \brief depth buffer offset */
-   unsigned int depth_pitch;          /**< \brief depth buffer pitch */
-
-   unsigned long fb_offset;           /**< \brief framebuffer offset */
-   unsigned long mmio_offset;         /**< \brief MMIO register offset */
-   unsigned long ring_offset;         /**< \brief CP ring buffer offset */
-   unsigned long ring_rptr_offset;    /**< \brief CP ring buffer read pointer offset */
-   unsigned long buffers_offset;      /**< \brief vertex buffers offset */
-   unsigned long agp_textures_offset; /**< \brief AGP textures offset */
+      DRM_RADEON_INIT_CP    = 0x01,
+      DRM_RADEON_CLEANUP_CP = 0x02,
+      DRM_RADEON_INIT_R200_CP = 0x03
+   } func;
+   unsigned long sarea_priv_offset;
+   int is_pci;
+   int cp_mode;
+   int agp_size;
+   int ring_size;
+   int usec_timeout;
+
+   unsigned int fb_bpp;
+   unsigned int front_offset, front_pitch;
+   unsigned int back_offset, back_pitch;
+   unsigned int depth_bpp;
+   unsigned int depth_offset, depth_pitch;
+
+   unsigned long fb_offset;
+   unsigned long mmio_offset;
+   unsigned long ring_offset;
+   unsigned long ring_rptr_offset;
+   unsigned long buffers_offset;
+   unsigned long agp_textures_offset;
 } drmRadeonInit;
 
-/**
- * \brief DRM_RADEON_CP_STOP ioctl argument type.
- */
 typedef struct {
    int flush;
    int idle;
@@ -143,17 +129,13 @@ typedef union drmRadeonClearR {
         unsigned int ui[5];
 } drmRadeonClearRect;
 
-/**
- * \brief DRM_RADEON_CLEAR ioctl argument type.
- */
 typedef struct drmRadeonClearT {
-        unsigned int flags;              /**< \brief bitmask of the planes to clear */
-        unsigned int clear_color;        /**< \brief color buffer clear value */
-        unsigned int clear_depth;        /**< \brief depth buffer clear value */
-        unsigned int color_mask;         /**< \brief color buffer clear mask */
-        unsigned int depth_mask;         /**< \brief stencil buffer clear value
-					   *  \todo Misnamed field. */
-        drmRadeonClearRect *depth_boxes; /**< \brief depth buffer cliprects */
+        unsigned int flags;
+        unsigned int clear_color;
+        unsigned int clear_depth;
+        unsigned int color_mask;
+        unsigned int depth_mask;   /* misnamed field:  should be stencil */
+        drmRadeonClearRect *depth_boxes;
 } drmRadeonClearType;
 
 typedef struct drmRadeonFullscreenT {
@@ -163,16 +145,10 @@ typedef struct drmRadeonFullscreenT {
         } func;
 } drmRadeonFullscreenType;
 
-/**
- * \brief DRM_RADEON_STIPPLE ioctl argument type.
- */
 typedef struct {
         unsigned int *mask;
 } drmRadeonStipple;
 
-/**
- * \brief Texture image for drmRadeonTexture.
- */
 typedef struct {
         unsigned int x;
         unsigned int y;
@@ -181,22 +157,18 @@ typedef struct {
         const void *data;
 } drmRadeonTexImage;
 
-/**
- * \brief DRM_RADEON_TEXTURE ioctl argument type.
- */
 typedef struct {
-        int offset;               /**< \brief texture offset */
-        int pitch;                /**< \brief texture pitch */
-        int format;               /**< \brief pixel format */
-        int width;                /**< \brief texture width */
-        int height;               /**< \brief texture height */
-	drmRadeonTexImage *image; /**< \brief image */
+        int offset;
+        int pitch;
+        int format;
+        int width;                      /* Texture image coordinates */
+        int height;
+        drmRadeonTexImage *image;
 } drmRadeonTexture;
 
 
 #define RADEON_MAX_TEXTURE_UNITS 3
 
-
 /* Layout matches drm_radeon_state_t in linux drm_radeon.h.  
  */
 typedef struct {
@@ -266,16 +238,13 @@ typedef struct {
 	unsigned int dirty;
 } drmRadeonState;
 
-/**
- * \brief DRM 1.1 vertex ioctl.
- *
- * Used in compatibility modes.
+/* 1.1 vertex ioctl.  Used in compatibility modes.
  */
 typedef struct {
-	int prim;			/**< \brief Primitive number */
-	int idx;			/**< \brief Index of vertex buffer */
-	int count;			/**< \brief Number of vertices in buffer */
-	int discard;			/**< \brief Client finished with buffer? */
+	int prim;
+	int idx;			/* Index of vertex buffer */
+	int count;			/* Number of vertices in buffer */
+	int discard;			/* Client finished with buffer? */
 } drmRadeonVertex;
 
 typedef struct {
@@ -283,13 +252,13 @@ typedef struct {
 	unsigned int finish;
 	unsigned int prim:8;
 	unsigned int stateidx:8;
-	unsigned int numverts:16;	/**< overloaded as offset/64 for elt prims */
+	unsigned int numverts:16; /* overloaded as offset/64 for elt prims */
         unsigned int vc_format;
 } drmRadeonPrim;
 
 typedef struct {
-        int idx;                        /**< \brief Index of vertex buffer */
-        int discard;                    /**< \brief Client finished with buffer? */
+        int idx;                        /* Index of vertex buffer */
+        int discard;                    /* Client finished with buffer? */
         int nr_states;
         drmRadeonState *state;
         int nr_prims;
@@ -299,156 +268,127 @@ typedef struct {
 #define RADEON_MAX_STATES 16
 #define RADEON_MAX_PRIMS  64
 
-
-/**
- * \brief Command buffer.  
- *
- * \todo Replace with true DMA stream?
+/* Command buffer.  Replace with true dma stream?
  */
 typedef struct {
-	int bufsz;          /**< \brief buffer size */
-	char *buf;          /**< \brief buffer */
-	int nbox;           /**< \brief number of cliprects */
-        drmClipRect *boxes; /**< \brief cliprects */
+	int bufsz;          /* buffer size */
+	char *buf;          /* buffer */
+	int nbox;           /* number of cliprects */
+        drmClipRect *boxes; /* cliprects */
 } drmRadeonCmdBuffer;
 
-
-/**
- * \brief Per-packet identifiers for use with the ::RADEON_CMD_PACKET command
- * in the DRM_RADEON_CMDBUF ioctl.  
- *
- * \note Comments relate new packets to old state bits and the packet size.
+/* New style per-packet identifiers for use in cmd_buffer ioctl with
+ * the RADEON_CMD_PACKET command.  Comments relate new packets to old
+ * state bits and the packet size:
  */
-enum drmRadeonCmdPkt {
-   RADEON_EMIT_PP_MISC                       = 0, /* context/7 */
-   RADEON_EMIT_PP_CNTL                       = 1, /* context/3 */
-   RADEON_EMIT_RB3D_COLORPITCH               = 2, /* context/1 */
-   RADEON_EMIT_RE_LINE_PATTERN               = 3, /* line/2 */
-   RADEON_EMIT_SE_LINE_WIDTH                 = 4, /* line/1 */
-   RADEON_EMIT_PP_LUM_MATRIX                 = 5, /* bumpmap/1 */
-   RADEON_EMIT_PP_ROT_MATRIX_0               = 6, /* bumpmap/2 */
-   RADEON_EMIT_RB3D_STENCILREFMASK           = 7, /* masks/3 */
-   RADEON_EMIT_SE_VPORT_XSCALE               = 8, /* viewport/6 */
-   RADEON_EMIT_SE_CNTL                       = 9, /* setup/2 */
-   RADEON_EMIT_SE_CNTL_STATUS                = 10, /* setup/1 */
-   RADEON_EMIT_RE_MISC                       = 11, /* misc/1 */
-   RADEON_EMIT_PP_TXFILTER_0                 = 12, /* tex0/6 */
-   RADEON_EMIT_PP_BORDER_COLOR_0             = 13, /* tex0/1 */
-   RADEON_EMIT_PP_TXFILTER_1                 = 14, /* tex1/6 */
-   RADEON_EMIT_PP_BORDER_COLOR_1             = 15, /* tex1/1 */
-   RADEON_EMIT_PP_TXFILTER_2                 = 16, /* tex2/6 */
-   RADEON_EMIT_PP_BORDER_COLOR_2             = 17, /* tex2/1 */
-   RADEON_EMIT_SE_ZBIAS_FACTOR               = 18, /* zbias/2 */
-   RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT         = 19, /* tcl/11 */
-   RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED = 20, /* material/17 */
-   R200_EMIT_PP_TXCBLEND_0                   = 21, /* tex0/4 */
-   R200_EMIT_PP_TXCBLEND_1                   = 22, /* tex1/4 */
-   R200_EMIT_PP_TXCBLEND_2                   = 23, /* tex2/4 */
-   R200_EMIT_PP_TXCBLEND_3                   = 24, /* tex3/4 */
-   R200_EMIT_PP_TXCBLEND_4                   = 25, /* tex4/4 */
-   R200_EMIT_PP_TXCBLEND_5                   = 26, /* tex5/4 */
-   R200_EMIT_PP_TXCBLEND_6                   = 27, /* /4 */
-   R200_EMIT_PP_TXCBLEND_7                   = 28, /* /4 */
-   R200_EMIT_TCL_LIGHT_MODEL_CTL_0           = 29, /* tcl/6 */
-   R200_EMIT_TFACTOR_0                       = 30, /* tf/6 */
-   R200_EMIT_VTX_FMT_0                       = 31, /* vtx/4 */
-   R200_EMIT_VAP_CTL                         = 32, /* vap/1 */
-   R200_EMIT_MATRIX_SELECT_0                 = 33, /* msl/5 */
-   R200_EMIT_TEX_PROC_CTL_2                  = 34, /* tcg/5 */
-   R200_EMIT_TCL_UCP_VERT_BLEND_CTL          = 35, /* tcl/1 */
-   R200_EMIT_PP_TXFILTER_0                   = 36, /* tex0/6 */
-   R200_EMIT_PP_TXFILTER_1                   = 37, /* tex1/6 */
-   R200_EMIT_PP_TXFILTER_2                   = 38, /* tex2/6 */
-   R200_EMIT_PP_TXFILTER_3                   = 39, /* tex3/6 */
-   R200_EMIT_PP_TXFILTER_4                   = 40, /* tex4/6 */
-   R200_EMIT_PP_TXFILTER_5                   = 41, /* tex5/6 */
-   R200_EMIT_PP_TXOFFSET_0                   = 42, /* tex0/1 */
-   R200_EMIT_PP_TXOFFSET_1                   = 43, /* tex1/1 */
-   R200_EMIT_PP_TXOFFSET_2                   = 44, /* tex2/1 */
-   R200_EMIT_PP_TXOFFSET_3                   = 45, /* tex3/1 */
-   R200_EMIT_PP_TXOFFSET_4                   = 46, /* tex4/1 */
-   R200_EMIT_PP_TXOFFSET_5                   = 47, /* tex5/1 */
-   R200_EMIT_VTE_CNTL                        = 48, /* vte/1 */
-   R200_EMIT_OUTPUT_VTX_COMP_SEL             = 49, /* vtx/1 */
-   R200_EMIT_PP_TAM_DEBUG3                   = 50, /* tam/1 */
-   R200_EMIT_PP_CNTL_X                       = 51, /* cst/1 */
-   R200_EMIT_RB3D_DEPTHXY_OFFSET             = 52, /* cst/1 */
-   R200_EMIT_RE_AUX_SCISSOR_CNTL             = 53, /* cst/1 */
-   R200_EMIT_RE_SCISSOR_TL_0                 = 54, /* cst/2 */
-   R200_EMIT_RE_SCISSOR_TL_1                 = 55, /* cst/2 */
-   R200_EMIT_RE_SCISSOR_TL_2                 = 56, /* cst/2 */
-   R200_EMIT_SE_VAP_CNTL_STATUS              = 57, /* cst/1 */
-   R200_EMIT_SE_VTX_STATE_CNTL               = 58, /* cst/1 */
-   R200_EMIT_RE_POINTSIZE                    = 59, /* cst/1 */
-   R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0     = 60, /* cst/4 */
-   R200_EMIT_PP_CUBIC_FACES_0                = 61,
-   R200_EMIT_PP_CUBIC_OFFSETS_0              = 62,
-   R200_EMIT_PP_CUBIC_FACES_1                = 63,
-   R200_EMIT_PP_CUBIC_OFFSETS_1              = 64,
-   R200_EMIT_PP_CUBIC_FACES_2                = 65,
-   R200_EMIT_PP_CUBIC_OFFSETS_2              = 66,
-   R200_EMIT_PP_CUBIC_FACES_3                = 67,
-   R200_EMIT_PP_CUBIC_OFFSETS_3              = 68,
-   R200_EMIT_PP_CUBIC_FACES_4                = 69,
-   R200_EMIT_PP_CUBIC_OFFSETS_4              = 70,
-   R200_EMIT_PP_CUBIC_FACES_5                = 71,
-   R200_EMIT_PP_CUBIC_OFFSETS_5              = 72,
-   RADEON_MAX_STATE_PACKETS                  = 73
-} ;
-
-
-/**
- * \brief Command types understood by the DRM_RADEON_CMDBUF ioctl.  
- * 
- * More can be added but obviously these can't be removed or changed.
- *
- * \sa drmRadeonCmdHeader.
- */
-enum drmRadeonCmdType {
-   RADEON_CMD_PACKET       = 1, /**< \brief emit one of the ::drmRadeonCmdPkt register packets */
-   RADEON_CMD_SCALARS      = 2, /**< \brief emit scalar data */
-   RADEON_CMD_VECTORS      = 3, /**< \brief emit vector data */
-   RADEON_CMD_DMA_DISCARD  = 4, /**< \brief discard current DMA buffer */
-   RADEON_CMD_PACKET3      = 5, /**< \brief emit hardware packet */
-   RADEON_CMD_PACKET3_CLIP = 6, /**< \brief emit hardware packet wrapped in cliprects */
-   RADEON_CMD_SCALARS2     = 7, /**< \brief R200 stopgap */
-   RADEON_CMD_WAIT         = 8  /**< \brief synchronization */
-} ;
-
-/**
- * \brief Command packet headers understood by the DRM_RADEON_CMDBUF ioctl.
- *
- * \sa drmRadeonCmdType.
+#define RADEON_EMIT_PP_MISC                         0 /* context/7 */
+#define RADEON_EMIT_PP_CNTL                         1 /* context/3 */
+#define RADEON_EMIT_RB3D_COLORPITCH                 2 /* context/1 */
+#define RADEON_EMIT_RE_LINE_PATTERN                 3 /* line/2 */
+#define RADEON_EMIT_SE_LINE_WIDTH                   4 /* line/1 */
+#define RADEON_EMIT_PP_LUM_MATRIX                   5 /* bumpmap/1 */
+#define RADEON_EMIT_PP_ROT_MATRIX_0                 6 /* bumpmap/2 */
+#define RADEON_EMIT_RB3D_STENCILREFMASK             7 /* masks/3 */
+#define RADEON_EMIT_SE_VPORT_XSCALE                 8 /* viewport/6 */
+#define RADEON_EMIT_SE_CNTL                         9 /* setup/2 */
+#define RADEON_EMIT_SE_CNTL_STATUS                  10 /* setup/1 */
+#define RADEON_EMIT_RE_MISC                         11 /* misc/1 */
+#define RADEON_EMIT_PP_TXFILTER_0                   12 /* tex0/6 */
+#define RADEON_EMIT_PP_BORDER_COLOR_0               13 /* tex0/1 */
+#define RADEON_EMIT_PP_TXFILTER_1                   14 /* tex1/6 */
+#define RADEON_EMIT_PP_BORDER_COLOR_1               15 /* tex1/1 */
+#define RADEON_EMIT_PP_TXFILTER_2                   16 /* tex2/6 */
+#define RADEON_EMIT_PP_BORDER_COLOR_2               17 /* tex2/1 */
+#define RADEON_EMIT_SE_ZBIAS_FACTOR                 18 /* zbias/2 */
+#define RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT           19 /* tcl/11 */
+#define RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED   20 /* material/17 */
+#define R200_EMIT_PP_TXCBLEND_0                     21 /* tex0/4 */
+#define R200_EMIT_PP_TXCBLEND_1                     22 /* tex1/4 */
+#define R200_EMIT_PP_TXCBLEND_2                     23 /* tex2/4 */
+#define R200_EMIT_PP_TXCBLEND_3                     24 /* tex3/4 */
+#define R200_EMIT_PP_TXCBLEND_4                     25 /* tex4/4 */
+#define R200_EMIT_PP_TXCBLEND_5                     26 /* tex5/4 */
+#define R200_EMIT_PP_TXCBLEND_6                     27 /* /4 */
+#define R200_EMIT_PP_TXCBLEND_7                     28 /* /4 */
+#define R200_EMIT_TCL_LIGHT_MODEL_CTL_0             29 /* tcl/6 */
+#define R200_EMIT_TFACTOR_0                         30 /* tf/6 */
+#define R200_EMIT_VTX_FMT_0                         31 /* vtx/4 */
+#define R200_EMIT_VAP_CTL                           32 /* vap/1 */
+#define R200_EMIT_MATRIX_SELECT_0                   33 /* msl/5 */
+#define R200_EMIT_TEX_PROC_CTL_2                    34 /* tcg/5 */
+#define R200_EMIT_TCL_UCP_VERT_BLEND_CTL            35 /* tcl/1 */
+#define R200_EMIT_PP_TXFILTER_0                     36 /* tex0/6 */
+#define R200_EMIT_PP_TXFILTER_1                     37 /* tex1/6 */
+#define R200_EMIT_PP_TXFILTER_2                     38 /* tex2/6 */
+#define R200_EMIT_PP_TXFILTER_3                     39 /* tex3/6 */
+#define R200_EMIT_PP_TXFILTER_4                     40 /* tex4/6 */
+#define R200_EMIT_PP_TXFILTER_5                     41 /* tex5/6 */
+#define R200_EMIT_PP_TXOFFSET_0                     42 /* tex0/1 */
+#define R200_EMIT_PP_TXOFFSET_1                     43 /* tex1/1 */
+#define R200_EMIT_PP_TXOFFSET_2                     44 /* tex2/1 */
+#define R200_EMIT_PP_TXOFFSET_3                     45 /* tex3/1 */
+#define R200_EMIT_PP_TXOFFSET_4                     46 /* tex4/1 */
+#define R200_EMIT_PP_TXOFFSET_5                     47 /* tex5/1 */
+#define R200_EMIT_VTE_CNTL                          48 /* vte/1 */
+#define R200_EMIT_OUTPUT_VTX_COMP_SEL               49 /* vtx/1 */
+#define R200_EMIT_PP_TAM_DEBUG3                     50 /* tam/1 */
+#define R200_EMIT_PP_CNTL_X                         51 /* cst/1 */
+#define R200_EMIT_RB3D_DEPTHXY_OFFSET               52 /* cst/1 */
+#define R200_EMIT_RE_AUX_SCISSOR_CNTL               53 /* cst/1 */
+#define R200_EMIT_RE_SCISSOR_TL_0                   54 /* cst/2 */
+#define R200_EMIT_RE_SCISSOR_TL_1                   55 /* cst/2 */
+#define R200_EMIT_RE_SCISSOR_TL_2                   56 /* cst/2 */
+#define R200_EMIT_SE_VAP_CNTL_STATUS                57 /* cst/1 */
+#define R200_EMIT_SE_VTX_STATE_CNTL                 58 /* cst/1 */
+#define R200_EMIT_RE_POINTSIZE                      59 /* cst/1 */
+#define R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0       60 /* cst/4 */
+#define R200_EMIT_PP_CUBIC_FACES_0                  61
+#define R200_EMIT_PP_CUBIC_OFFSETS_0                62
+#define R200_EMIT_PP_CUBIC_FACES_1                  63
+#define R200_EMIT_PP_CUBIC_OFFSETS_1                64
+#define R200_EMIT_PP_CUBIC_FACES_2                  65
+#define R200_EMIT_PP_CUBIC_OFFSETS_2                66
+#define R200_EMIT_PP_CUBIC_FACES_3                  67
+#define R200_EMIT_PP_CUBIC_OFFSETS_3                68
+#define R200_EMIT_PP_CUBIC_FACES_4                  69
+#define R200_EMIT_PP_CUBIC_OFFSETS_4                70
+#define R200_EMIT_PP_CUBIC_FACES_5                  71
+#define R200_EMIT_PP_CUBIC_OFFSETS_5                72
+#define RADEON_EMIT_PP_TEX_SIZE_0                   73
+#define RADEON_EMIT_PP_TEX_SIZE_1                   74
+#define RADEON_EMIT_PP_TEX_SIZE_2                   75
+#define RADEON_MAX_STATE_PACKETS                    76
+
+
+/* Commands understood by cmd_buffer ioctl.  More can be added but
+ * obviously these can't be removed or changed:
  */
+#define RADEON_CMD_PACKET      1 /* emit one of the register packets above */
+#define RADEON_CMD_SCALARS     2 /* emit scalar data */
+#define RADEON_CMD_VECTORS     3 /* emit vector data */
+#define RADEON_CMD_DMA_DISCARD 4 /* discard current dma buf */
+#define RADEON_CMD_PACKET3     5 /* emit hw packet */
+#define RADEON_CMD_PACKET3_CLIP 6 /* emit hw packet wrapped in cliprects */
+#define RADEON_CMD_SCALARS2     7 /* R200 stopgap */
+#define RADEON_CMD_WAIT         8 /* synchronization */
+
 typedef union {
-	/** \brief integer equivalent */
 	int i;
-
 	struct { 
 	   unsigned char cmd_type, pad0, pad1, pad2;
 	} header;
-
-	/** \brief emit a register packet */
 	struct { 
 	   unsigned char cmd_type, packet_id, pad0, pad1;
 	} packet;
-	
-	/** \brief scalar data */
 	struct { 
 	   unsigned char cmd_type, offset, stride, count; 
 	} scalars;
-	
-	/** \brief vector data */
 	struct { 
 	   unsigned char cmd_type, offset, stride, count; 
 	} vectors;
-	
-	/** \brief discard current DMA buffer */
 	struct { 
 	   unsigned char cmd_type, buf_idx, pad0, pad1; 
 	} dma;
-	
-	/** \brief synchronization */
 	struct { 
 	   unsigned char cmd_type, flags, pad0, pad1; 
 	} wait;
@@ -458,12 +398,10 @@ typedef union {
 #define RADEON_WAIT_2D  0x1
 #define RADEON_WAIT_3D  0x2
 
-/**
- * \brief DRM_RADEON_GETPARAM ioctl argument type.
- */
+
 typedef struct drm_radeon_getparam {
-	int param;  /**< \brief parameter number */
-	int *value; /**< \brief parameter value */
+	int param;	/* parameter number */
+	void *value;	/* parameter value */
 } drmRadeonGetParam;
 
 #define RADEON_PARAM_AGP_BUFFER_OFFSET 1
@@ -472,10 +410,6 @@ typedef struct drm_radeon_getparam {
 #define RADEON_PARAM_LAST_CLEAR        4
 #define RADEON_PARAM_IRQ_NR            5
 #define RADEON_PARAM_AGP_BASE          6
-#define RADEON_PARAM_REGISTER_HANDLE   7 
-#define RADEON_PARAM_STATUS_HANDLE     8
-#define RADEON_PARAM_SAREA_HANDLE      9
-#define RADEON_PARAM_AGP_TEX_HANDLE    10
 
 
 #define RADEON_MEM_REGION_AGP 1
@@ -493,29 +427,18 @@ typedef struct drm_radeon_mem_free {
 	int region_offset;
 } drmRadeonMemFree;
 
-/**
- * \brief DRM_RADEON_INIT_HEAP argument type.
- */
 typedef struct drm_radeon_mem_init_heap {
-	int region; /**< \brief region type */
-	int size;   /**< \brief region size */
-	int start;  /**< \brief region start offset */
+	int region;	/* region type */
+	int size;	/* region size */
+	int start;	/* region start offset */
 } drmRadeonMemInitHeap;
 
-/**
- * \brief DRM_RADEON_IRQ_EMIT ioctl argument type.
- *
- * New in DRM 1.6: userspace can request and wait on IRQ's.
+/* 1.6: Userspace can request & wait on irq's:
  */
 typedef struct drm_radeon_irq_emit {
 	int *irq_seq;
 } drmRadeonIrqEmit;
 
-/**
- * \brief DRM_RADEON_IRQ_WAIT ioctl argument type.
- *
- * New in DRM 1.6: userspace can request and wait on IRQ's.
- */
 typedef struct drm_radeon_irq_wait {
 	int irq_seq;
 } drmRadeonIrqWait;
diff --git a/src/mesa/drivers/dri/radeon/server/radeon_dri.c b/src/mesa/drivers/dri/radeon/server/radeon_dri.c
index f14bd13a95..4271aa7da2 100644
--- a/src/mesa/drivers/dri/radeon/server/radeon_dri.c
+++ b/src/mesa/drivers/dri/radeon/server/radeon_dri.c
@@ -12,6 +12,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <unistd.h>
 
 #include "driver.h"
 #include "drm.h"
@@ -23,9 +24,6 @@
 #include "radeon_sarea.h"
 #include "sarea.h"
 
-#include <unistd.h>
-
-
 
 /* HACK - for now, put this here... */
 /* Alpha - this may need to be a variable to handle UP1x00 vs TITAN */
@@ -735,7 +733,7 @@ static int RADEONMemoryInit( const DRIDriverContext *ctx, RADEONInfoPtr info )
  * Setups a RADEONDRIRec structure to be passed to radeon_dri.so for its
  * initialization.
  */
-static int RADEONScreenInit( DRIDriverContext *ctx, RADEONInfoPtr info )
+static int RADEONScreenInit( const DRIDriverContext *ctx, RADEONInfoPtr info )
 {
    RADEONDRIPtr   pRADEONDRI;
    int err;
diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
index 2cd9dbe094..5570a43945 100644
--- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h
+++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
@@ -1,24 +1,4 @@
-/**
- * \file server/radeon_reg.h
- * \brief Registers and register definitions for the Radeon.
- * 
- * \authors Kevin E. Martin <martin@xfree86.org>
- * \authors Rickard E. Faith <faith@valinux.com>
- * \authors Alan Hourihane <alanh@fairlite.demon.co.uk>
- *
- * \par References
- *
- * - RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
- *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
- *   1999.
- * - RAGE 128 Software Development Manual (Technical Reference Manual P/N
- *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
- *
- * \note !!!! FIXME !!!! THIS FILE HAS BEEN CONVERTED FROM r128_reg.h
- * AND CONTAINS REGISTERS AND REGISTER DEFINITIONS THAT ARE NOT CORRECT
- * ON THE RADEON.  A FULL AUDIT OF THIS CODE IS NEEDED!
- */
-
+/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_reg.h,v 1.25 2003/02/07 18:08:59 martin Exp $ */
 /*
  * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
  *                VA Linux Systems Inc., Fremont, California.
@@ -47,7 +27,28 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_reg.h,v 1.20 2002/10/12 01:38:07 martin Exp $ */
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@xfree86.org>
+ *   Rickard E. Faith <faith@valinux.com>
+ *   Alan Hourihane <alanh@fairlite.demon.co.uk>
+ *
+ * References:
+ *
+ * !!!! FIXME !!!!
+ *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
+ *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
+ *   1999.
+ *
+ * !!!! FIXME !!!!
+ *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
+ *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
+ *
+ */
+
+/* !!!! FIXME !!!!  NOTE: THIS FILE HAS BEEN CONVERTED FROM r128_reg.h
+ * AND CONTAINS REGISTERS AND REGISTER DEFINITIONS THAT ARE NOT CORRECT
+ * ON THE RADEON.  A FULL AUDIT OF THIS CODE IS NEEDED!  */
 
 #ifndef _RADEON_REG_H_
 #define _RADEON_REG_H_
@@ -216,6 +217,10 @@
 #define RADEON_CONFIG_APER_SIZE             0x0108
 #define RADEON_CONFIG_BONDS                 0x00e8
 #define RADEON_CONFIG_CNTL                  0x00e0
+#       define RADEON_CFG_ATI_REV_A11       (0   << 16)
+#       define RADEON_CFG_ATI_REV_A12       (1   << 16)
+#       define RADEON_CFG_ATI_REV_A13       (2   << 16)
+#       define RADEON_CFG_ATI_REV_ID_MASK   (0xf << 16)
 #define RADEON_CONFIG_MEMSIZE               0x00f8
 #define RADEON_CONFIG_MEMSIZE_EMBEDDED      0x0114
 #define RADEON_CONFIG_REG_1_BASE            0x010c
@@ -302,6 +307,10 @@
 #define RADEON_CRTC2_PITCH                  0x032c
 #define RADEON_CRTC_STATUS                  0x005c
 #       define RADEON_CRTC_VBLANK_SAVE      (1 <<  1)
+#       define RADEON_CRTC_VBLANK_SAVE_CLEAR  (1 <<  1)
+#define RADEON_CRTC2_STATUS                  0x03fc
+#       define RADEON_CRTC2_VBLANK_SAVE      (1 <<  1)
+#       define RADEON_CRTC2_VBLANK_SAVE_CLEAR  (1 <<  1)
 #define RADEON_CRTC_V_SYNC_STRT_WID         0x020c
 #       define RADEON_CRTC_V_SYNC_STRT        (0x7ff <<  0)
 #       define RADEON_CRTC_V_SYNC_STRT_SHIFT  0
@@ -492,6 +501,7 @@
 #define RADEON_DST_LINE_START               0x1600
 #define RADEON_DST_LINE_END                 0x1604
 #define RADEON_DST_LINE_PATCOUNT            0x1608
+#       define RADEON_BRES_CNTL_SHIFT       8
 #define RADEON_DST_OFFSET                   0x1404
 #define RADEON_DST_PITCH                    0x1408
 #define RADEON_DST_PITCH_OFFSET             0x142c
@@ -554,6 +564,7 @@
 #define RADEON_FP_GEN_CNTL                  0x0284
 #       define RADEON_FP_FPON                  (1 <<  0)
 #       define RADEON_FP_TMDS_EN               (1 <<  2)
+#       define RADEON_FP_PANEL_FORMAT          (1 <<  3)
 #       define RADEON_FP_EN_TMDS               (1 <<  7)
 #       define RADEON_FP_DETECT_SENSE          (1 <<  8)
 #       define RADEON_FP_SEL_CRTC2             (1 << 13)
@@ -612,6 +623,8 @@
 #define RADEON_GEN_INT_STATUS               0x0044
 #       define RADEON_VSYNC_INT_AK          (1 <<  2)
 #       define RADEON_VSYNC_INT             (1 <<  2)
+#       define RADEON_VSYNC2_INT_AK         (1 <<  6)
+#       define RADEON_VSYNC2_INT            (1 <<  6)
 #define RADEON_GENENB                       0x03c3 /* VGA */
 #define RADEON_GENFC_RD                     0x03ca /* VGA */
 #define RADEON_GENFC_WT                     0x03da /* VGA, 0x03ba */
@@ -708,6 +721,9 @@
 #define RADEON_MM_DATA                      0x0004
 #define RADEON_MM_INDEX                     0x0000
 #define RADEON_MPLL_CNTL                    0x000e /* PLL */
+#define RADEON_MPP_TB_CONFIG                0x01c0 /* ? */
+#define RADEON_MPP_GP_CONFIG                0x01c8 /* ? */
+
 
 #define RADEON_N_VIF_COUNT                  0x0248
 
@@ -863,6 +879,8 @@
 #       define RADEON_P2PLL_REF_DIV_MASK    0x03ff
 #       define RADEON_P2PLL_ATOMIC_UPDATE_R (1 << 15) /* same as _W */
 #       define RADEON_P2PLL_ATOMIC_UPDATE_W (1 << 15) /* same as _R */
+#       define R300_PPLL_REF_DIV_ACC_MASK   (0x3ff << 18) /* 10-bit field, bits 27:18 */
+#       define R300_PPLL_REF_DIV_ACC_SHIFT  18
 #define RADEON_PALETTE_DATA                 0x00b4
 #define RADEON_PALETTE_30_DATA              0x00b8
 #define RADEON_PALETTE_INDEX                0x00b0
@@ -1122,6 +1140,10 @@
 #       define RADEON_LOD_BIAS_SHIFT                       8
 #       define RADEON_MAX_MIP_LEVEL_MASK                   (0x0f << 16)
 #       define RADEON_MAX_MIP_LEVEL_SHIFT                  16
+#       define RADEON_YUV_TO_RGB                           (1  << 20)
+#       define RADEON_YUV_TEMPERATURE_COOL                 (0  << 21)
+#       define RADEON_YUV_TEMPERATURE_HOT                  (1  << 21)
+#       define RADEON_YUV_TEMPERATURE_MASK                 (1  << 21)
 #       define RADEON_WRAPEN_S                             (1  << 22)
 #       define RADEON_CLAMP_S_WRAP                         (0  << 23)
 #       define RADEON_CLAMP_S_MIRROR                       (1  << 23)
@@ -1129,6 +1151,8 @@
 #       define RADEON_CLAMP_S_MIRROR_CLAMP_LAST            (3  << 23)
 #       define RADEON_CLAMP_S_CLAMP_BORDER                 (4  << 23)
 #       define RADEON_CLAMP_S_MIRROR_CLAMP_BORDER          (5  << 23)
+#       define RADEON_CLAMP_S_CLAMP_GL                     (6  << 23)
+#       define RADEON_CLAMP_S_MIRROR_CLAMP_GL              (7  << 23)
 #       define RADEON_CLAMP_S_MASK                         (7  << 23)
 #       define RADEON_WRAPEN_T                             (1  << 26)
 #       define RADEON_CLAMP_T_WRAP                         (0  << 27)
@@ -1137,6 +1161,8 @@
 #       define RADEON_CLAMP_T_MIRROR_CLAMP_LAST            (3  << 27)
 #       define RADEON_CLAMP_T_CLAMP_BORDER                 (4  << 27)
 #       define RADEON_CLAMP_T_MIRROR_CLAMP_BORDER          (5  << 27)
+#       define RADEON_CLAMP_T_CLAMP_GL                     (6  << 27)
+#       define RADEON_CLAMP_T_MIRROR_CLAMP_GL              (7  << 27)
 #       define RADEON_CLAMP_T_MASK                         (7  << 27)
 #       define RADEON_BORDER_MODE_OGL                      (0  << 31)
 #       define RADEON_BORDER_MODE_D3D                      (1  << 31)
@@ -1152,6 +1178,11 @@
 #       define RADEON_TXFORMAT_ARGB8888           (6  <<  0)
 #       define RADEON_TXFORMAT_RGBA8888           (7  <<  0)
 #       define RADEON_TXFORMAT_Y8                 (8  <<  0)
+#       define RADEON_TXFORMAT_VYUY422            (10 <<  0)
+#       define RADEON_TXFORMAT_YVYU422            (11 <<  0)
+#       define RADEON_TXFORMAT_DXT1               (12 <<  0)
+#       define RADEON_TXFORMAT_DXT23              (14 <<  0)
+#       define RADEON_TXFORMAT_DXT45              (15 <<  0)
 #       define RADEON_TXFORMAT_FORMAT_MASK        (31 <<  0)
 #       define RADEON_TXFORMAT_FORMAT_SHIFT       0
 #       define RADEON_TXFORMAT_APPLE_YUV_MODE     (1  <<  5)
@@ -1161,6 +1192,10 @@
 #       define RADEON_TXFORMAT_WIDTH_SHIFT        8
 #       define RADEON_TXFORMAT_HEIGHT_MASK        (15 << 12)
 #       define RADEON_TXFORMAT_HEIGHT_SHIFT       12
+#       define RADEON_TXFORMAT_F5_WIDTH_MASK      (15 << 16)
+#       define RADEON_TXFORMAT_F5_WIDTH_SHIFT     16
+#       define RADEON_TXFORMAT_F5_HEIGHT_MASK     (15 << 20)
+#       define RADEON_TXFORMAT_F5_HEIGHT_SHIFT    20
 #       define RADEON_TXFORMAT_ST_ROUTE_STQ0      (0  << 24)
 #       define RADEON_TXFORMAT_ST_ROUTE_MASK      (3  << 24)
 #       define RADEON_TXFORMAT_ST_ROUTE_STQ1      (1  << 24)
@@ -1173,6 +1208,26 @@
 #       define RADEON_TXFORMAT_CHROMA_KEY_ENABLE  (1  << 29)
 #       define RADEON_TXFORMAT_CUBIC_MAP_ENABLE   (1  << 30)
 #       define RADEON_TXFORMAT_PERSPECTIVE_ENABLE (1  << 31)
+#define RADEON_PP_CUBIC_FACES_0             0x1d24
+#define RADEON_PP_CUBIC_FACES_1             0x1d28
+#define RADEON_PP_CUBIC_FACES_2             0x1d2c
+#       define RADEON_FACE_WIDTH_1_SHIFT          0
+#       define RADEON_FACE_HEIGHT_1_SHIFT         4
+#       define RADEON_FACE_WIDTH_1_MASK           (0xf << 0)
+#       define RADEON_FACE_HEIGHT_1_MASK          (0xf << 4)
+#       define RADEON_FACE_WIDTH_2_SHIFT          8
+#       define RADEON_FACE_HEIGHT_2_SHIFT         12
+#       define RADEON_FACE_WIDTH_2_MASK           (0xf << 8)
+#       define RADEON_FACE_HEIGHT_2_MASK          (0xf << 12)
+#       define RADEON_FACE_WIDTH_3_SHIFT          16
+#       define RADEON_FACE_HEIGHT_3_SHIFT         20
+#       define RADEON_FACE_WIDTH_3_MASK           (0xf << 16)
+#       define RADEON_FACE_HEIGHT_3_MASK          (0xf << 20)
+#       define RADEON_FACE_WIDTH_4_SHIFT          24
+#       define RADEON_FACE_HEIGHT_4_SHIFT         28
+#       define RADEON_FACE_WIDTH_4_MASK           (0xf << 24)
+#       define RADEON_FACE_HEIGHT_4_MASK          (0xf << 28)
+
 #define RADEON_PP_TXOFFSET_0                0x1c5c
 #define RADEON_PP_TXOFFSET_1                0x1c74
 #define RADEON_PP_TXOFFSET_2                0x1c8c
@@ -1187,6 +1242,39 @@
 #       define RADEON_TXO_MICRO_TILE_OPT     (2 << 3)
 #       define RADEON_TXO_OFFSET_MASK        0xffffffe0
 #       define RADEON_TXO_OFFSET_SHIFT       5
+
+#define RADEON_PP_CUBIC_OFFSET_T0_0         0x1dd0  /* bits [31:5] */
+#define RADEON_PP_CUBIC_OFFSET_T0_1         0x1dd4
+#define RADEON_PP_CUBIC_OFFSET_T0_2         0x1dd8
+#define RADEON_PP_CUBIC_OFFSET_T0_3         0x1ddc
+#define RADEON_PP_CUBIC_OFFSET_T0_4         0x1de0
+#define RADEON_PP_CUBIC_OFFSET_T1_0         0x1e00
+#define RADEON_PP_CUBIC_OFFSET_T1_1         0x1e04
+#define RADEON_PP_CUBIC_OFFSET_T1_2         0x1e08
+#define RADEON_PP_CUBIC_OFFSET_T1_3         0x1e0c
+#define RADEON_PP_CUBIC_OFFSET_T1_4         0x1e10
+#define RADEON_PP_CUBIC_OFFSET_T2_0         0x1e14
+#define RADEON_PP_CUBIC_OFFSET_T2_1         0x1e18
+#define RADEON_PP_CUBIC_OFFSET_T2_2         0x1e1c
+#define RADEON_PP_CUBIC_OFFSET_T2_3         0x1e20
+#define RADEON_PP_CUBIC_OFFSET_T2_4         0x1e24
+
+#define RADEON_PP_TEX_SIZE_0                0x1d04  /* NPOT */
+#define RADEON_PP_TEX_SIZE_1                0x1d0c
+#define RADEON_PP_TEX_SIZE_2                0x1d14
+#       define RADEON_TEX_USIZE_MASK        (0x7ff << 0)
+#       define RADEON_TEX_USIZE_SHIFT       0
+#       define RADEON_TEX_VSIZE_MASK        (0x7ff << 16)
+#       define RADEON_TEX_VSIZE_SHIFT       16
+#       define RADEON_SIGNED_RGB_MASK       (1 << 30)
+#       define RADEON_SIGNED_RGB_SHIFT      30
+#       define RADEON_SIGNED_ALPHA_MASK     (1 << 31)
+#       define RADEON_SIGNED_ALPHA_SHIFT    31
+#define RADEON_PP_TEX_PITCH_0               0x1d08  /* NPOT */
+#define RADEON_PP_TEX_PITCH_1               0x1d10  /* NPOT */
+#define RADEON_PP_TEX_PITCH_2               0x1d18  /* NPOT */
+/* note: bits 13-5: 32 byte aligned stride of texture map */
+
 #define RADEON_PP_TXCBLEND_0                0x1c60
 #define RADEON_PP_TXCBLEND_1                0x1c78
 #define RADEON_PP_TXCBLEND_2                0x1c90
diff --git a/src/mesa/drivers/dri/radeon/server/radeon_sarea.h b/src/mesa/drivers/dri/radeon/server/radeon_sarea.h
index f682bb6b6a..81e4325d7a 100644
--- a/src/mesa/drivers/dri/radeon/server/radeon_sarea.h
+++ b/src/mesa/drivers/dri/radeon/server/radeon_sarea.h
@@ -296,9 +296,9 @@ typedef struct {
     /** \brief Texture regions. 
      * Last element is sentinal
      */
-    radeon_tex_region_t texList[RADEON_NR_TEX_HEAPS][RADEON_NR_TEX_REGIONS+1];
+    drmTextureRegion texList[RADEON_NR_TEX_HEAPS][RADEON_NR_TEX_REGIONS+1];
     /** \brief last time texture was uploaded */
-    int texAge[RADEON_NR_TEX_HEAPS];
+    unsigned int texAge[RADEON_NR_TEX_HEAPS];
     /*@}*/
 
     int ctxOwner;		/**< \brief last context to upload state */
-- 
cgit v1.2.3