From aa93b4ced05708c20d095fdd905ebd6cd9f634cd Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 23 Apr 2006 05:54:06 +0000 Subject: Enable USER_BUFFERS, RADEON_VTXFMT_A and HW_VBOS on big endian by forcing all elts to 32 bits (it would be possible to do a half word swap for 16 bits if somebody is interested) and by using the input route mechanism to swap ubytes properly in r300EmitArrays. Tested with cva, varray and quake3 --- src/mesa/drivers/dri/r300/r300_context.c | 2 + src/mesa/drivers/dri/r300/r300_context.h | 6 ++- src/mesa/drivers/dri/r300/r300_maos.c | 83 +++++++++++++++++------------ src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c | 53 +++++++++++++++--- 4 files changed, 101 insertions(+), 43 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 21a474bf67..2dd6a6c335 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -384,6 +384,7 @@ static void r300FreeGartAllocations(r300ContextPtr r300) resize_u_list(r300); #endif +#ifdef USER_BUFFERS for (i = r300->rmm->u_last + 1; i > 0; i--) { if (r300->rmm->u_list[i].ptr == NULL) { continue; @@ -417,6 +418,7 @@ static void r300FreeGartAllocations(r300ContextPtr r300) } } r300->rmm->u_head = i; +#endif /* USER_BUFFERS */ } /* Destroy the device specific context. diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 58f789eeae..65bf9964df 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -47,11 +47,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "colormac.h" #include "radeon_context.h" -/* PPC doesnt support 16 bit elts ... 
*/ -#ifndef MESA_BIG_ENDIAN #define USER_BUFFERS #define RADEON_VTXFMT_A #define HW_VBOS + +/* We don't handle 16 bits elts swapping yet */ +#ifdef MESA_BIG_ENDIAN +#define FORCE_32BITS_ELTS #endif //#define OPTIMIZE_ELTS diff --git a/src/mesa/drivers/dri/r300/r300_maos.c b/src/mesa/drivers/dri/r300/r300_maos.c index 290ffb44ea..6b8365e6d9 100644 --- a/src/mesa/drivers/dri/r300/r300_maos.c +++ b/src/mesa/drivers/dri/r300/r300_maos.c @@ -252,6 +252,43 @@ void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, int elt_siz memcpy(out, elts, n_elts * elt_size); } + /* Mesa assumes that all missing components are from (0, 0, 0, 1) */ +#define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<> R300_INPUT_ROUTE_X_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK) << R300_INPUT_ROUTE_W_SHIFT; + dw_temp |= ((dw >> R300_INPUT_ROUTE_Y_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK) << R300_INPUT_ROUTE_Z_SHIFT; + dw_temp |= ((dw >> R300_INPUT_ROUTE_Z_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK) << R300_INPUT_ROUTE_Y_SHIFT; + dw_temp |= ((dw >> R300_INPUT_ROUTE_W_SHIFT) & R300_INPUT_ROUTE_SELECT_MASK) << R300_INPUT_ROUTE_X_SHIFT; + + return dw_temp; + } +#endif /* MESA_BIG_ENDIAN */ + return dw; + +} + /* Emit vertex data to GART memory (unless immediate mode) * Route inputs to the vertex processor */ @@ -264,7 +301,7 @@ void r300EmitArrays(GLcontext * ctx, GLboolean immd) //struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; GLuint nr = 0; GLuint count = VB->Count; - GLuint dw,mask; + GLuint dw; GLuint vic_1 = 0; /* R300_VAP_INPUT_CNTL_1 */ GLuint aa_vap_reg = 0; /* VAP register assignment */ GLuint i; @@ -469,17 +506,6 @@ void r300EmitArrays(GLcontext * ctx, GLboolean immd) ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->packet0.count = (nr+1)>>1; - /* Mesa assumes that all missing components are from (0, 0, 0, 1) */ -#define ALL_COMPONENTS ((R300_INPUT_ROUTE_SELECT_X<state.aos[i].aos_size=/*3*/4; /* XXX */ } - - for(i=0;i+1state.aos[i].aos_size*3))-1; - dw=(ALL_COMPONENTS & mask) - | (ALL_DEFAULT 
& ~mask) - | R300_INPUT_ROUTE_ENABLE; - + dw = fix_comps(t_comps(r300->state.aos[i].aos_size), r300->state.aos[i].aos_format) | R300_INPUT_ROUTE_ENABLE; /* i+1 */ - mask=(1<<(r300->state.aos[i+1].aos_size*3))-1; - dw|=( - (ALL_COMPONENTS & mask) - | (ALL_DEFAULT & ~mask) - | R300_INPUT_ROUTE_ENABLE - )<<16; - + dw |= (fix_comps(t_comps(r300->state.aos[i+1].aos_size), r300->state.aos[i+1].aos_format) | R300_INPUT_ROUTE_ENABLE) << 16; + //fprintf(stderr, "vir1 dw=%08x\n", dw); r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(i>>1)]=dw; - } - if(nr & 1){ - mask=(1<<(r300->state.aos[nr-1].aos_size*3))-1; - dw=(ALL_COMPONENTS & mask) - | (ALL_DEFAULT & ~mask) - | R300_INPUT_ROUTE_ENABLE; - r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw; + } + if (nr & 1) { + dw = fix_comps(t_comps(r300->state.aos[nr-1].aos_size), r300->state.aos[nr-1].aos_format) | R300_INPUT_ROUTE_ENABLE; + //fprintf(stderr, "vir1 dw=%08x\n", dw); - } + r300->hw.vir[1].cmd[R300_VIR_CNTL_0+(nr>>1)]=dw; + } + /* Set the rest of INPUT_ROUTE_1 to 0 */ //for(i=((count+1)>>1); i<8; i++)r300->hw.vir[1].cmd[R300_VIR_CNTL_0+i]=0x0; ((drm_r300_cmd_header_t*)r300->hw.vir[1].cmd)->packet0.count = (nr+1)>>1; diff --git a/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c b/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c index 8f7620482f..941ff0078e 100644 --- a/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c +++ b/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c @@ -216,14 +216,22 @@ static void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const G max = ((unsigned char *)indices)[i]; } +#ifdef FORCE_32BITS_ELTS + elt_size = 4; +#else elt_size = 2; - +#endif r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); rvb.aos_offset = GET_START(&rvb); ptr = rvb.address + rvb.start; +#ifdef FORCE_32BITS_ELTS + for (i=0; i < count; i++) + ((unsigned int *)ptr)[i] = ((unsigned char *)indices)[i] - min; +#else for (i=0; i < count; i++) ((unsigned short int *)ptr)[i] = ((unsigned char *)indices)[i] - min; +#endif break; case 
GL_UNSIGNED_SHORT: @@ -234,14 +242,23 @@ static void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const G max = ((unsigned short int *)indices)[i]; } +#ifdef FORCE_32BITS_ELTS + elt_size = 4; +#else elt_size = 2; +#endif r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); rvb.aos_offset = GET_START(&rvb); ptr = rvb.address + rvb.start; +#ifdef FORCE_32BITS_ELTS + for (i=0; i < count; i++) + ((unsigned int *)ptr)[i] = ((unsigned short int *)indices)[i] - min; +#else for (i=0; i < count; i++) ((unsigned short int *)ptr)[i] = ((unsigned short int *)indices)[i] - min; +#endif break; case GL_UNSIGNED_INT: @@ -252,17 +269,20 @@ static void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const G max = ((unsigned int *)indices)[i]; } +#ifdef FORCE_32BITS_ELTS + elt_size = 4; +#else if (max - min <= 65535) elt_size = 2; else elt_size = 4; - +#endif r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); rvb.aos_offset = GET_START(&rvb); ptr = rvb.address + rvb.start; - if (max - min <= 65535) + if (elt_size == 2) for (i=0; i < count; i++) ((unsigned short int *)ptr)[i] = ((unsigned int *)indices)[i] - min; else @@ -388,19 +408,30 @@ static void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei memset(&rvb, 0, sizeof(rvb)); switch (type){ case GL_UNSIGNED_BYTE: +#ifdef FORCE_32BITS_ELTS + elt_size = 4; +#else elt_size = 2; - +#endif r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); rvb.aos_offset = GET_START(&rvb); ptr = rvb.address + rvb.start; +#ifdef FORCE_32BITS_ELTS + for(i=0; i < count; i++) + ((unsigned int *)ptr)[i] = ((unsigned char *)indices)[i] - min; +#else for(i=0; i < count; i++) ((unsigned short int *)ptr)[i] = ((unsigned char *)indices)[i] - min; +#endif break; case GL_UNSIGNED_SHORT: +#ifdef FORCE_32BITS_ELTS + elt_size = 4; +#else elt_size = 2; - +#endif #ifdef OPTIMIZE_ELTS if (min == 0 && ctx->Array.ElementArrayBufferObj->Name){ ptr = indices; @@ -411,21 +442,29 @@ static void 
radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei rvb.aos_offset = GET_START(&rvb); ptr = rvb.address + rvb.start; +#ifdef FORCE_32BITS_ELTS + for(i=0; i < count; i++) + ((unsigned int *)ptr)[i] = ((unsigned short int *)indices)[i] - min; +#else for(i=0; i < count; i++) ((unsigned short int *)ptr)[i] = ((unsigned short int *)indices)[i] - min; +#endif break; case GL_UNSIGNED_INT: +#ifdef FORCE_32BITS_ELTS + elt_size = 4; +#else if (max - min <= 65535) elt_size = 2; else elt_size = 4; - +#endif r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); rvb.aos_offset = GET_START(&rvb); ptr = rvb.address + rvb.start; - if (max - min <= 65535) + if (elt_size == 2) for (i=0; i < count; i++) ((unsigned short int *)ptr)[i] = ((unsigned int *)indices)[i] - min; else -- cgit v1.2.3