diff options
Diffstat (limited to 'src/mesa/shader/slang/slang_execute_x86.c')
-rw-r--r-- | src/mesa/shader/slang/slang_execute_x86.c | 1114 |
1 files changed, 560 insertions, 554 deletions
diff --git a/src/mesa/shader/slang/slang_execute_x86.c b/src/mesa/shader/slang/slang_execute_x86.c index 3e21edff6a..958086ff07 100644 --- a/src/mesa/shader/slang/slang_execute_x86.c +++ b/src/mesa/shader/slang/slang_execute_x86.c @@ -40,40 +40,42 @@ typedef struct { - GLuint index; - GLubyte *csr; + GLuint index; + GLubyte *csr; } fixup; typedef struct { - struct x86_function f; - struct x86_reg r_eax; - struct x86_reg r_ecx; - struct x86_reg r_edx; + struct x86_function f; + struct x86_reg r_eax; + struct x86_reg r_ecx; + struct x86_reg r_edx; struct x86_reg r_ebx; - struct x86_reg r_esp; - struct x86_reg r_ebp; - struct x86_reg r_st0; - struct x86_reg r_st1; - struct x86_reg r_st2; - struct x86_reg r_st3; + struct x86_reg r_esp; + struct x86_reg r_ebp; + struct x86_reg r_st0; + struct x86_reg r_st1; + struct x86_reg r_st2; + struct x86_reg r_st3; struct x86_reg r_st4; - fixup *fixups; - GLuint fixup_count; - GLubyte **labels; - slang_machine *mach; - GLubyte *l_discard; - GLubyte *l_exit; - GLshort fpucntl; + fixup *fixups; + GLuint fixup_count; + GLubyte **labels; + slang_machine *mach; + GLubyte *l_discard; + GLubyte *l_exit; + GLshort fpucntl; } codegen_ctx; -static GLvoid add_fixup (codegen_ctx *G, GLuint index, GLubyte *csr) +static GLvoid +add_fixup(codegen_ctx * G, GLuint index, GLubyte * csr) { - G->fixups = (fixup *) slang_alloc_realloc (G->fixups, G->fixup_count * sizeof (fixup), - (G->fixup_count + 1) * sizeof (fixup)); - G->fixups[G->fixup_count].index = index; - G->fixups[G->fixup_count].csr = csr; - G->fixup_count++; + G->fixups = + (fixup *) slang_alloc_realloc(G->fixups, G->fixup_count * sizeof(fixup), + (G->fixup_count + 1) * sizeof(fixup)); + G->fixups[G->fixup_count].index = index; + G->fixups[G->fixup_count].csr = csr; + G->fixup_count++; } #ifdef NO_FAST_MATH @@ -88,661 +90,665 @@ static GLvoid add_fixup (codegen_ctx *G, GLuint index, GLubyte *csr) /* * XXX - * These should produce a valid code that computes powers. Unfortunately, it does not. + * These should produce a valid code that computes powers. + * Unfortunately, it does not. */ -static void set_fpu_round_neg_inf (codegen_ctx *G) +static void +set_fpu_round_neg_inf(codegen_ctx * G) { - if (G->fpucntl != RND_NEG_FPU) - { - G->fpucntl = RND_NEG_FPU; - x87_fnclex (&G->f); - x86_mov_reg_imm (&G->f, G->r_eax, (GLint) &G->mach->x86.fpucntl_rnd_neg); - x87_fldcw (&G->f, x86_deref (G->r_eax)); - } + if (G->fpucntl != RND_NEG_FPU) { + G->fpucntl = RND_NEG_FPU; + x87_fnclex(&G->f); + x86_mov_reg_imm(&G->f, G->r_eax, + (GLint) & G->mach->x86.fpucntl_rnd_neg); + x87_fldcw(&G->f, x86_deref(G->r_eax)); + } } -static void emit_x87_ex2 (codegen_ctx *G) +static void +emit_x87_ex2(codegen_ctx * G) { - set_fpu_round_neg_inf (G); - - x87_fld (&G->f, G->r_st0); /* a a */ - x87_fprndint (&G->f); /* int(a) a */ - x87_fld (&G->f, G->r_st0); /* int(a) int(a) a */ - x87_fstp (&G->f, G->r_st3); /* int(a) a int(a)*/ - x87_fsubp (&G->f, G->r_st1);/* frac(a) int(a) */ - x87_f2xm1 (&G->f); /* (2^frac(a))-1 int(a)*/ - x87_fld1 (&G->f); /* 1 (2^frac(a))-1 int(a)*/ - x87_faddp (&G->f, G->r_st1);/* 2^frac(a) int(a) */ - x87_fscale (&G->f); /* 2^a */ + set_fpu_round_neg_inf(G); + + x87_fld(&G->f, G->r_st0); /* a a */ + x87_fprndint(&G->f); /* int(a) a */ + x87_fld(&G->f, G->r_st0); /* int(a) int(a) a */ + x87_fstp(&G->f, G->r_st3); /* int(a) a int(a) */ + x87_fsubp(&G->f, G->r_st1); /* frac(a) int(a) */ + x87_f2xm1(&G->f); /* (2^frac(a))-1 int(a) */ + x87_fld1(&G->f); /* 1 (2^frac(a))-1 int(a) */ + x87_faddp(&G->f, G->r_st1); /* 2^frac(a) int(a) */ + x87_fscale(&G->f); /* 2^a */ } -static void emit_pow (codegen_ctx *G) +static void +emit_pow(codegen_ctx * G) { - x87_fld (&G->f, x86_deref (G->r_esp)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); - x87_fyl2x (&G->f); - emit_x87_ex2 (G); + x87_fld(&G->f, x86_deref(G->r_esp)); + x87_fld(&G->f, x86_make_disp(G->r_esp, 4)); + x87_fyl2x(&G->f); + emit_x87_ex2(G); } #endif -static GLfloat do_ceilf (GLfloat x) +static GLfloat +do_ceilf(GLfloat x) { - return CEILF (x); + return CEILF(x); } -static GLfloat do_floorf (GLfloat x) +static GLfloat +do_floorf(GLfloat x) { - return FLOORF (x); + return FLOORF(x); } static GLfloat -do_ftoi (GLfloat x) +do_ftoi(GLfloat x) { return (GLfloat) ((GLint) (x)); } -static GLfloat do_powf (GLfloat y, GLfloat x) +static GLfloat +do_powf(GLfloat y, GLfloat x) { - return (GLfloat) _mesa_pow ((GLdouble) x, (GLdouble) y); + return (GLfloat) _mesa_pow((GLdouble) x, (GLdouble) y); } static GLvoid -ensure_infolog_created (slang_info_log **infolog) +ensure_infolog_created(slang_info_log ** infolog) { if (*infolog == NULL) { - *infolog = slang_alloc_malloc (sizeof (slang_info_log)); + *infolog = slang_alloc_malloc(sizeof(slang_info_log)); if (*infolog == NULL) return; - slang_info_log_construct (*infolog); + slang_info_log_construct(*infolog); } } -static GLvoid do_print_float (slang_info_log **infolog, GLfloat x) +static GLvoid +do_print_float(slang_info_log ** infolog, GLfloat x) { - _mesa_printf ("slang print: %f\n", x); - ensure_infolog_created (infolog); - slang_info_log_print (*infolog, "%f", x); + _mesa_printf("slang print: %f\n", x); + ensure_infolog_created(infolog); + slang_info_log_print(*infolog, "%f", x); } -static GLvoid do_print_int (slang_info_log **infolog, GLfloat x) +static GLvoid +do_print_int(slang_info_log ** infolog, GLfloat x) { - _mesa_printf ("slang print: %d\n", (GLint) (x)); - ensure_infolog_created (infolog); - slang_info_log_print (*infolog, "%d", (GLint) (x)); + _mesa_printf("slang print: %d\n", (GLint) (x)); + ensure_infolog_created(infolog); + slang_info_log_print(*infolog, "%d", (GLint) (x)); } -static GLvoid do_print_bool (slang_info_log **infolog, GLfloat x) +static GLvoid +do_print_bool(slang_info_log ** infolog, GLfloat x) { - _mesa_printf ("slang print: %s\n", (GLint) (x) ? "true" : "false"); - ensure_infolog_created (infolog); - slang_info_log_print (*infolog, "%s", (GLint) (x) ? "true" : "false"); + _mesa_printf("slang print: %s\n", (GLint) (x) ? "true" : "false"); + ensure_infolog_created(infolog); + slang_info_log_print(*infolog, "%s", (GLint) (x) ? "true" : "false"); } #define FLOAT_ONE 0x3f800000 #define FLOAT_ZERO 0 -static GLvoid codegen_assem (codegen_ctx *G, slang_assembly *a, slang_info_log **infolog) +static GLvoid +codegen_assem(codegen_ctx * G, slang_assembly * a, slang_info_log ** infolog) { - GLint disp, i; - - switch (a->type) - { - case slang_asm_none: - break; - case slang_asm_float_copy: - case slang_asm_int_copy: - case slang_asm_bool_copy: - x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, a->param[0])); - x86_pop (&G->f, G->r_ecx); - x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1]), G->r_ecx); - break; - case slang_asm_float_move: - case slang_asm_int_move: - case slang_asm_bool_move: - x86_lea (&G->f, G->r_eax, x86_make_disp (G->r_esp, a->param[1])); - x86_add (&G->f, G->r_eax, x86_deref (G->r_esp)); - x86_mov (&G->f, G->r_eax, x86_deref (G->r_eax)); - x86_mov (&G->f, x86_make_disp (G->r_esp, a->param[0]), G->r_eax); - break; - case slang_asm_float_push: - case slang_asm_int_push: - case slang_asm_bool_push: - /* TODO: use push imm32 */ - x86_mov_reg_imm (&G->f, G->r_eax, *((GLint *) &a->literal)); - x86_push (&G->f, G->r_eax); - break; - case slang_asm_float_deref: - case slang_asm_int_deref: - case slang_asm_bool_deref: - case slang_asm_addr_deref: - x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); - x86_mov (&G->f, G->r_eax, x86_deref (G->r_eax)); - x86_mov (&G->f, x86_deref (G->r_esp), G->r_eax); - break; - case slang_asm_float_add: - x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); - x87_fld (&G->f, x86_deref (G->r_esp)); - x87_faddp (&G->f, G->r_st1); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_multiply: - x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); - x87_fld (&G->f, x86_deref (G->r_esp)); - x87_fmulp (&G->f, G->r_st1); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_divide: - x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); - x87_fld (&G->f, x86_deref (G->r_esp)); - x87_fdivp (&G->f, G->r_st1); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_negate: - x87_fld (&G->f, x86_deref (G->r_esp)); - x87_fchs (&G->f); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_less: - x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); - x87_fcomp (&G->f, x86_deref (G->r_esp)); - x87_fnstsw (&G->f, G->r_eax); - /* TODO: use test r8,imm8 */ - x86_mov_reg_imm (&G->f, G->r_ecx, 0x100); - x86_test (&G->f, G->r_eax, G->r_ecx); - { - GLubyte *lab0, *lab1; - - /* TODO: use jcc rel8 */ - lab0 = x86_jcc_forward (&G->f, cc_E); - x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ONE); - /* TODO: use jmp rel8 */ - lab1 = x86_jmp_forward (&G->f); - x86_fixup_fwd_jump (&G->f, lab0); - x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ZERO); - x86_fixup_fwd_jump (&G->f, lab1); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); - x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx); - } - break; - case slang_asm_float_equal_exp: - x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); - x87_fcomp (&G->f, x86_deref (G->r_esp)); - x87_fnstsw (&G->f, G->r_eax); - /* TODO: use test r8,imm8 */ - x86_mov_reg_imm (&G->f, G->r_ecx, 0x4000); - x86_test (&G->f, G->r_eax, G->r_ecx); - { - GLubyte *lab0, *lab1; - - /* TODO: use jcc rel8 */ - lab0 = x86_jcc_forward (&G->f, cc_E); - x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ONE); - /* TODO: use jmp rel8 */ - lab1 = x86_jmp_forward (&G->f); - x86_fixup_fwd_jump (&G->f, lab0); - x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ZERO); - x86_fixup_fwd_jump (&G->f, lab1); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); - x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx); - } - break; - case slang_asm_float_equal_int: - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, -4)); - x87_fld (&G->f, x86_make_disp (G->r_esp, a->param[0] + 4)); - x87_fcomp (&G->f, x86_make_disp (G->r_esp, a->param[1] + 4)); - x87_fnstsw (&G->f, G->r_eax); - /* TODO: use test r8,imm8 */ - x86_mov_reg_imm (&G->f, G->r_ecx, 0x4000); - x86_test (&G->f, G->r_eax, G->r_ecx); - { - GLubyte *lab0, *lab1; - - /* TODO: use jcc rel8 */ - lab0 = x86_jcc_forward (&G->f, cc_E); - x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ONE); - /* TODO: use jmp rel8 */ - lab1 = x86_jmp_forward (&G->f); - x86_fixup_fwd_jump (&G->f, lab0); - x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ZERO); - x86_fixup_fwd_jump (&G->f, lab1); - x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx); - } - break; - case slang_asm_float_to_int: + GLint disp, i; + + switch (a->type) { + case slang_asm_none: + break; + case slang_asm_float_copy: + case slang_asm_int_copy: + case slang_asm_bool_copy: + x86_mov(&G->f, G->r_eax, x86_make_disp(G->r_esp, a->param[0])); + x86_pop(&G->f, G->r_ecx); + x86_mov(&G->f, x86_make_disp(G->r_eax, a->param[1]), G->r_ecx); + break; + case slang_asm_float_move: + case slang_asm_int_move: + case slang_asm_bool_move: + x86_lea(&G->f, G->r_eax, x86_make_disp(G->r_esp, a->param[1])); + x86_add(&G->f, G->r_eax, x86_deref(G->r_esp)); + x86_mov(&G->f, G->r_eax, x86_deref(G->r_eax)); + x86_mov(&G->f, x86_make_disp(G->r_esp, a->param[0]), G->r_eax); + break; + case slang_asm_float_push: + case slang_asm_int_push: + case slang_asm_bool_push: + /* TODO: use push imm32 */ + x86_mov_reg_imm(&G->f, G->r_eax, *((GLint *) & a->literal)); + x86_push(&G->f, G->r_eax); + break; + case slang_asm_float_deref: + case slang_asm_int_deref: + case slang_asm_bool_deref: + case slang_asm_addr_deref: + x86_mov(&G->f, G->r_eax, x86_deref(G->r_esp)); + x86_mov(&G->f, G->r_eax, x86_deref(G->r_eax)); + x86_mov(&G->f, x86_deref(G->r_esp), G->r_eax); + break; + case slang_asm_float_add: + x87_fld(&G->f, x86_make_disp(G->r_esp, 4)); + x87_fld(&G->f, x86_deref(G->r_esp)); + x87_faddp(&G->f, G->r_st1); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4)); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_multiply: + x87_fld(&G->f, x86_make_disp(G->r_esp, 4)); + x87_fld(&G->f, x86_deref(G->r_esp)); + x87_fmulp(&G->f, G->r_st1); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4)); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_divide: + x87_fld(&G->f, x86_make_disp(G->r_esp, 4)); + x87_fld(&G->f, x86_deref(G->r_esp)); + x87_fdivp(&G->f, G->r_st1); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4)); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_negate: + x87_fld(&G->f, x86_deref(G->r_esp)); + x87_fchs(&G->f); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_less: + x87_fld(&G->f, x86_make_disp(G->r_esp, 4)); + x87_fcomp(&G->f, x86_deref(G->r_esp)); + x87_fnstsw(&G->f, G->r_eax); + /* TODO: use test r8,imm8 */ + x86_mov_reg_imm(&G->f, G->r_ecx, 0x100); + x86_test(&G->f, G->r_eax, G->r_ecx); + { + GLubyte *lab0, *lab1; + /* TODO: use jcc rel8 */ + lab0 = x86_jcc_forward(&G->f, cc_E); + x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ONE); + /* TODO: use jmp rel8 */ + lab1 = x86_jmp_forward(&G->f); + x86_fixup_fwd_jump(&G->f, lab0); + x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ZERO); + x86_fixup_fwd_jump(&G->f, lab1); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4)); + x86_mov(&G->f, x86_deref(G->r_esp), G->r_ecx); + } + break; + case slang_asm_float_equal_exp: + x87_fld(&G->f, x86_make_disp(G->r_esp, 4)); + x87_fcomp(&G->f, x86_deref(G->r_esp)); + x87_fnstsw(&G->f, G->r_eax); + /* TODO: use test r8,imm8 */ + x86_mov_reg_imm(&G->f, G->r_ecx, 0x4000); + x86_test(&G->f, G->r_eax, G->r_ecx); + { + GLubyte *lab0, *lab1; + /* TODO: use jcc rel8 */ + lab0 = x86_jcc_forward(&G->f, cc_E); + x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ONE); + /* TODO: use jmp rel8 */ + lab1 = x86_jmp_forward(&G->f); + x86_fixup_fwd_jump(&G->f, lab0); + x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ZERO); + x86_fixup_fwd_jump(&G->f, lab1); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4)); + x86_mov(&G->f, x86_deref(G->r_esp), G->r_ecx); + } + break; + case slang_asm_float_equal_int: + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, -4)); + x87_fld(&G->f, x86_make_disp(G->r_esp, a->param[0] + 4)); + x87_fcomp(&G->f, x86_make_disp(G->r_esp, a->param[1] + 4)); + x87_fnstsw(&G->f, G->r_eax); + /* TODO: use test r8,imm8 */ + x86_mov_reg_imm(&G->f, G->r_ecx, 0x4000); + x86_test(&G->f, G->r_eax, G->r_ecx); + { + GLubyte *lab0, *lab1; + /* TODO: use jcc rel8 */ + lab0 = x86_jcc_forward(&G->f, cc_E); + x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ONE); + /* TODO: use jmp rel8 */ + lab1 = x86_jmp_forward(&G->f); + x86_fixup_fwd_jump(&G->f, lab0); + x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ZERO); + x86_fixup_fwd_jump(&G->f, lab1); + x86_mov(&G->f, x86_deref(G->r_esp), G->r_ecx); + } + break; + case slang_asm_float_to_int: /* TODO: use fistp without rounding */ - x86_call (&G->f, (GLubyte *) (do_ftoi)); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_sine: - /* TODO: use fsin */ - x86_call (&G->f, (GLubyte *) _mesa_sinf); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_arcsine: - /* TODO: use fpatan (?) */ - x86_call (&G->f, (GLubyte *) _mesa_asinf); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_arctan: - /* TODO: use fpatan */ - x86_call (&G->f, (GLubyte *) _mesa_atanf); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_power: - /* TODO: use emit_pow() */ - x86_call (&G->f, (GLubyte *) do_powf); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_log2: - x87_fld1 (&G->f); - x87_fld (&G->f, x86_deref (G->r_esp)); - x87_fyl2x (&G->f); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_floor: - x86_call (&G->f, (GLubyte *) do_floorf); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_ceil: - x86_call (&G->f, (GLubyte *) do_ceilf); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_noise1: - x86_call (&G->f, (GLubyte *) _slang_library_noise1); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_noise2: - x86_call (&G->f, (GLubyte *) _slang_library_noise2); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_noise3: - x86_call (&G->f, (GLubyte *) _slang_library_noise4); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 8)); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_float_noise4: - x86_call (&G->f, (GLubyte *) _slang_library_noise4); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 12)); - x87_fstp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_int_to_float: - break; - case slang_asm_int_to_addr: - x87_fld (&G->f, x86_deref (G->r_esp)); - x87_fistp (&G->f, x86_deref (G->r_esp)); - break; - case slang_asm_addr_copy: - x86_pop (&G->f, G->r_eax); - x86_mov (&G->f, G->r_ecx, x86_deref (G->r_esp)); - x86_mov (&G->f, x86_deref (G->r_ecx), G->r_eax); - break; - case slang_asm_addr_push: - /* TODO: use push imm32 */ - x86_mov_reg_imm (&G->f, G->r_eax, (GLint) a->param[0]); - x86_push (&G->f, G->r_eax); - break; - case slang_asm_addr_add: - x86_pop (&G->f, G->r_eax); - x86_add (&G->f, x86_deref (G->r_esp), G->r_eax); - break; - case slang_asm_addr_multiply: - x86_pop (&G->f, G->r_ecx); - x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); - x86_mul (&G->f, G->r_ecx); - x86_mov (&G->f, x86_deref (G->r_esp), G->r_eax); - break; - case slang_asm_vec4_tex1d: - x86_call (&G->f, (GLubyte *) _slang_library_tex1d); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 12)); - break; - case slang_asm_vec4_tex2d: - x86_call (&G->f, (GLubyte *) _slang_library_tex2d); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); - break; - case slang_asm_vec4_tex3d: - x86_call (&G->f, (GLubyte *) _slang_library_tex3d); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 20)); - break; - case slang_asm_vec4_texcube: - x86_call (&G->f, (GLubyte *) _slang_library_texcube); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 20)); - break; - case slang_asm_vec4_shad1d: - x86_call (&G->f, (GLubyte *) _slang_library_shad1d); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 20)); - break; - case slang_asm_vec4_shad2d: - x86_call (&G->f, (GLubyte *) _slang_library_shad2d); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 20)); - break; - case slang_asm_jump: - add_fixup (G, a->param[0], x86_jmp_forward (&G->f)); - break; - case slang_asm_jump_if_zero: - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); - x86_xor (&G->f, G->r_eax, G->r_eax); - x86_cmp (&G->f, G->r_eax, x86_make_disp (G->r_esp, -4)); - { - GLubyte *lab0; - - /* TODO: use jcc rel8 */ - lab0 = x86_jcc_forward (&G->f, cc_NE); - add_fixup (G, a->param[0], x86_jmp_forward (&G->f)); - x86_fixup_fwd_jump (&G->f, lab0); - } - break; - case slang_asm_enter: - /* FIXME: x86_make_disp(esp, 0) + x86_lea() generates bogus code */ - assert (a->param[0] != 0); - x86_push (&G->f, G->r_ebp); - x86_lea (&G->f, G->r_ebp, x86_make_disp (G->r_esp, (GLint) a->param[0])); - break; - case slang_asm_leave: - x86_pop (&G->f, G->r_ebp); - break; - case slang_asm_local_alloc: - /* FIXME: x86_make_disp(esp, 0) + x86_lea() generates bogus code */ - assert (a->param[0] != 0); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, -(GLint) a->param[0])); - break; - case slang_asm_local_free: - /* FIXME: x86_make_disp(esp, 0) + x86_lea() generates bogus code */ - assert (a->param[0] != 0); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, (GLint) a->param[0])); - break; - case slang_asm_local_addr: - disp = -(GLint) (a->param[0] + a->param[1]) + 4; - if (disp != 0) - { - x86_lea (&G->f, G->r_eax, x86_make_disp (G->r_ebp, disp)); - x86_push (&G->f, G->r_eax); - } - else - x86_push (&G->f, G->r_ebp); - break; - case slang_asm_global_addr: - /* TODO: use push imm32 */ - x86_mov_reg_imm (&G->f, G->r_eax, (GLint) &G->mach->mem + a->param[0]); - x86_push (&G->f, G->r_eax); - break; - case slang_asm_call: - add_fixup (G, a->param[0], x86_call_forward (&G->f)); - break; - case slang_asm_return: - x86_ret (&G->f); - break; - case slang_asm_discard: - x86_jmp (&G->f, G->l_discard); - break; - case slang_asm_exit: - x86_jmp (&G->f, G->l_exit); - break; - /* GL_MESA_shader_debug */ + x86_call(&G->f, (GLubyte *) (do_ftoi)); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_sine: + /* TODO: use fsin */ + x86_call(&G->f, (GLubyte *) _mesa_sinf); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_arcsine: + /* TODO: use fpatan (?) */ + x86_call(&G->f, (GLubyte *) _mesa_asinf); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_arctan: + /* TODO: use fpatan */ + x86_call(&G->f, (GLubyte *) _mesa_atanf); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_power: + /* TODO: use emit_pow() */ + x86_call(&G->f, (GLubyte *) do_powf); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4)); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_log2: + x87_fld1(&G->f); + x87_fld(&G->f, x86_deref(G->r_esp)); + x87_fyl2x(&G->f); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_floor: + x86_call(&G->f, (GLubyte *) do_floorf); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_ceil: + x86_call(&G->f, (GLubyte *) do_ceilf); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_noise1: + x86_call(&G->f, (GLubyte *) _slang_library_noise1); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_noise2: + x86_call(&G->f, (GLubyte *) _slang_library_noise2); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4)); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_noise3: + x86_call(&G->f, (GLubyte *) _slang_library_noise4); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 8)); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_float_noise4: + x86_call(&G->f, (GLubyte *) _slang_library_noise4); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 12)); + x87_fstp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_int_to_float: + break; + case slang_asm_int_to_addr: + x87_fld(&G->f, x86_deref(G->r_esp)); + x87_fistp(&G->f, x86_deref(G->r_esp)); + break; + case slang_asm_addr_copy: + x86_pop(&G->f, G->r_eax); + x86_mov(&G->f, G->r_ecx, x86_deref(G->r_esp)); + x86_mov(&G->f, x86_deref(G->r_ecx), G->r_eax); + break; + case slang_asm_addr_push: + /* TODO: use push imm32 */ + x86_mov_reg_imm(&G->f, G->r_eax, (GLint) a->param[0]); + x86_push(&G->f, G->r_eax); + break; + case slang_asm_addr_add: + x86_pop(&G->f, G->r_eax); + x86_add(&G->f, x86_deref(G->r_esp), G->r_eax); + break; + case slang_asm_addr_multiply: + x86_pop(&G->f, G->r_ecx); + x86_mov(&G->f, G->r_eax, x86_deref(G->r_esp)); + x86_mul(&G->f, G->r_ecx); + x86_mov(&G->f, x86_deref(G->r_esp), G->r_eax); + break; + case slang_asm_vec4_tex1d: + x86_call(&G->f, (GLubyte *) _slang_library_tex1d); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 12)); + break; + case slang_asm_vec4_tex2d: + x86_call(&G->f, (GLubyte *) _slang_library_tex2d); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 16)); + break; + case slang_asm_vec4_tex3d: + x86_call(&G->f, (GLubyte *) _slang_library_tex3d); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 20)); + break; + case slang_asm_vec4_texcube: + x86_call(&G->f, (GLubyte *) _slang_library_texcube); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 20)); + break; + case slang_asm_vec4_shad1d: + x86_call(&G->f, (GLubyte *) _slang_library_shad1d); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 20)); + break; + case slang_asm_vec4_shad2d: + x86_call(&G->f, (GLubyte *) _slang_library_shad2d); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 20)); + break; + case slang_asm_jump: + add_fixup(G, a->param[0], x86_jmp_forward(&G->f)); + break; + case slang_asm_jump_if_zero: + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4)); + x86_xor(&G->f, G->r_eax, G->r_eax); + x86_cmp(&G->f, G->r_eax, x86_make_disp(G->r_esp, -4)); + { + GLubyte *lab0; + /* TODO: use jcc rel8 */ + lab0 = x86_jcc_forward(&G->f, cc_NE); + add_fixup(G, a->param[0], x86_jmp_forward(&G->f)); + x86_fixup_fwd_jump(&G->f, lab0); + } + break; + case slang_asm_enter: + /* FIXME: x86_make_disp(esp, 0) + x86_lea() generates bogus code */ + assert(a->param[0] != 0); + x86_push(&G->f, G->r_ebp); + x86_lea(&G->f, G->r_ebp, x86_make_disp(G->r_esp, (GLint) a->param[0])); + break; + case slang_asm_leave: + x86_pop(&G->f, G->r_ebp); + break; + case slang_asm_local_alloc: + /* FIXME: x86_make_disp(esp, 0) + x86_lea() generates bogus code */ + assert(a->param[0] != 0); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, -(GLint) a->param[0])); + break; + case slang_asm_local_free: + /* FIXME: x86_make_disp(esp, 0) + x86_lea() generates bogus code */ + assert(a->param[0] != 0); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, (GLint) a->param[0])); + break; + case slang_asm_local_addr: + disp = -(GLint) (a->param[0] + a->param[1]) + 4; + if (disp != 0) { + x86_lea(&G->f, G->r_eax, x86_make_disp(G->r_ebp, disp)); + x86_push(&G->f, G->r_eax); + } + else + x86_push(&G->f, G->r_ebp); + break; + case slang_asm_global_addr: + /* TODO: use push imm32 */ + x86_mov_reg_imm(&G->f, G->r_eax, (GLint) & G->mach->mem + a->param[0]); + x86_push(&G->f, G->r_eax); + break; + case slang_asm_call: + add_fixup(G, a->param[0], x86_call_forward(&G->f)); + break; + case slang_asm_return: + x86_ret(&G->f); + break; + case slang_asm_discard: + x86_jmp(&G->f, G->l_discard); + break; + case slang_asm_exit: + x86_jmp(&G->f, G->l_exit); + break; + /* GL_MESA_shader_debug */ case slang_asm_float_print: /* TODO: use push imm32 */ - x86_mov_reg_imm (&G->f, G->r_eax, (GLint) (infolog)); - x86_push (&G->f, G->r_eax); - x86_call (&G->f, (GLubyte *) (do_print_float)); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); + x86_mov_reg_imm(&G->f, G->r_eax, (GLint) (infolog)); + x86_push(&G->f, G->r_eax); + x86_call(&G->f, (GLubyte *) (do_print_float)); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4)); break; - case slang_asm_int_print: + case slang_asm_int_print: /* TODO: use push imm32 */ - x86_mov_reg_imm (&G->f, G->r_eax, (GLint) (infolog)); - x86_push (&G->f, G->r_eax); - x86_call (&G->f, (GLubyte *) do_print_int); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); - break; - case slang_asm_bool_print: + x86_mov_reg_imm(&G->f, G->r_eax, (GLint) (infolog)); + x86_push(&G->f, G->r_eax); + x86_call(&G->f, (GLubyte *) do_print_int); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4)); + break; + case slang_asm_bool_print: /* TODO: use push imm32 */ - x86_mov_reg_imm (&G->f, G->r_eax, (GLint) (infolog)); - x86_push (&G->f, G->r_eax); - x86_call (&G->f, (GLubyte *) do_print_bool); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); - break; - /* vec4 */ + x86_mov_reg_imm(&G->f, G->r_eax, (GLint) (infolog)); + x86_push(&G->f, G->r_eax); + x86_call(&G->f, (GLubyte *) do_print_bool); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4)); + break; + /* vec4 */ case slang_asm_float_to_vec4: /* [vec4] | float > [vec4] */ - x87_fld (&G->f, x86_deref (G->r_esp)); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); - x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); - x87_fst (&G->f, x86_make_disp (G->r_eax, 12)); - x87_fst (&G->f, x86_make_disp (G->r_eax, 8)); - x87_fst (&G->f, x86_make_disp (G->r_eax, 4)); - x87_fstp (&G->f, x86_deref (G->r_eax)); + x87_fld(&G->f, x86_deref(G->r_esp)); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4)); + x86_mov(&G->f, G->r_eax, x86_deref(G->r_esp)); + x87_fst(&G->f, x86_make_disp(G->r_eax, 12)); + x87_fst(&G->f, x86_make_disp(G->r_eax, 8)); + x87_fst(&G->f, x86_make_disp(G->r_eax, 4)); + x87_fstp(&G->f, x86_deref(G->r_eax)); break; case slang_asm_vec4_add: /* [vec4] | vec4 > [vec4] */ - x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16)); + x86_mov(&G->f, G->r_eax, x86_make_disp(G->r_esp, 16)); for (i = 0; i < 4; i++) - x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4)); + x87_fld(&G->f, x86_make_disp(G->r_eax, i * 4)); for (i = 0; i < 4; i++) - x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4)); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); + x87_fld(&G->f, x86_make_disp(G->r_esp, i * 4)); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 16)); for (i = 0; i < 4; i++) - x87_faddp (&G->f, G->r_st4); + x87_faddp(&G->f, G->r_st4); for (i = 0; i < 4; i++) - x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4)); + x87_fstp(&G->f, x86_make_disp(G->r_eax, 12 - i * 4)); break; case slang_asm_vec4_subtract: /* [vec4] | vec4 > [vec4] */ - x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16)); + x86_mov(&G->f, G->r_eax, x86_make_disp(G->r_esp, 16)); for (i = 0; i < 4; i++) - x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4)); + x87_fld(&G->f, x86_make_disp(G->r_eax, i * 4)); for (i = 0; i < 4; i++) - x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4)); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); + x87_fld(&G->f, x86_make_disp(G->r_esp, i * 4)); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 16)); for (i = 0; i < 4; i++) - x87_fsubp (&G->f, G->r_st4); + x87_fsubp(&G->f, G->r_st4); for (i = 0; i < 4; i++) - x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4)); + x87_fstp(&G->f, x86_make_disp(G->r_eax, 12 - i * 4)); break; case slang_asm_vec4_multiply: /* [vec4] | vec4 > [vec4] */ - x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16)); + x86_mov(&G->f, G->r_eax, x86_make_disp(G->r_esp, 16)); for (i = 0; i < 4; i++) - x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4)); + x87_fld(&G->f, x86_make_disp(G->r_eax, i * 4)); for (i = 0; i < 4; i++) - x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4)); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); + x87_fld(&G->f, x86_make_disp(G->r_esp, i * 4)); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 16)); for (i = 0; i < 4; i++) - x87_fmulp (&G->f, G->r_st4); + x87_fmulp(&G->f, G->r_st4); for (i = 0; i < 4; i++) - x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4)); + x87_fstp(&G->f, x86_make_disp(G->r_eax, 12 - i * 4)); break; case slang_asm_vec4_divide: /* [vec4] | vec4 > [vec4] */ - x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16)); + x86_mov(&G->f, G->r_eax, x86_make_disp(G->r_esp, 16)); for (i = 0; i < 4; i++) - x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4)); + x87_fld(&G->f, x86_make_disp(G->r_eax, i * 4)); for (i = 0; i < 4; i++) - x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4)); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); + x87_fld(&G->f, x86_make_disp(G->r_esp, i * 4)); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 16)); for (i = 0; i < 4; i++) - x87_fdivp (&G->f, G->r_st4); + x87_fdivp(&G->f, G->r_st4); for (i = 0; i < 4; i++) - x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4)); + x87_fstp(&G->f, x86_make_disp(G->r_eax, 12 - i * 4)); break; case slang_asm_vec4_negate: /* [vec4] > [vec4] */ - x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); + x86_mov(&G->f, G->r_eax, x86_deref(G->r_esp)); for (i = 0; i < 4; i++) - x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4)); + x87_fld(&G->f, x86_make_disp(G->r_eax, i * 4)); for (i = 0; i < 4; i++) { - x87_fchs (&G->f); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4)); + x87_fchs(&G->f); + x87_fstp(&G->f, x86_make_disp(G->r_eax, 12 - i * 4)); } break; case slang_asm_vec4_dot: /* [vec4] | vec4 > [float] */ for (i = 0; i < 4; i++) - x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4)); - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); - x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); + x87_fld(&G->f, x86_make_disp(G->r_esp, i * 4)); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 16)); + x86_mov(&G->f, G->r_eax, x86_deref(G->r_esp)); for (i = 0; i < 4; i++) - x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4)); + x87_fld(&G->f, x86_make_disp(G->r_eax, i * 4)); for (i = 0; i < 4; i++) - x87_fmulp (&G->f, G->r_st4); + x87_fmulp(&G->f, G->r_st4); for (i = 0; i < 3; i++) - x87_faddp (&G->f, G->r_st1); - x87_fstp (&G->f, x86_deref (G->r_eax)); + x87_faddp(&G->f, G->r_st1); + x87_fstp(&G->f, x86_deref(G->r_eax)); break; case slang_asm_vec4_copy: /* [vec4] | vec4 > [vec4] */ - x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, a->param[0])); - x86_pop (&G->f, G->r_ecx); - x86_pop (&G->f, G->r_edx); - x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1]), G->r_ecx); - x86_pop (&G->f, G->r_ebx); - x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 4), G->r_edx); - x86_pop (&G->f, G->r_ecx); - x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 8), G->r_ebx); - x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 12), G->r_ecx); + x86_mov(&G->f, G->r_eax, x86_make_disp(G->r_esp, a->param[0])); + x86_pop(&G->f, G->r_ecx); + x86_pop(&G->f, G->r_edx); + x86_mov(&G->f, x86_make_disp(G->r_eax, a->param[1]), G->r_ecx); + x86_pop(&G->f, G->r_ebx); + x86_mov(&G->f, x86_make_disp(G->r_eax, a->param[1] + 4), G->r_edx); + x86_pop(&G->f, G->r_ecx); + x86_mov(&G->f, x86_make_disp(G->r_eax, a->param[1] + 8), G->r_ebx); + x86_mov(&G->f, x86_make_disp(G->r_eax, a->param[1] + 12), G->r_ecx); break; case slang_asm_vec4_deref: /* [vec4] > vec4 */ - x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); - x86_mov (&G->f, G->r_ecx, x86_make_disp (G->r_eax, 12)); - x86_mov (&G->f, G->r_edx, x86_make_disp (G->r_eax, 8)); - x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx); - x86_mov (&G->f, G->r_ebx, x86_make_disp (G->r_eax, 4)); - x86_push (&G->f, G->r_edx); - x86_mov (&G->f, G->r_ecx, x86_deref (G->r_eax)); - x86_push (&G->f, G->r_ebx); - x86_push (&G->f, G->r_ecx); + x86_mov(&G->f, G->r_eax, x86_deref(G->r_esp)); + x86_mov(&G->f, G->r_ecx, x86_make_disp(G->r_eax, 12)); + x86_mov(&G->f, G->r_edx, x86_make_disp(G->r_eax, 8)); + x86_mov(&G->f, x86_deref(G->r_esp), G->r_ecx); + x86_mov(&G->f, G->r_ebx, x86_make_disp(G->r_eax, 4)); + x86_push(&G->f, G->r_edx); + x86_mov(&G->f, G->r_ecx, x86_deref(G->r_eax)); + x86_push(&G->f, G->r_ebx); + x86_push(&G->f, G->r_ecx); break; case slang_asm_vec4_equal_int: - x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, -4)); - x86_mov_reg_imm (&G->f, G->r_edx, 0x4000); + x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, -4)); + x86_mov_reg_imm(&G->f, G->r_edx, 0x4000); for (i = 0; i < 4; i++) { - x87_fld (&G->f, x86_make_disp (G->r_esp, a->param[0] + 4 + i * 4)); - x87_fcomp (&G->f, x86_make_disp (G->r_esp, a->param[1] + 4 + i * 4)); - x87_fnstsw (&G->f, G->r_eax); - x86_and (&G->f, G->r_edx, G->r_eax); + x87_fld(&G->f, x86_make_disp(G->r_esp, a->param[0] + 4 + i * 4)); + x87_fcomp(&G->f, x86_make_disp(G->r_esp, a->param[1] + 4 + i * 4)); + x87_fnstsw(&G->f, G->r_eax); + x86_and(&G->f, G->r_edx, G->r_eax); } /* TODO: use test r8,imm8 */ - x86_mov_reg_imm (&G->f, G->r_ecx, 0x4000); - x86_test (&G->f, G->r_edx, G->r_ecx); + x86_mov_reg_imm(&G->f, G->r_ecx, 0x4000); + x86_test(&G->f, G->r_edx, G->r_ecx); { GLubyte *lab0, *lab1; /* TODO: use jcc rel8 */ - lab0 = x86_jcc_forward (&G->f, cc_E); - x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ONE); + lab0 = x86_jcc_forward(&G->f, cc_E); + x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ONE); /* TODO: use jmp rel8 */ - lab1 = x86_jmp_forward (&G->f); - x86_fixup_fwd_jump (&G->f, lab0); - x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ZERO); - x86_fixup_fwd_jump (&G->f, lab1); - x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx); + lab1 = x86_jmp_forward(&G->f); + x86_fixup_fwd_jump(&G->f, lab0); + x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ZERO); + x86_fixup_fwd_jump(&G->f, lab1); + x86_mov(&G->f, x86_deref(G->r_esp), G->r_ecx); } break; default: - assert (0); + _mesa_problem(NULL, "Unexpected switch case in codegen_assem"); } } -GLboolean _slang_x86_codegen (slang_machine *mach, slang_assembly_file *file, GLuint start) +GLboolean +_slang_x86_codegen(slang_machine * mach, slang_assembly_file * file, + GLuint start) { - codegen_ctx G; - GLubyte *j_body, *j_exit; - GLuint i; + codegen_ctx G; + GLubyte *j_body, *j_exit; + GLuint i; /* Free the old code - if any. */ if (mach->x86.compiled_func != NULL) { - _mesa_exec_free (mach->x86.compiled_func); + _mesa_exec_free(mach->x86.compiled_func); mach->x86.compiled_func = NULL; } - /* - * We need as much as 1M because *all* assembly, including built-in library, is - * being translated to x86. - * The built-in library occupies 450K, so we can be safe for now. - * It is going to change in the future, when we get assembly analysis running. - */ - x86_init_func_size (&G.f, 1048576); - G.r_eax = x86_make_reg (file_REG32, reg_AX); - G.r_ecx = x86_make_reg (file_REG32, reg_CX); - G.r_edx = x86_make_reg (file_REG32, reg_DX); - G.r_ebx = x86_make_reg (file_REG32, reg_BX); - G.r_esp = x86_make_reg (file_REG32, reg_SP); - G.r_ebp = x86_make_reg (file_REG32, reg_BP); - G.r_st0 = x86_make_reg (file_x87, 0); - G.r_st1 = x86_make_reg (file_x87, 1); - G.r_st2 = x86_make_reg (file_x87, 2); - G.r_st3 = x86_make_reg (file_x87, 3); - G.r_st4 = x86_make_reg (file_x87, 4); - G.fixups = NULL; - G.fixup_count = 0; - G.labels = (GLubyte **) slang_alloc_malloc (file->count * sizeof (GLubyte *)); - G.mach = mach; - G.fpucntl = RESTORE_FPU; - - mach->x86.fpucntl_rnd_neg = RND_NEG_FPU; - mach->x86.fpucntl_restore = RESTORE_FPU; - - /* prepare stack and jump to start */ - x86_push (&G.f, G.r_ebp); - x86_mov_reg_imm (&G.f, G.r_eax, (GLint) &mach->x86.esp_restore); - x86_push (&G.f, G.r_esp); - x86_pop (&G.f, G.r_ecx); - x86_mov (&G.f, x86_deref (G.r_eax), G.r_ecx); - j_body = x86_jmp_forward (&G.f); - - /* "discard" instructions jump to this label */ - G.l_discard = x86_get_label (&G.f); - x86_mov_reg_imm (&G.f, G.r_eax, (GLint) &G.mach->kill); - x86_mov_reg_imm (&G.f, G.r_ecx, 1); - x86_mov (&G.f, x86_deref (G.r_eax), G.r_ecx); - G.l_exit = x86_get_label (&G.f); - j_exit = x86_jmp_forward (&G.f); - - for (i = 0; i < file->count; i++) - { - G.labels[i] = x86_get_label (&G.f); - if (i == start) - x86_fixup_fwd_jump (&G.f, j_body); - codegen_assem (&G, &file->code[i], &mach->infolog); - } - - /* - * Restore stack and return. - * This must be handled this way, because "discard" can be invoked from any - * place in the code. - */ - x86_fixup_fwd_jump (&G.f, j_exit); - x86_mov_reg_imm (&G.f, G.r_eax, (GLint) &mach->x86.esp_restore); - x86_mov (&G.f, G.r_esp, x86_deref (G.r_eax)); - x86_pop (&G.f, G.r_ebp); - if (G.fpucntl != RESTORE_FPU) - { - x87_fnclex (&G.f); - x86_mov_reg_imm (&G.f, G.r_eax, (GLint) &G.mach->x86.fpucntl_restore); - x87_fldcw (&G.f, x86_deref (G.r_eax)); - } - x86_ret (&G.f); - - /* fixup forward labels */ - for (i = 0; i < G.fixup_count; i++) - { - G.f.csr = G.labels[G.fixups[i].index]; - x86_fixup_fwd_jump (&G.f, G.fixups[i].csr); - } - - slang_alloc_free (G.fixups); - slang_alloc_free (G.labels); - - /* install new code */ - mach->x86.compiled_func = (GLvoid (*) (slang_machine *)) x86_get_func (&G.f); - - return GL_TRUE; + /* + * We need as much as 1M because *all* assembly, including built-in library, is + * being translated to x86. + * The built-in library occupies 450K, so we can be safe for now. + * It is going to change in the future, when we get assembly analysis running. + */ + x86_init_func_size(&G.f, 1048576); + G.r_eax = x86_make_reg(file_REG32, reg_AX); + G.r_ecx = x86_make_reg(file_REG32, reg_CX); + G.r_edx = x86_make_reg(file_REG32, reg_DX); + G.r_ebx = x86_make_reg(file_REG32, reg_BX); + G.r_esp = x86_make_reg(file_REG32, reg_SP); + G.r_ebp = x86_make_reg(file_REG32, reg_BP); + G.r_st0 = x86_make_reg(file_x87, 0); + G.r_st1 = x86_make_reg(file_x87, 1); + G.r_st2 = x86_make_reg(file_x87, 2); + G.r_st3 = x86_make_reg(file_x87, 3); + G.r_st4 = x86_make_reg(file_x87, 4); + G.fixups = NULL; + G.fixup_count = 0; + G.labels = + (GLubyte **) slang_alloc_malloc(file->count * sizeof(GLubyte *)); + G.mach = mach; + G.fpucntl = RESTORE_FPU; + + mach->x86.fpucntl_rnd_neg = RND_NEG_FPU; + mach->x86.fpucntl_restore = RESTORE_FPU; + + /* prepare stack and jump to start */ + x86_push(&G.f, G.r_ebp); + x86_mov_reg_imm(&G.f, G.r_eax, (GLint) & mach->x86.esp_restore); + x86_push(&G.f, G.r_esp); + x86_pop(&G.f, G.r_ecx); + x86_mov(&G.f, x86_deref(G.r_eax), G.r_ecx); + j_body = x86_jmp_forward(&G.f); + + /* "discard" instructions jump to this label */ + G.l_discard = x86_get_label(&G.f); + x86_mov_reg_imm(&G.f, G.r_eax, (GLint) & G.mach->kill); + x86_mov_reg_imm(&G.f, G.r_ecx, 1); + x86_mov(&G.f, x86_deref(G.r_eax), G.r_ecx); + G.l_exit = x86_get_label(&G.f); + j_exit = x86_jmp_forward(&G.f); + + for (i = 0; i < file->count; i++) { + G.labels[i] = x86_get_label(&G.f); + if (i == start) + x86_fixup_fwd_jump(&G.f, j_body); + codegen_assem(&G, &file->code[i], &mach->infolog); + } + + /* + * Restore stack and return. + * This must be handled this way, because "discard" can be invoked from any + * place in the code. + */ + x86_fixup_fwd_jump(&G.f, j_exit); + x86_mov_reg_imm(&G.f, G.r_eax, (GLint) & mach->x86.esp_restore); + x86_mov(&G.f, G.r_esp, x86_deref(G.r_eax)); + x86_pop(&G.f, G.r_ebp); + if (G.fpucntl != RESTORE_FPU) { + x87_fnclex(&G.f); + x86_mov_reg_imm(&G.f, G.r_eax, (GLint) & G.mach->x86.fpucntl_restore); + x87_fldcw(&G.f, x86_deref(G.r_eax)); + } + x86_ret(&G.f); + + /* fixup forward labels */ + for (i = 0; i < G.fixup_count; i++) { + G.f.csr = G.labels[G.fixups[i].index]; + x86_fixup_fwd_jump(&G.f, G.fixups[i].csr); + } + + slang_alloc_free(G.fixups); + slang_alloc_free(G.labels); + + /* install new code */ + mach->x86.compiled_func = (GLvoid(*)(slang_machine *)) x86_get_func(&G.f); + + return GL_TRUE; } #endif - |