// cache_addx are inlines
static INLINE void cache_addb(Bit8u val) {
	*cache.pos++=val;		// ptr,mov,+
}

static INLINE void cache_addw(Bit16u val) {
	*(Bit16u*)cache.pos=val;	//ptr,size,mov,+,mov
	cache.pos+=2;
}
// mov, mov, add, size override?, ptr?

static INLINE void cache_addd(Bit32u val) {
	*(Bit32u*)cache.pos=val;	//ptr,size,mov,+,mov
	cache.pos+=4;
}
//fewer instructions & smaller, so more fits in the code cache

//1 vs 2 cache_add parsed/total
static void gen_setzeroflag(void) {
	if (x86gen.flagsactive) IllegalOption("gen_setzeroflag");
	cache_addd(0x40240c83);		//OR DWORD [ESP],0x40
}

//1 vs 2 cache_add parsed/total
static void gen_clearzeroflag(void) {
	if (x86gen.flagsactive) IllegalOption("gen_clearzeroflag");
	cache_addd(0xbf242483);		//AND DWORD [ESP],~0x40
}

//3 vs 5 total cache_add, 2-3 vs 3 cache_add parsed, +tmp var mov,or,shift
static void gen_load_host(void * data,DynReg * dr1,Bitu size) {
	GenReg * gr1=FindDynReg(dr1,true);
	Bit8u tmp;
	switch (size) {
	case 1:tmp=0xb6;break;		//movzx byte
	case 2:tmp=0xb7;break;		//movzx word
	case 4:tmp=0x8b;goto skip;	//mov, no 0x0f prefix
	default:
		IllegalOption("gen_load_host");
	}
	cache_addb(0x0f);
skip:
	cache_addw(tmp|(0x5+(gr1->index<<3))<<8);
	cache_addd((Bit32u)data);
	dr1->flags|=DYNFLG_CHANGED;
}

//3 vs 5 total cache_add, 2-3 vs 3 cache_add parsed, +tmp var mov,or,shift
static void gen_mov_host(void * data,DynReg * dr1,Bitu size,Bit8u di1=0) {
	GenReg * gr1=FindDynReg(dr1,(size==4));
	Bit8u tmp;
	switch (size) {
	case 1:tmp=0x8a;break;		//mov byte
	case 2:cache_addb(0x66);	//mov word: 0x66 prefix, falls through to 0x8b
	case 4:tmp=0x8b;break;		//mov
	default:
		IllegalOption("gen_mov_host");
	}
	cache_addw(tmp|(0x5+((gr1->index+(di1?4:0))<<3))<<8);
	cache_addd((Bit32u)data);
	dr1->flags|=DYNFLG_CHANGED;
}

//2 vs 3 total cache_add, 2 vs 2 cache_add parsed, +tmp var mov
static void gen_extend_word(bool sign,DynReg * ddr,DynReg * dsr) {
	GenReg * gsr=FindDynReg(dsr);
	GenReg * gdr=FindDynReg(ddr,true);
	Bit16u tmp;
	if (sign) tmp=0xbf0f;
	else tmp=0xb70f;
	cache_addw(tmp);
	cache_addb(0xc0+(gdr->index<<3)+(gsr->index));
	ddr->flags|=DYNFLG_CHANGED;
}

//3 vs 4 total cache_add, 2-3 vs 2-3 cache_add parsed, +tmp var mov
static void gen_extend_byte(bool sign,bool dword,DynReg * ddr,DynReg * dsr,Bit8u dsi) {
	GenReg * gsr=FindDynReg(dsr);
	GenReg * gdr=FindDynReg(ddr,dword);
	Bit16u tmp;
	if (!dword) cache_addb(0x66);
	if (sign) tmp=0xbe0f;
	else tmp=0xb60f;
	cache_addw(tmp);
	cache_addb(0xc0+(gdr->index<<3)+(gsr->index+dsi));
	ddr->flags|=DYNFLG_CHANGED;
}

//version 2: 1-2 vs 2-3 parsed
static void gen_extend_byte(bool sign,bool dword,DynReg * ddr,DynReg * dsr,Bit8u dsi) {
	GenReg * gsr=FindDynReg(dsr);
	GenReg * gdr=FindDynReg(ddr,dword);
	Bit8u index=0xc0+(gdr->index<<3)+(gsr->index+dsi);
	Bit16u sign_op;
	if (sign) sign_op=0xbe0f;
	else sign_op=0xb60f;
	if (!dword) {
		cache_addd(0x66|sign_op<<8|index<<24);
	} else {
		cache_addw(sign_op);
		cache_addb(index);
	}
	ddr->flags|=DYNFLG_CHANGED;
}
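The counts above ("1 vs 2 cache_add", etc.) all come from the same observation: on the x86 host this backend generates code for (little-endian), storing one packed Bit32u writes the same bytes as several consecutive cache_addb calls, so opcode, mod/rm and immediate bytes can be merged into a single store. A minimal standalone sketch of that equivalence for the OR DWORD [ESP],0x40 sequence of gen_setzeroflag follows; it is not DOSBox code, and the memcpy store is only a portable stand-in for the *(Bit32u*)cache.pos cast used above.

// Standalone illustration only: compares four byte stores against one
// packed little-endian dword store for the bytes 83 0C 24 40.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
	uint8_t a[4],b[4];
	uint8_t *p=a;
	*p++=0x83; *p++=0x0c; *p++=0x24; *p++=0x40;	// OR DWORD [ESP],0x40, byte by byte
	uint32_t packed=0x40240c83;			// same bytes, packed low byte first
	std::memcpy(b,&packed,4);			// stand-in for *(Bit32u*)cache.pos=val
	std::printf("identical: %s\n",std::memcmp(a,b,4)==0?"yes":"no");
	return 0;
}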
//5 vs 12 total cache_add, 0/2/3 vs 0/2/3/4/5/6 cache_add parsed, +tmp mov,or,shift
static void gen_lea(DynReg * ddr,DynReg * dsr1,DynReg * dsr2,Bitu scale,Bits imm) {
	GenReg * gdr=FindDynReg(ddr);
	Bitu imm_size;
	Bit8u rm_base=(gdr->index << 3);
	Bit8u index;
	if (dsr1) {
		GenReg * gsr1=FindDynReg(dsr1);
		if (!imm && (gsr1->index!=0x5)) {
			imm_size=0;rm_base+=0x0;	//no imm
		} else if ((imm>=-128 && imm<=127)) {
			imm_size=1;rm_base+=0x40;	//Signed byte imm
		} else {
			imm_size=4;rm_base+=0x80;	//Signed dword imm
		}
		if (!dsr2 && (ddr==dsr1) && !imm_size) return;
		index=gsr1->index;
	} else {
		imm_size=4;
		index=5;
	}
	if (dsr2) {
		GenReg * gsr2=FindDynReg(dsr2);
		cache_addw(0x8d|(rm_base+0x4)<<8);	//0x8d=LEA | the sib indicator
		Bit8u sib=(index+(gsr2->index<<3)+(scale<<6));
		cache_addb(sib);
	} else {
		cache_addw(0x8d|(rm_base+index)<<8);	//LEA | mod/rm (disp emitted below per imm_size)
	}
	switch (imm_size) {
	case 0:break;
	case 1:cache_addb(imm);break;
	case 4:cache_addd(imm);break;
	}
	ddr->flags|=DYNFLG_CHANGED;
}

//3 vs 6 total cache_add, 3 vs 6 cache_add parsed, +or,shift
static void gen_lea_imm_mem(DynReg * ddr,DynReg * dsr,void* data) {
	GenReg * gdr=FindDynReg(ddr);
	Bit8u rm_base=(gdr->index << 3);
	cache_addw(0x058b+(rm_base<<8));	//MOV reg,[data]
	cache_addd((Bit32u)data);
	GenReg * gsr=FindDynReg(dsr);
	cache_addd((0x8d|(rm_base+0x44)<<8)|((rm_base+gsr->index)<<16)|0x00<<24);	//LEA reg,[reg+reg] with disp8=0
	ddr->flags|=DYNFLG_CHANGED;
}

//7 vs 8 total cache_add, 2 vs 2-3 compares, 1-2 vs 3-4 cache_add parsed, +var mov,or
static void gen_imul_word_imm(bool dword,DynReg * dr1,DynReg * dr2,Bits imm) {
	GenReg * gr1=FindDynReg(dr1);GenReg * gr2=FindDynReg(dr2);
	bool imm_byte=(imm>=-128 && imm<=127);
	Bit16u index=(0xc0+(gr1->index<<3)+gr2->index)<<8;
	if (!dword) {
		if (imm_byte) {
			cache_addd((0x66|(0x6b|index)<<8)|imm<<24);	//IMUL reg16,reg16,imm8
		} else {
			cache_addb(0x66);
			cache_addd((0x69|index)|imm<<16);		//IMUL reg16,reg16,imm16
		}
	} else {
		if (imm_byte) {
			cache_addw(0x6b|index);
			cache_addb(imm);				//IMUL reg32,reg32,imm8
		} else {
			cache_addw(0x69|index);
			cache_addd(imm);				//IMUL reg32,reg32,imm32
		}
	}
	dr1->flags|=DYNFLG_CHANGED;
}

//2 vs 2 total cache_add, 1 vs 1-2 cache_add parsed
static void gen_cbw(bool dword,DynReg * dyn_ax) {
	ForceDynReg(x86gen.regs[X86_REG_EAX],dyn_ax);
	if (!dword) cache_addw(0x9866);		//CBW (0x66 prefix)
	else cache_addb(0x98);			//CWDE
	dyn_ax->flags|=DYNFLG_CHANGED;
}

//1 vs 2 total cache_add, 1 vs 1 cache_add parsed, +var mov
static void gen_mul_byte(bool imul,DynReg * dyn_ax,DynReg * dr1,Bit8u di1) {
	ForceDynReg(x86gen.regs[X86_REG_EAX],dyn_ax);
	GenReg * gr1=FindDynReg(dr1);
	Bit16u tmp;
	if (imul) tmp=0xe8f6;		//IMUL r/m8
	else tmp=0xe0f6;		//MUL r/m8
	cache_addw(tmp+((gr1->index+di1)<<8));
	dyn_ax->flags|=DYNFLG_CHANGED;
}

//2 vs 3 total cache_add, 1-2 vs 1-2 cache_add parsed, +var mov
static void gen_mul_word(bool imul,DynReg * dyn_ax,DynReg * dyn_dx,bool dword,DynReg * dr1) {
	ForceDynReg(x86gen.regs[X86_REG_EAX],dyn_ax);
	ForceDynReg(x86gen.regs[X86_REG_EDX],dyn_dx);
	GenReg * gr1=FindDynReg(dr1);
	Bit16u tmp;
	if (!dword) cache_addb(0x66);
	if (imul) tmp=0xe8f7;		//IMUL r/m
	else tmp=0xe0f7;		//MUL r/m
	cache_addw(tmp+(gr1->index<<8));
	dyn_ax->flags|=DYNFLG_CHANGED;
	dyn_dx->flags|=DYNFLG_CHANGED;
}

//2 vs 5 total cache_add, 1-2 vs 3-4 cache_add parsed, +var mov,or,shift
static void gen_dshift_imm(bool dword,bool left,DynReg * dr1,DynReg * dr2,Bitu imm) {
	GenReg * gr1=FindDynReg(dr1);
	GenReg * gr2=FindDynReg(dr2);
	Bit16u tmp;
	if (!dword) cache_addb(0x66);
	if (left) tmp=0xa40f;		//SHLD IMM
	else tmp=0xac0f;		//SHRD IMM
	cache_addd(tmp|(0xc0+gr1->index+(gr2->index<<3))<<16|imm<<24);
	dr1->flags|=DYNFLG_CHANGED;
}

//3 vs 4 total cache_add, 1-2 vs 2-3 cache_add parsed, +var mov
static void gen_dshift_cl(bool dword,bool left,DynReg * dr1,DynReg * dr2,DynReg * drecx) {
	ForceDynReg(x86gen.regs[X86_REG_ECX],drecx);
	GenReg * gr1=FindDynReg(dr1);
	GenReg * gr2=FindDynReg(dr2);
	Bit16u tmp;
	if (left) tmp=0xa50f;		//SHLD CL
	else tmp=0xad0f;		//SHRD CL
	if (!dword) cache_addd((0x66|tmp<<8)|(0xc0+gr1->index+(gr2->index<<3))<<24);
	else {
		cache_addw(tmp);
		cache_addb(0xc0+gr1->index+(gr2->index<<3));
	}
	dr1->flags|=DYNFLG_CHANGED;
}
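Nearly every generator above builds its register-to-register mod/rm byte with the same 0xc0+(dst<<3)+src arithmetic: mod=11 selects register-direct operands, the middle field is the destination GenReg index and the low field the source (plus di1/dsi for the high-byte registers). A small hedged sketch spelling that out for the MOVZX case used by gen_extend_word; make_modrm and the register numbers are illustrative helpers, not part of the dynrec core.

// Standalone illustration of the 0xc0+(dst<<3)+src pattern.
#include <cstdint>
#include <cstdio>

static uint8_t make_modrm(uint8_t mod,uint8_t reg,uint8_t rm) {
	return (uint8_t)((mod<<6)|(reg<<3)|rm);	// mod | reg | r/m fields
}

int main() {
	// MOVZX EDX,AX -> 0F B7 D0 : reg field=EDX(2), r/m field=EAX(0)
	uint8_t modrm=make_modrm(3,2,0);
	std::printf("modrm=%02X (same as 0xc0+(2<<3)+0)\n",modrm);
	return 0;
}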
//reduced 11 total, reduced 2 parsed
static void gen_call_function(void * func,char const* ops,...) {
	.
	.
	if (*scan++=='d') cache_addb(0x50+genreg->index);	//Push reg
	else {
		Bit16u movzx_eax_reg;
		Bit8u index=0xc0;
		switch (*scan++) {
		case 'w':
			movzx_eax_reg=0xb70f;	//MOVZX EAX,reg|index|Push EAX
			break;
		case 'h':
			index+=4;
		case 'l':
			movzx_eax_reg=0xb60f;	//MOVZX EAX,reg[0/1]|index|Push EAX
			break;
		case 'r':
			/* release the reg afterwards */
			release=true;
			goto scanagain;
		default:
			IllegalOption("gen_call_function param:DREG");
		}
		cache_addd((movzx_eax_reg|(index+genreg->index)<<16)|(0x50<<24));
	}
	if (release) gen_releasereg(dynreg);
	}
	.
	.
	/* Save the return value in correct register */
	if (retparam) {
		DynReg * dynreg=(DynReg *)retparam->value;
		GenReg * genreg=FindDynReg(dynreg);
		Bit16u mov_reg_eax=0xc08b;
		Bit8u index=genreg->index;
		if (index) {	// test for (e)ax/al
			switch (*retparam->line) {
			case 'w':
				cache_addb(0x66);
			case 'd':
				break;
			case 'h':
				index+=4;
			case 'l':
				mov_reg_eax--;
				break;
			default:
				goto skip2;
			}
			cache_addw(mov_reg_eax+((index)<<(8+3)));	//mov reg,eax
skip2:			;
		}
		dynreg->flags|=DYNFLG_CHANGED;
	}

//3 parsed saved
static void gen_return_fast(BlockReturn retcode,bool ret_exception=false) {
	if (GCC_UNLIKELY(x86gen.flagsactive)) IllegalOption("gen_return_fast");
	cache_addw(0x0d8b);		//MOV ECX, the flags
	cache_addd((Bit32u)&cpu_regs.flags);
	if (!ret_exception) {
		if (retcode==0) {
			//cache_addw(0xc483);	//ADD ESP,4
			//cache_addb(0x4);
			//cache_addw(0xc033);	//MOV EAX, 0
			cache_addb(0x83);
			cache_addd(0xc03304c4);	//ADD ESP,4 ; XOR EAX,EAX
		} else {
			cache_addd(0xb804c483);	//ADD ESP,4 ; MOV EAX,imm32 (retcode follows)
			cache_addd(retcode);
		}
	}
	cache_addb(0xc3);		//RET
}
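To cross-check the packed constants in gen_return_fast, here is a hedged standalone sketch that lays out the same instruction stream byte by byte for the non-zero retcode path, assuming a little-endian host; the flags address and return code are dummy values, not the real cpu_regs.flags.

// Standalone illustration: the byte stream the cache_add calls above deposit.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
	uint8_t code[16]; uint8_t *p=code;
	uint32_t flags_addr=0x12345678;	// stand-in for (Bit32u)&cpu_regs.flags
	uint32_t retcode=2;		// stand-in return code

	*p++=0x8b; *p++=0x0d;				// MOV ECX,[flags_addr]
	std::memcpy(p,&flags_addr,4); p+=4;
	*p++=0x83; *p++=0xc4; *p++=0x04;		// ADD ESP,4
	*p++=0xb8;					// MOV EAX,imm32
	std::memcpy(p,&retcode,4); p+=4;
	*p++=0xc3;					// RET

	for (uint8_t *q=code;q!=p;++q) std::printf("%02X ",*q);
	std::printf("\n");	// 8B 0D 78 56 34 12 83 C4 04 B8 02 00 00 00 C3
	return 0;
}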