[dynarmic] configurable JIT state ABI pointer

lizzie 2025-07-27 19:12:51 +01:00 committed by crueter
parent c896897d40
commit b12989c077
15 changed files with 209 additions and 218 deletions
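In short, the change stops hard-coding `r15` as the register holding the guest JIT state pointer and routes every such access through named constants (`ABI_JIT_PTR` as a `HostLoc`, mirrored as `BlockOfCode::ABI_JIT_PTR` for xbyak). A minimal sketch of the pattern applied across the call sites below; `EmitLoadHaltReason` is a hypothetical helper used only for illustration, and the snippet assumes the usual `using namespace Xbyak::util;` seen in these files rather than being compilable on its own:

```cpp
// Before: the JIT state pointer register is baked into every emitter.
void EmitLoadHaltReason(BlockOfCode& code) {
    code.mov(eax, dword[r15 + offsetof(A64JitState, halt_reason)]);
}

// After: call sites name the ABI constant, so retargeting the register
// means editing one definition instead of every emitter.
void EmitLoadHaltReason(BlockOfCode& code) {
    code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)]);
}
```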

View file

@ -47,6 +47,7 @@ constexpr std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);
constexpr inline Xbyak::Reg ABI_JIT_REG = Xbyak::util::rbx;
#ifdef _WIN32
// Microsoft x64 ABI

View file

@ -79,7 +79,7 @@ contain a prediction with the same `UniqueHash`.
? u64(unique_hash_to_code_ptr[imm64])
: u64(code->GetReturnFromRunCodeAddress());
code->mov(index_reg, dword[r15 + offsetof(JitState, rsb_ptr)]);
code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
code->add(index_reg, 1);
code->and_(index_reg, u32(JitState::RSBSize - 1));
@ -91,13 +91,13 @@ contain a prediction with the same `UniqueHash`.
Xbyak::Label label;
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(loc_desc_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->je(label, code->T_SHORT);
}
code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg);
code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
code->L(label);
}
@ -122,14 +122,14 @@ To check if a prediction is in the RSB, we linearly scan the RSB.
// This calculation has to match up with IREmitter::PushRSB
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
code->shl(rcx, 32);
code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]);
code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
code->or_(rbx, rcx);
code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->cmove(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
}
code->jmp(rax);
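The two documentation hunks above describe the return stack buffer (RSB): `PushRSB` stores a (location descriptor, code pointer) pair, and the pop path linearly scans every slot, keeping the dispatcher's return address when nothing matches. Roughly, the emitted cmp/cmove loop computes the following; this is a hedged sketch in plain C++ (`PopRSBHint` is not a real dynarmic function), assuming the `JitState` fields and `u64` alias shown in the diff:

```cpp
// Conceptual equivalent of the emitted scan above: fall back to the
// dispatcher unless one of the RSB slots matches the current location.
u64 PopRSBHint(const JitState& jit_state, u64 current_descriptor, u64 return_from_run_code) {
    u64 target = return_from_run_code;                 // miss: return to the dispatcher
    for (size_t i = 0; i < JitState::RSBSize; ++i) {   // one cmp/cmove per slot
        if (jit_state.rsb_location_descriptors[i] == current_descriptor) {
            target = jit_state.rsb_codeptrs[i];        // hit: jump straight to cached code
        }
    }
    return target;
}
```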

View file

@ -44,21 +44,21 @@ namespace Dynarmic::Backend::X64 {
using namespace Xbyak::util;
static Xbyak::Address MJitStateReg(A32::Reg reg) {
return dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
return dword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
}
static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) {
if (A32::IsSingleExtReg(reg)) {
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::S0);
return dword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u32) * index];
return dword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + sizeof(u32) * index];
}
if (A32::IsDoubleExtReg(reg)) {
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::D0);
return qword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u64) * index];
return qword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + sizeof(u64) * index];
}
if (A32::IsQuadExtReg(reg)) {
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::Q0);
return xword[r15 + offsetof(A32JitState, ExtReg) + 2 * sizeof(u64) * index];
return xword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + 2 * sizeof(u64) * index];
}
ASSERT_FALSE("Should never happen.");
}
@ -220,7 +220,7 @@ void A32EmitX64::GenTerminalHandlers() {
// PC ends up in ebp, location_descriptor ends up in rbx
const auto calculate_location_descriptor = [this] {
// This calculation has to match up with IREmitter::PushRSB
code.mov(ebx, dword[r15 + offsetof(A32JitState, upper_location_descriptor)]);
code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
code.shl(rbx, 32);
code.mov(ecx, MJitStateReg(A32::Reg::PC));
code.mov(ebp, ecx);
@ -232,17 +232,17 @@ void A32EmitX64::GenTerminalHandlers() {
code.align();
terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
calculate_location_descriptor();
code.mov(eax, dword[r15 + offsetof(A32JitState, rsb_ptr)]);
code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)]);
code.dec(eax);
code.and_(eax, u32(A32JitState::RSBPtrMask));
code.mov(dword[r15 + offsetof(A32JitState, rsb_ptr)], eax);
code.cmp(rbx, qword[r15 + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)], eax);
code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
if (conf.HasOptimization(OptimizationFlag::FastDispatch)) {
code.jne(rsb_cache_miss);
} else {
code.jne(code.GetReturnFromRunCodeAddress());
}
code.mov(rax, qword[r15 + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]);
code.mov(rax, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]);
code.jmp(rax);
PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a32_terminal_handler_pop_rsb_hint");
@ -392,17 +392,17 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
// so we load them both at the same time with one 64-bit read. This allows us to
// extract all of their bits together at once with one pext.
static_assert(offsetof(A32JitState, upper_location_descriptor) + 4 == offsetof(A32JitState, cpsr_ge));
code.mov(result.cvt64(), qword[r15 + offsetof(A32JitState, upper_location_descriptor)]);
code.mov(result.cvt64(), qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
code.mov(tmp.cvt64(), 0x80808080'00000003ull);
code.pext(result.cvt64(), result.cvt64(), tmp.cvt64());
code.mov(tmp, 0x000f0220);
code.pdep(result, result, tmp);
} else {
code.mov(result, dword[r15 + offsetof(A32JitState, upper_location_descriptor)]);
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
code.imul(result, result, 0x120);
code.and_(result, 0x00000220);
code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_ge)]);
code.mov(tmp, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
code.and_(tmp, 0x80808080);
code.imul(tmp, tmp, 0x00204081);
code.shr(tmp, 12);
@ -410,11 +410,11 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
code.or_(result, tmp);
}
code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_q)]);
code.mov(tmp, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
code.shl(tmp, 27);
code.or_(result, tmp);
code.mov(tmp2, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
code.mov(tmp2, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]);
if (code.HasHostFeature(HostFeature::FastBMI2)) {
code.mov(tmp, NZCV::x64_mask);
code.pext(tmp2, tmp2, tmp);
@ -426,7 +426,7 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
}
code.or_(result, tmp2);
code.or_(result, dword[r15 + offsetof(A32JitState, cpsr_jaifm)]);
code.or_(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)]);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -444,7 +444,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
// cpsr_q
code.bt(cpsr, 27);
code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
// cpsr_nzcv
code.mov(tmp, cpsr);
@ -456,12 +456,12 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
code.imul(tmp, tmp, NZCV::to_x64_multiplier);
code.and_(tmp, NZCV::x64_mask);
}
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], tmp);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], tmp);
// cpsr_jaifm
code.mov(tmp, cpsr);
code.and_(tmp, 0x010001DF);
code.mov(dword[r15 + offsetof(A32JitState, cpsr_jaifm)], tmp);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)], tmp);
if (code.HasHostFeature(HostFeature::FastBMI2)) {
// cpsr_et and cpsr_ge
@ -469,7 +469,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
// This mask is 0x7FFF0000, because we do not want the MSB to be sign extended to the upper dword.
static_assert((A32::LocationDescriptor::FPSCR_MODE_MASK & ~0x7FFF0000) == 0);
code.and_(qword[r15 + offsetof(A32JitState, upper_location_descriptor)], u32(0x7FFF0000));
code.and_(qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], u32(0x7FFF0000));
code.mov(tmp, 0x000f0220);
code.pext(cpsr, cpsr, tmp);
code.mov(tmp.cvt64(), 0x01010101'00000003ull);
@ -479,14 +479,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(tmp2.cvt64(), tmp.cvt64());
code.sub(tmp.cvt64(), cpsr.cvt64());
code.xor_(tmp.cvt64(), tmp2.cvt64());
code.or_(qword[r15 + offsetof(A32JitState, upper_location_descriptor)], tmp.cvt64());
code.or_(qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], tmp.cvt64());
} else {
code.and_(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], u32(0xFFFF0000));
code.and_(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], u32(0xFFFF0000));
code.mov(tmp, cpsr);
code.and_(tmp, 0x00000220);
code.imul(tmp, tmp, 0x00900000);
code.shr(tmp, 28);
code.or_(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], tmp);
code.or_(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], tmp);
code.and_(cpsr, 0x000f0000);
code.shr(cpsr, 16);
@ -495,14 +495,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(tmp, 0x80808080);
code.sub(tmp, cpsr);
code.xor_(tmp, 0x80808080);
code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], tmp);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], tmp);
}
}
void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], to_store);
}
void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
@ -510,7 +510,7 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
if (args[0].IsImmediate()) {
const u32 imm = args[0].GetImmediateU32();
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
@ -518,14 +518,14 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
code.shr(a, 28);
code.mov(b, NZCV::x64_mask);
code.pdep(a, a, b);
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
} else {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
code.shr(a, 28);
code.imul(a, a, NZCV::to_x64_multiplier);
code.and_(a, NZCV::x64_mask);
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
}
}
@ -534,25 +534,25 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
if (args[0].IsImmediate()) {
const u32 imm = args[0].GetImmediateU32();
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
code.shr(a, 28);
code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
code.mov(b, NZCV::x64_mask);
code.pdep(a, a, b);
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
} else {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
code.shr(a, 28);
code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
code.imul(a, a, NZCV::to_x64_multiplier);
code.and_(a, NZCV::x64_mask);
code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
}
}
@ -562,10 +562,10 @@ void A32EmitX64::EmitA32SetCpsrNZ(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
code.movzx(tmp, code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1]);
code.movzx(tmp, code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1]);
code.and_(tmp, 1);
code.or_(tmp, nz);
code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], tmp.cvt8());
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], tmp.cvt8());
}
void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
@ -575,11 +575,11 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
if (args[1].IsImmediate()) {
const bool c = args[1].GetImmediateU1();
code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], c);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
} else {
const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(args[1]).cvt8();
code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], c);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
}
} else {
const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
@ -588,19 +588,19 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
const bool c = args[1].GetImmediateU1();
code.or_(nz, c);
code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
} else {
const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(args[1]).cvt32();
code.or_(nz, c);
code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
}
}
}
static void EmitGetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]);
if (flag_bit != 0) {
code.shr(result, static_cast<int>(flag_bit));
}
@ -616,18 +616,18 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code.mov(dword[r15 + offsetof(A32JitState, cpsr_q)], 1);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], 1);
}
} else {
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
code.or_(code.byte[r15 + offsetof(A32JitState, cpsr_q)], to_store);
code.or_(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], to_store);
}
}
void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.movd(result, dword[r15 + offsetof(A32JitState, cpsr_ge)]);
code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -637,10 +637,10 @@ void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
if (args[0].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
code.movd(dword[r15 + offsetof(A32JitState, cpsr_ge)], to_store);
code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
} else {
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32();
code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], to_store);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
}
}
@ -654,7 +654,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
ge |= mcl::bit::get_bit<17>(imm) ? 0x0000FF00 : 0;
ge |= mcl::bit::get_bit<16>(imm) ? 0x000000FF : 0;
code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], ge);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], ge);
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
@ -663,7 +663,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
code.shr(a, 16);
code.pdep(a, a, b);
code.imul(a, a, 0xFF);
code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], a);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a);
} else {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
@ -672,7 +672,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
code.imul(a, a, 0x00204081);
code.and_(a, 0x01010101);
code.imul(a, a, 0xFF);
code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], a);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a);
}
}
@ -716,7 +716,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
const u32 new_upper = upper_without_t | (mcl::bit::get_bit<0>(new_pc) ? 1 : 0);
code.mov(MJitStateReg(A32::Reg::PC), new_pc & mask);
code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
} else {
const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32();
const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();
@ -728,7 +728,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
code.lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code.and_(new_pc, mask);
code.mov(MJitStateReg(A32::Reg::PC), new_pc);
code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
}
}
@ -819,7 +819,7 @@ void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
code.mov(result, dword[r15 + offsetof(A32JitState, fpsr_nzcv)]);
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)]);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -833,7 +833,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(tmp, NZCV::x64_mask);
code.pext(tmp, value, tmp);
code.shl(tmp, 28);
code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], tmp);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)], tmp);
return;
}
@ -843,7 +843,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
code.and_(value, NZCV::x64_mask);
code.imul(value, value, NZCV::from_x64_multiplier);
code.and_(value, NZCV::arm_mask);
code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], value);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)], value);
}
static void EmitCoprocessorException() {
@ -1155,7 +1155,7 @@ void A32EmitX64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_locat
}();
if (old_upper != new_upper) {
code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
}
}
@ -1175,7 +1175,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
EmitPatchJg(terminal.next);
}
} else {
code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0);
patch_information[terminal.next].jz.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
@ -1240,7 +1240,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr
}
void A32EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0);
code.jne(code.GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location, is_single_step);
}

View file

@ -168,7 +168,7 @@ void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
}
void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) {
code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, exclusive_state)], u8(0));
}
void A32EmitX64::EmitA32ExclusiveReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {
@ -244,14 +244,14 @@ void A32EmitX64::EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak
const A32::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};
code.test(dword[r15 + offsetof(A32JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
code.test(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
if (end) {
code.jz(*end, code.T_NEAR);
} else {
code.jz(skip, code.T_NEAR);
}
EmitSetUpperLocationDescriptor(current_location, ctx.Location());
code.mov(dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC());
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC());
code.ForceReturnFromRunCode();
code.L(skip);
}

View file

@ -192,10 +192,10 @@ void A64EmitX64::GenTerminalHandlers() {
const auto calculate_location_descriptor = [this] {
// This calculation has to match up with A64::LocationDescriptor::UniqueHash
// TODO: Optimization is available here based on known state of fpcr.
code.mov(rbp, qword[r15 + offsetof(A64JitState, pc)]);
code.mov(rbp, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]);
code.mov(rcx, A64::LocationDescriptor::pc_mask);
code.and_(rcx, rbp);
code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]);
code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
code.and_(ebx, A64::LocationDescriptor::fpcr_mask);
code.shl(rbx, A64::LocationDescriptor::fpcr_shift);
code.or_(rbx, rcx);
@ -207,17 +207,17 @@ void A64EmitX64::GenTerminalHandlers() {
code.align();
terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
calculate_location_descriptor();
code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]);
code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)]);
code.dec(eax);
code.and_(eax, u32(A64JitState::RSBPtrMask));
code.mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax);
code.cmp(rbx, qword[r15 + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)], eax);
code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
if (conf.HasOptimization(OptimizationFlag::FastDispatch)) {
code.jne(rsb_cache_miss, code.T_NEAR);
} else {
code.jne(code.GetReturnFromRunCodeAddress());
}
code.mov(rax, qword[r15 + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]);
code.mov(rax, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]);
code.jmp(rax);
PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a64_terminal_handler_pop_rsb_hint");
@ -272,7 +272,7 @@ void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
code.mov(result, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]);
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);
code.shr(result, NZCV::x64_c_flag_bit);
code.and_(result, 1);
ctx.reg_alloc.DefineValue(inst, result);
@ -281,7 +281,7 @@ void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr().cvt32();
code.mov(nzcv_raw, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]);
code.mov(nzcv_raw, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);
if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
@ -310,20 +310,20 @@ void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
code.imul(nzcv_raw, nzcv_raw, NZCV::to_x64_multiplier);
code.and_(nzcv_raw, NZCV::x64_mask);
}
code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw);
}
void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], to_store);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], to_store);
}
void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
code.mov(result, dword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -331,13 +331,13 @@ void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
code.mov(result, qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
ctx.reg_alloc.DefineValue(inst, result);
}
void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.movd(result, addr);
@ -346,7 +346,7 @@ void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.movq(result, addr);
@ -355,7 +355,7 @@ void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.movaps(result, addr);
@ -364,13 +364,13 @@ void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
code.mov(result, qword[r15 + offsetof(A64JitState, sp)]);
code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]);
ctx.reg_alloc.DefineValue(inst, result);
}
void A64EmitX64::EmitA64GetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
code.mov(result, dword[r15 + offsetof(A64JitState, fpcr)]);
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -388,7 +388,7 @@ void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
const auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
if (args[1].FitsInImmediateS32()) {
code.mov(addr, args[1].GetImmediateS32());
} else {
@ -402,7 +402,7 @@ void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
const auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
if (args[1].FitsInImmediateS32()) {
code.mov(addr, args[1].GetImmediateS32());
} else if (args[1].IsInXmm()) {
@ -417,7 +417,7 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
@ -430,7 +430,7 @@ void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]);
code.movq(to_store, to_store); // TODO: Remove when able
@ -440,7 +440,7 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
code.movaps(addr, to_store);
@ -448,7 +448,7 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto addr = qword[r15 + offsetof(A64JitState, sp)];
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)];
if (args[0].FitsInImmediateS32()) {
code.mov(addr, args[0].GetImmediateS32());
} else if (args[0].IsInXmm()) {
@ -486,7 +486,7 @@ void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto addr = qword[r15 + offsetof(A64JitState, pc)];
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)];
if (args[0].FitsInImmediateS32()) {
code.mov(addr, args[0].GetImmediateS32());
} else if (args[0].IsInXmm()) {
@ -507,7 +507,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
code.mov(param[0], imm);
});
// The kernel would have to execute ERET to get here, which would clear exclusive state.
code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A64JitState, exclusive_state)], u8(0));
}
void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
@ -621,7 +621,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDesc
code.SwitchMxcsrOnExit();
Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, [&](RegList param) {
code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], param[0]);
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], param[0]);
code.mov(param[1].cvt32(), terminal.num_instructions);
});
code.ReturnFromRunCode(true); // TODO: Check cycles
@ -635,7 +635,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
// Used for patches and linking
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
} else {
if (conf.enable_cycle_counting) {
@ -647,7 +647,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
EmitPatchJg(terminal.next);
}
} else {
code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0);
patch_information[terminal.next].jz.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
@ -656,7 +656,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
}
}
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode();
}
}
@ -664,7 +664,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
} else {
patch_information[terminal.next].jmp.push_back(code.getCurr());
@ -719,7 +719,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr
}
void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0);
code.jne(code.GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location, is_single_step);
}
@ -730,7 +730,7 @@ void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr
code.jg(target_code_ptr);
} else {
code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.jg(code.GetReturnFromRunCodeAddress());
}
code.EnsurePatchLocationSize(patch_location, 23);
@ -742,7 +742,7 @@ void A64EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr
code.jz(target_code_ptr);
} else {
code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.jz(code.GetReturnFromRunCodeAddress());
}
code.EnsurePatchLocationSize(patch_location, 23);
@ -754,7 +754,7 @@ void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr
code.jmp(target_code_ptr);
} else {
code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.jmp(code.GetReturnFromRunCodeAddress());
}
code.EnsurePatchLocationSize(patch_location, 22);

View file

@ -324,7 +324,7 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
}
void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) {
code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A64JitState, exclusive_state)], u8(0));
}
void A64EmitX64::EmitA64ExclusiveReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
@ -416,14 +416,14 @@ void A64EmitX64::EmitCheckMemoryAbort(A64EmitContext&, IR::Inst* inst, Xbyak::La
const A64::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};
code.test(dword[r15 + offsetof(A64JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
code.test(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
if (end) {
code.jz(*end, code.T_NEAR);
} else {
code.jz(skip, code.T_NEAR);
}
code.mov(rax, current_location.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode();
code.L(skip);
}

View file

@ -17,6 +17,7 @@ namespace Dynarmic::Backend::X64 {
class BlockOfCode;
constexpr HostLoc ABI_JIT_PTR = HostLoc::R15;
#ifdef _WIN32
constexpr HostLoc ABI_RETURN = HostLoc::RAX;
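With the pointer expressed as a `HostLoc` constant, retargeting it (the "configurable" part of the commit title) should come down to editing this definition, since the `Xbyak::Reg64` alias in the next file is derived from it. A sketch of such a change, assuming `HostLoc::R14` is an acceptable callee-saved choice and that the register allocator is kept from handing out whichever register is picked:

```cpp
// abi.h (sketch): pick a different callee-saved register for the JIT state pointer.
constexpr HostLoc ABI_JIT_PTR = HostLoc::R14;  // was HostLoc::R15

// block_of_code.cpp (unchanged): the xbyak-level alias follows automatically.
const Xbyak::Reg64 BlockOfCode::ABI_JIT_PTR = HostLocToReg64(Dynarmic::Backend::X64::ABI_JIT_PTR);
```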

View file

@ -36,6 +36,7 @@
namespace Dynarmic::Backend::X64 {
const Xbyak::Reg64 BlockOfCode::ABI_JIT_PTR = HostLocToReg64(Dynarmic::Backend::X64::ABI_JIT_PTR);
#ifdef _WIN32
const Xbyak::Reg64 BlockOfCode::ABI_RETURN = HostLocToReg64(Dynarmic::Backend::X64::ABI_RETURN);
const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM1);
@ -322,7 +323,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
// that the stack is appropriately aligned for CALLs.
ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
mov(r15, ABI_PARAM1);
mov(ABI_JIT_PTR, ABI_PARAM1);
mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register
if (cb.enable_cycle_counting) {
@ -335,7 +336,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
// r13 = fastmem pointer
rcp(*this);
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
SwitchMxcsrOnEntry();
@ -346,7 +347,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
mov(r15, ABI_PARAM1);
mov(ABI_JIT_PTR, ABI_PARAM1);
if (cb.enable_cycle_counting) {
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1);
@ -355,10 +356,10 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
rcp(*this);
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
lock();
or_(dword[r15 + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
SwitchMxcsrOnEntry();
jmp(ABI_PARAM2);
@ -368,7 +369,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
align();
return_from_run_code[0] = getCurr<const void*>();
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
jne(return_to_caller);
if (cb.enable_cycle_counting) {
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
@ -380,7 +381,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
align();
return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr<const void*>();
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited);
if (cb.enable_cycle_counting) {
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
@ -409,7 +410,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
xor_(eax, eax);
lock();
xchg(dword[r15 + jsi.offsetof_halt_reason], eax);
xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax);
ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
ret();
@ -419,22 +420,22 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
void BlockOfCode::SwitchMxcsrOnEntry() {
stmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]);
ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]);
}
void BlockOfCode::SwitchMxcsrOnExit() {
stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]);
ldmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]);
}
void BlockOfCode::EnterStandardASIMD() {
stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
ldmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]);
stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]);
ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_asimd_MXCSR]);
}
void BlockOfCode::LeaveStandardASIMD() {
stmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]);
ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_asimd_MXCSR]);
ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]);
}
void BlockOfCode::UpdateTicks() {

View file

@ -155,6 +155,7 @@ public:
void SetCodePtr(CodePtr code_ptr);
void EnsurePatchLocationSize(CodePtr begin, size_t size);
static const Xbyak::Reg64 ABI_JIT_PTR;
// ABI registers
#ifdef _WIN32
static const Xbyak::Reg64 ABI_RETURN;

View file

@ -91,18 +91,18 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, I
? iter->second.entrypoint
: code.GetReturnFromRunCodeAddress();
code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]);
code.mov(index_reg.cvt32(), dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_rsb_ptr]);
code.mov(loc_desc_reg, target.Value());
patch_information[target].mov_rcx.push_back(code.getCurr());
EmitPatchMovRcx(target_code_ptr);
code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx);
code.mov(qword[code.ABI_JIT_PTR + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
code.mov(qword[code.ABI_JIT_PTR + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx);
// Byte size hack
DEBUG_ASSERT(code.GetJitStateInfo().rsb_ptr_mask <= 0xFF);
code.add(index_reg.cvt32(), 1); //flags trashed, 1 single byte, haswell doesn't care
code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask)); //trashes flags
// Results ready and sort by least needed: give OOO some break
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
code.mov(dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
}
void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) {
@ -284,7 +284,7 @@ void EmitX64::EmitAddCycles(size_t cycles) {
Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
Xbyak::Label pass;
code.mov(eax, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
code.mov(eax, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
code.LoadRequiredFlagsForCondFromRax(cond);

View file

@ -18,24 +18,20 @@ namespace CRC32 = Common::Crypto::CRC32;
static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::SSE42)) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size);
if (data_size != 64) {
code.crc32(crc, value);
} else {
code.crc32(crc.cvt64(), value);
}
ctx.reg_alloc.DefineValue(inst, crc);
return;
} else {
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
code.mov(code.ABI_PARAM3.cvt32(), data_size / CHAR_BIT); //zext
code.CallFunction(&CRC32::ComputeCRC32Castagnoli);
}
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
code.CallFunction(&CRC32::ComputeCRC32Castagnoli);
}
static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
@ -69,10 +65,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
code.pextrd(crc, xmm_value, 2);
ctx.reg_alloc.DefineValue(inst, crc);
return;
}
if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
} else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
@ -90,10 +83,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
code.pextrd(crc, xmm_value, 2);
ctx.reg_alloc.DefineValue(inst, crc);
return;
}
if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
} else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
@ -111,12 +101,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
code.pextrd(crc, xmm_value, 2);
ctx.reg_alloc.DefineValue(inst, crc);
return;
} else {
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
code.CallFunction(&CRC32::ComputeCRC32ISO);
}
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
code.CallFunction(&CRC32::ComputeCRC32ISO);
}
void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {

View file

@ -143,7 +143,7 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst*
const Xbyak::Reg then_ = ctx.reg_alloc.UseGpr(args[1]).changeBit(bitsize);
const Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(args[2]).changeBit(bitsize);
code.mov(nzcv, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
code.mov(nzcv, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
code.LoadRequiredFlagsForCondFromRax(args[0].GetImmediateCond());

View file

@ -227,7 +227,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, {}, args[1]);
code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
if (ordered) {
code.mfence();
@ -245,7 +245,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
@ -285,9 +285,9 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
Xbyak::Label end;
code.mov(code.ABI_RETURN, u32(1));
code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.je(end);
code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
if constexpr (bitsize != 128) {
using T = mcl::unsigned_integer_of_size<bitsize>;
@ -355,7 +355,7 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());
code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.mov(qword[tmp], vaddr);
@ -439,14 +439,14 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.mov(status, u32(1));
code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.je(*end, code.T_NEAR);
code.cmp(qword[tmp], vaddr);
code.jne(*end, code.T_NEAR);
EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax);
code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id)));
if constexpr (bitsize == 128) {
@ -501,7 +501,6 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
}
code.setnz(status.cvt8());
ctx.deferred_emits.emplace_back([=, this] {
code.L(*abort);
code.call(wrapped_fn);

View file

@ -46,26 +46,25 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi
code.test(vaddr, align_mask);
if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask;
SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel();
code.jnz(*detect_boundary, code.T_NEAR);
code.L(*resume);
ctx.deferred_emits.emplace_back([=, &code] {
code.L(*detect_boundary);
code.mov(tmp, vaddr);
code.and_(tmp, page_align_mask);
code.cmp(tmp, page_align_mask);
code.jne(*resume, code.T_NEAR);
// NOTE: We expect to fallthrough into abort code here.
});
} else {
code.jnz(abort, code.T_NEAR);
return;
}
const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask;
SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel();
code.jnz(*detect_boundary, code.T_NEAR);
code.L(*resume);
ctx.deferred_emits.emplace_back([=, &code] {
code.L(*detect_boundary);
code.mov(tmp, vaddr);
code.and_(tmp, page_align_mask);
code.cmp(tmp, page_align_mask);
code.jne(*resume, code.T_NEAR);
// NOTE: We expect to fallthrough into abort code here.
});
}
template<typename EmitContext>
@ -244,29 +243,29 @@ const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::Reg
ASSERT_FALSE("Invalid bitsize");
}
return fastmem_location;
} else {
const void* fastmem_location = code.getCurr();
switch (bitsize) {
case 8:
code.movzx(Xbyak::Reg64(value_idx).cvt32(), code.byte[addr]);
break;
case 16:
code.movzx(Xbyak::Reg64(value_idx).cvt32(), word[addr]);
break;
case 32:
code.mov(Xbyak::Reg64(value_idx).cvt32(), dword[addr]);
break;
case 64:
code.mov(Xbyak::Reg64(value_idx), qword[addr]);
break;
case 128:
code.movups(Xbyak::Xmm(value_idx), xword[addr]);
break;
default:
ASSERT_FALSE("Invalid bitsize");
}
return fastmem_location;
}
const void* fastmem_location = code.getCurr();
switch (bitsize) {
case 8:
code.movzx(Xbyak::Reg64(value_idx).cvt32(), code.byte[addr]);
break;
case 16:
code.movzx(Xbyak::Reg64(value_idx).cvt32(), word[addr]);
break;
case 32:
code.mov(Xbyak::Reg64(value_idx).cvt32(), dword[addr]);
break;
case 64:
code.mov(Xbyak::Reg64(value_idx), qword[addr]);
break;
case 128:
code.movups(Xbyak::Xmm(value_idx), xword[addr]);
break;
default:
ASSERT_FALSE("Invalid bitsize");
}
return fastmem_location;
}
template<std::size_t bitsize>
@ -312,29 +311,29 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int
ASSERT_FALSE("Invalid bitsize");
}
return fastmem_location;
} else {
const void* fastmem_location = code.getCurr();
switch (bitsize) {
case 8:
code.mov(code.byte[addr], Xbyak::Reg64(value_idx).cvt8());
break;
case 16:
code.mov(word[addr], Xbyak::Reg64(value_idx).cvt16());
break;
case 32:
code.mov(dword[addr], Xbyak::Reg64(value_idx).cvt32());
break;
case 64:
code.mov(qword[addr], Xbyak::Reg64(value_idx));
break;
case 128:
code.movups(xword[addr], Xbyak::Xmm(value_idx));
break;
default:
ASSERT_FALSE("Invalid bitsize");
}
return fastmem_location;
}
const void* fastmem_location = code.getCurr();
switch (bitsize) {
case 8:
code.mov(code.byte[addr], Xbyak::Reg64(value_idx).cvt8());
break;
case 16:
code.mov(word[addr], Xbyak::Reg64(value_idx).cvt16());
break;
case 32:
code.mov(dword[addr], Xbyak::Reg64(value_idx).cvt32());
break;
case 64:
code.mov(qword[addr], Xbyak::Reg64(value_idx));
break;
case 128:
code.movups(xword[addr], Xbyak::Xmm(value_idx));
break;
default:
ASSERT_FALSE("Invalid bitsize");
}
return fastmem_location;
}
template<typename UserConfig>

View file

@ -245,11 +245,11 @@ private:
HostLoc FindFreeSpill() const noexcept;
inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
ASSERT(loc != HostLoc::RSP);
return hostloc_info[static_cast<size_t>(loc)];
}
inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
ASSERT(loc != HostLoc::RSP);
return hostloc_info[static_cast<size_t>(loc)];
}