diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 67e6e63c85..8aea5db583 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -47,6 +47,7 @@ constexpr std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) { constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF); constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000); +constexpr inline Xbyak::Reg ABI_JIT_REG = Xbyak::util::rbx; #ifdef _WIN32 // Microsoft x64 ABI diff --git a/src/dynarmic/docs/ReturnStackBufferOptimization.md b/src/dynarmic/docs/ReturnStackBufferOptimization.md index e5298cad92..6ffe41bcc6 100644 --- a/src/dynarmic/docs/ReturnStackBufferOptimization.md +++ b/src/dynarmic/docs/ReturnStackBufferOptimization.md @@ -79,7 +79,7 @@ contain a prediction with the same `UniqueHash`. ? u64(unique_hash_to_code_ptr[imm64]) : u64(code->GetReturnFromRunCodeAddress()); - code->mov(index_reg, dword[r15 + offsetof(JitState, rsb_ptr)]); + code->mov(index_reg, dword[code->ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); code->add(index_reg, 1); code->and_(index_reg, u32(JitState::RSBSize - 1)); @@ -91,13 +91,13 @@ contain a prediction with the same `UniqueHash`. Xbyak::Label label; for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(loc_desc_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->cmp(loc_desc_reg, qword[code->ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); code->je(label, code->T_SHORT); } - code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg); - code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); - code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); + code->mov(dword[code->ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); + code->mov(qword[code->ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); + code->mov(qword[code->ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); code->L(label); } @@ -122,14 +122,14 @@ To check if a prediction is in the RSB, we linearly scan the RSB.
// This calculation has to match up with IREmitter::PushRSB code->mov(ecx, MJitStateReg(Arm::Reg::PC)); code->shl(rcx, 32); - code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]); - code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]); + code->mov(ebx, dword[code->ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); + code->or_(ebx, dword[code->ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); code->or_(rbx, rcx); code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->cmove(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); + code->cmp(rbx, qword[code->ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->cmove(rax, qword[code->ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); } code->jmp(rax);
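For orientation, the `JitState` fields that both snippets above index through the JIT pointer register look roughly like the following. This is a sketch only: the field names come from the code above, but `RSBSize` and the exact types are illustrative rather than dynarmic's real definitions.

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

struct JitState {
    // Assumed to be a power of two, so that RSBSize - 1 works as the
    // wrap-around mask used by code->and_(index_reg, u32(JitState::RSBSize - 1)).
    static constexpr std::size_t RSBSize = 8;
    u32 rsb_ptr = 0;  // index of the most recently written prediction
    std::array<u64, RSBSize> rsb_location_descriptors{};  // UniqueHash of each prediction
    std::array<u64, RSBSize> rsb_codeptrs{};              // host code pointer for each prediction
};
```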
diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index 9840dd1ecd..6d284f3086 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -44,21 +44,21 @@ namespace Dynarmic::Backend::X64 { using namespace Xbyak::util; static Xbyak::Address MJitStateReg(A32::Reg reg) { - return dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)]; + return dword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)]; } static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) { if (A32::IsSingleExtReg(reg)) { const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::S0); - return dword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u32) * index]; + return dword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + sizeof(u32) * index]; } if (A32::IsDoubleExtReg(reg)) { const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::D0); - return qword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u64) * index]; + return qword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + sizeof(u64) * index]; } if (A32::IsQuadExtReg(reg)) { const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::Q0); - return xword[r15 + offsetof(A32JitState, ExtReg) + 2 * sizeof(u64) * index]; + return xword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + 2 * sizeof(u64) * index]; } ASSERT_FALSE("Should never happen."); } @@ -220,7 +220,7 @@ void A32EmitX64::GenTerminalHandlers() { // PC ends up in ebp, location_descriptor ends up in rbx const auto calculate_location_descriptor = [this] { // This calculation has to match up with IREmitter::PushRSB - code.mov(ebx, dword[r15 + offsetof(A32JitState, upper_location_descriptor)]); + code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]); code.shl(rbx, 32); code.mov(ecx, MJitStateReg(A32::Reg::PC)); code.mov(ebp, ecx); @@ -232,17 +232,17 @@ void A32EmitX64::GenTerminalHandlers() { code.align(); terminal_handler_pop_rsb_hint = code.getCurr(); calculate_location_descriptor(); - code.mov(eax, dword[r15 + offsetof(A32JitState, rsb_ptr)]); + code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)]); code.dec(eax); code.and_(eax, u32(A32JitState::RSBPtrMask)); - code.mov(dword[r15 + offsetof(A32JitState, rsb_ptr)], eax); - code.cmp(rbx, qword[r15 + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)], eax); + code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]); if (conf.HasOptimization(OptimizationFlag::FastDispatch)) { code.jne(rsb_cache_miss); } else { code.jne(code.GetReturnFromRunCodeAddress()); } - code.mov(rax, qword[r15 + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]); + code.mov(rax, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]); code.jmp(rax); PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a32_terminal_handler_pop_rsb_hint"); @@ -392,17 +392,17 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) { // so we load them both at the same time with one 64-bit read. This allows us to // extract all of their bits together at once with one pext. static_assert(offsetof(A32JitState, upper_location_descriptor) + 4 == offsetof(A32JitState, cpsr_ge)); - code.mov(result.cvt64(), qword[r15 + offsetof(A32JitState, upper_location_descriptor)]); + code.mov(result.cvt64(), qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]); code.mov(tmp.cvt64(), 0x80808080'00000003ull); code.pext(result.cvt64(), result.cvt64(), tmp.cvt64()); code.mov(tmp, 0x000f0220); code.pdep(result, result, tmp); } else { - code.mov(result, dword[r15 + offsetof(A32JitState, upper_location_descriptor)]); + code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]); code.imul(result, result, 0x120); code.and_(result, 0x00000220); - code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_ge)]); + code.mov(tmp, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]); code.and_(tmp, 0x80808080); code.imul(tmp, tmp, 0x00204081); code.shr(tmp, 12); @@ -410,11 +410,11 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) { code.or_(result, tmp); } - code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_q)]); + code.mov(tmp, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]); code.shl(tmp, 27); code.or_(result, tmp); - code.mov(tmp2, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]); + code.mov(tmp2, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]); if (code.HasHostFeature(HostFeature::FastBMI2)) { code.mov(tmp, NZCV::x64_mask); code.pext(tmp2, tmp2, tmp); @@ -426,7 +426,7 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) { } code.or_(result, tmp2); - code.or_(result, dword[r15 + offsetof(A32JitState, cpsr_jaifm)]); + code.or_(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -444,7 +444,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { // cpsr_q code.bt(cpsr, 27); - code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]); + code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]); // cpsr_nzcv code.mov(tmp, cpsr); @@ -456,12 +456,12 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { code.imul(tmp, tmp, NZCV::to_x64_multiplier); code.and_(tmp, NZCV::x64_mask); } - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], tmp); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], tmp); // cpsr_jaifm code.mov(tmp, cpsr); code.and_(tmp, 0x010001DF); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_jaifm)], tmp); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)], tmp); if (code.HasHostFeature(HostFeature::FastBMI2)) { // cpsr_et and cpsr_ge @@ -469,7 +469,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { //
This mask is 0x7FFF0000, because we do not want the MSB to be sign extended to the upper dword. static_assert((A32::LocationDescriptor::FPSCR_MODE_MASK & ~0x7FFF0000) == 0); - code.and_(qword[r15 + offsetof(A32JitState, upper_location_descriptor)], u32(0x7FFF0000)); + code.and_(qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], u32(0x7FFF0000)); code.mov(tmp, 0x000f0220); code.pext(cpsr, cpsr, tmp); code.mov(tmp.cvt64(), 0x01010101'00000003ull); @@ -479,14 +479,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { code.mov(tmp2.cvt64(), tmp.cvt64()); code.sub(tmp.cvt64(), cpsr.cvt64()); code.xor_(tmp.cvt64(), tmp2.cvt64()); - code.or_(qword[r15 + offsetof(A32JitState, upper_location_descriptor)], tmp.cvt64()); + code.or_(qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], tmp.cvt64()); } else { - code.and_(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], u32(0xFFFF0000)); + code.and_(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], u32(0xFFFF0000)); code.mov(tmp, cpsr); code.and_(tmp, 0x00000220); code.imul(tmp, tmp, 0x00900000); code.shr(tmp, 28); - code.or_(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], tmp); + code.or_(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], tmp); code.and_(cpsr, 0x000f0000); code.shr(cpsr, 16); @@ -495,14 +495,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { code.mov(tmp, 0x80808080); code.sub(tmp, cpsr); code.xor_(tmp, 0x80808080); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], tmp); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], tmp); } } void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], to_store); } void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) { @@ -510,7 +510,7 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) { if (args[0].IsImmediate()) { const u32 imm = args[0].GetImmediateU32(); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm)); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm)); } else if (code.HasHostFeature(HostFeature::FastBMI2)) { const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -518,14 +518,14 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) { code.shr(a, 28); code.mov(b, NZCV::x64_mask); code.pdep(a, a, b); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a); } else { const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); code.shr(a, 28); code.imul(a, a, NZCV::to_x64_multiplier); code.and_(a, NZCV::x64_mask); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a); } } @@ -534,25 +534,25 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) { if (args[0].IsImmediate()) { const u32 imm = args[0].GetImmediateU32(); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm)); - code.mov(code.byte[r15 + 
offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0)); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0)); } else if (code.HasHostFeature(HostFeature::FastBMI2)) { const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32(); code.shr(a, 28); - code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]); + code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]); code.mov(b, NZCV::x64_mask); code.pdep(a, a, b); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a); } else { const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); code.shr(a, 28); - code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]); + code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]); code.imul(a, a, NZCV::to_x64_multiplier); code.and_(a, NZCV::x64_mask); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a); } } @@ -562,10 +562,10 @@ void A32EmitX64::EmitA32SetCpsrNZ(A32EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(args[0]).cvt32(); const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); - code.movzx(tmp, code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1]); + code.movzx(tmp, code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1]); code.and_(tmp, 1); code.or_(tmp, nz); - code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], tmp.cvt8()); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], tmp.cvt8()); } void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) { @@ -575,11 +575,11 @@ if (args[1].IsImmediate()) { const bool c = args[1].GetImmediateU1(); - code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], c); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c); } else { const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(args[1]).cvt8(); - code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], c); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c); } } else { const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); @@ -588,19 +588,19 @@ const bool c = args[1].GetImmediateU1(); code.or_(nz, c); - code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8()); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8()); } else { const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(args[1]).cvt32(); code.or_(nz, c); - code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8()); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8()); } } } static void EmitGetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) { const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]); + code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]); if (flag_bit != 0) { code.shr(result, static_cast<int>(flag_bit)); } @@ -616,18 +616,18 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext&
ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code.mov(dword[r15 + offsetof(A32JitState, cpsr_q)], 1); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], 1); } } else { const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8(); - code.or_(code.byte[r15 + offsetof(A32JitState, cpsr_q)], to_store); + code.or_(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], to_store); } } void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code.movd(result, dword[r15 + offsetof(A32JitState, cpsr_ge)]); + code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -637,10 +637,10 @@ void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) { if (args[0].IsInXmm()) { const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]); - code.movd(dword[r15 + offsetof(A32JitState, cpsr_ge)], to_store); + code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store); } else { const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], to_store); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store); } } @@ -654,7 +654,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst ge |= mcl::bit::get_bit<17>(imm) ? 0x0000FF00 : 0; ge |= mcl::bit::get_bit<16>(imm) ? 0x000000FF : 0; - code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], ge); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], ge); } else if (code.HasHostFeature(HostFeature::FastBMI2)) { const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -663,7 +663,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst code.shr(a, 16); code.pdep(a, a, b); code.imul(a, a, 0xFF); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], a); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a); } else { const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); @@ -672,7 +672,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst code.imul(a, a, 0x00204081); code.and_(a, 0x01010101); code.imul(a, a, 0xFF); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], a); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a); } } @@ -716,7 +716,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) { const u32 new_upper = upper_without_t | (mcl::bit::get_bit<0>(new_pc) ? 1 : 0); code.mov(MJitStateReg(A32::Reg::PC), new_pc & mask); - code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper); } else { const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32(); const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -728,7 +728,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) { code.lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 
0xFFFFFFFE : 0xFFFFFFFC code.and_(new_pc, mask); code.mov(MJitStateReg(A32::Reg::PC), new_pc); - code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper); } } @@ -819,7 +819,7 @@ void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code.mov(result, dword[r15 + offsetof(A32JitState, fpsr_nzcv)]); + code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -833,7 +833,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { code.mov(tmp, NZCV::x64_mask); code.pext(tmp, value, tmp); code.shl(tmp, 28); - code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], tmp); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)], tmp); return; } @@ -843,7 +843,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { code.and_(value, NZCV::x64_mask); code.imul(value, value, NZCV::from_x64_multiplier); code.and_(value, NZCV::arm_mask); - code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], value); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)], value); } static void EmitCoprocessorException() { @@ -1155,7 +1155,7 @@ void A32EmitX64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_locat }(); if (old_upper != new_upper) { - code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper); } } @@ -1175,7 +1175,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc EmitPatchJg(terminal.next); } } else { - code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0); + code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0); patch_information[terminal.next].jz.push_back(code.getCurr()); if (const auto next_bb = GetBasicBlock(terminal.next)) { EmitPatchJz(terminal.next, next_bb->entrypoint); @@ -1240,7 +1240,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr } void A32EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0); + code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0); code.jne(code.GetForceReturnFromRunCodeAddress()); EmitTerminal(terminal.else_, initial_location, is_single_step); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp index f2919485be..a1fca21f47 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp @@ -168,7 +168,7 @@ void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) { } void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) { - code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, exclusive_state)], u8(0)); } void A32EmitX64::EmitA32ExclusiveReadMemory8(A32EmitContext& ctx, IR::Inst* inst) { @@ -244,14 +244,14 @@ void A32EmitX64::EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak const 
A32::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}}; - code.test(dword[r15 + offsetof(A32JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort)); + code.test(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort)); if (end) { code.jz(*end, code.T_NEAR); } else { code.jz(skip, code.T_NEAR); } EmitSetUpperLocationDescriptor(current_location, ctx.Location()); - code.mov(dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC()); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC()); code.ForceReturnFromRunCode(); code.L(skip); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 9a1a53330e..b091e4310c 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -192,10 +192,10 @@ void A64EmitX64::GenTerminalHandlers() { const auto calculate_location_descriptor = [this] { // This calculation has to match up with A64::LocationDescriptor::UniqueHash // TODO: Optimization is available here based on known state of fpcr. - code.mov(rbp, qword[r15 + offsetof(A64JitState, pc)]); + code.mov(rbp, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]); code.mov(rcx, A64::LocationDescriptor::pc_mask); code.and_(rcx, rbp); - code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]); + code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]); code.and_(ebx, A64::LocationDescriptor::fpcr_mask); code.shl(rbx, A64::LocationDescriptor::fpcr_shift); code.or_(rbx, rcx); @@ -207,17 +207,17 @@ void A64EmitX64::GenTerminalHandlers() { code.align(); terminal_handler_pop_rsb_hint = code.getCurr(); calculate_location_descriptor(); - code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]); + code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)]); code.dec(eax); code.and_(eax, u32(A64JitState::RSBPtrMask)); - code.mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax); - code.cmp(rbx, qword[r15 + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)], eax); + code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]); if (conf.HasOptimization(OptimizationFlag::FastDispatch)) { code.jne(rsb_cache_miss, code.T_NEAR); } else { code.jne(code.GetReturnFromRunCodeAddress()); } - code.mov(rax, qword[r15 + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]); + code.mov(rax, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]); code.jmp(rax); PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a64_terminal_handler_pop_rsb_hint"); @@ -272,7 +272,7 @@ void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code.mov(result, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]); + code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]); code.shr(result, NZCV::x64_c_flag_bit); code.and_(result, 1); ctx.reg_alloc.DefineValue(inst, result); } @@ -281,7 +281,7 @@ void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 nzcv_raw =
ctx.reg_alloc.ScratchGpr().cvt32(); - code.mov(nzcv_raw, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]); + code.mov(nzcv_raw, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]); if (code.HasHostFeature(HostFeature::FastBMI2)) { const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -310,20 +310,20 @@ void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) { code.imul(nzcv_raw, nzcv_raw, NZCV::to_x64_multiplier); code.and_(nzcv_raw, NZCV::x64_mask); } - code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw); } void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], to_store); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], to_store); } void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) { const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code.mov(result, dword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]); + code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -331,13 +331,13 @@ void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) { const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - code.mov(result, qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]); + code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]); ctx.reg_alloc.DefineValue(inst, result); } void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) { const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); - const auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; + const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); code.movd(result, addr); @@ -346,7 +346,7 @@ void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) { const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); - const auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; + const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); code.movq(result, addr); @@ -355,7 +355,7 @@ void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) { const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); - const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; + const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); code.movaps(result, addr); @@ -364,13 +364,13 @@ void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - code.mov(result, qword[r15 + offsetof(A64JitState, sp)]); +
code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]); ctx.reg_alloc.DefineValue(inst, result); } void A64EmitX64::EmitA64GetFPCR(A64EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code.mov(result, dword[r15 + offsetof(A64JitState, fpcr)]); + code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -388,7 +388,7 @@ void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); - const auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]; + const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]; if (args[1].FitsInImmediateS32()) { code.mov(addr, args[1].GetImmediateS32()); } else { @@ -402,7 +402,7 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); - const auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]; + const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]; if (args[1].FitsInImmediateS32()) { code.mov(addr, args[1].GetImmediateS32()); } else if (args[1].IsInXmm()) { @@ -417,7 +417,7 @@ void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); - const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; + const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); @@ -430,7 +430,7 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); - const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; + const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]); code.movq(to_store, to_store); // TODO: Remove when able @@ -440,7 +440,7 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); - const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; + const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)]; const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); code.movaps(addr, to_store); @@ -448,7 +448,7 @@ void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto addr = qword[r15 + offsetof(A64JitState, sp)]; + const auto addr
= qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]; if (args[0].FitsInImmediateS32()) { code.mov(addr, args[0].GetImmediateS32()); } else if (args[0].IsInXmm()) { @@ -486,7 +486,7 @@ void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto addr = qword[r15 + offsetof(A64JitState, pc)]; + const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]; if (args[0].FitsInImmediateS32()) { code.mov(addr, args[0].GetImmediateS32()); } else if (args[0].IsInXmm()) { @@ -507,7 +507,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) { code.mov(param[0], imm); }); // The kernel would have to execute ERET to get here, which would clear exclusive state. - code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A64JitState, exclusive_state)], u8(0)); } void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) { @@ -621,7 +621,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDesc code.SwitchMxcsrOnExit(); Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, [&](RegList param) { code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], param[0]); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], param[0]); code.mov(param[1].cvt32(), terminal.num_instructions); }); code.ReturnFromRunCode(true); // TODO: Check cycles @@ -635,7 +635,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc // Used for patches and linking if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) { code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.ReturnFromRunCode(); } else { if (conf.enable_cycle_counting) { @@ -647,7 +647,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc EmitPatchJg(terminal.next); } } else { - code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0); + code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0); patch_information[terminal.next].jz.push_back(code.getCurr()); if (const auto next_bb = GetBasicBlock(terminal.next)) { EmitPatchJz(terminal.next, next_bb->entrypoint); @@ -656,7 +656,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc } } code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.ForceReturnFromRunCode(); } } @@ -664,7 +664,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) { if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) { code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.ReturnFromRunCode(); } else { patch_information[terminal.next].jmp.push_back(code.getCurr()); @@ -719,7 +719,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr 
} void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0); + code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0); code.jne(code.GetForceReturnFromRunCodeAddress()); EmitTerminal(terminal.else_, initial_location, is_single_step); } @@ -730,7 +730,7 @@ void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr code.jg(target_code_ptr); } else { code.mov(rax, A64::LocationDescriptor{target_desc}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.jg(code.GetReturnFromRunCodeAddress()); } code.EnsurePatchLocationSize(patch_location, 23); @@ -742,7 +742,7 @@ void A64EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr code.jz(target_code_ptr); } else { code.mov(rax, A64::LocationDescriptor{target_desc}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.jz(code.GetReturnFromRunCodeAddress()); } code.EnsurePatchLocationSize(patch_location, 23); @@ -754,7 +754,7 @@ void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr code.jmp(target_code_ptr); } else { code.mov(rax, A64::LocationDescriptor{target_desc}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.jmp(code.GetReturnFromRunCodeAddress()); } code.EnsurePatchLocationSize(patch_location, 22); diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp index fe7dfa011f..8fd6777542 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp @@ -324,7 +324,7 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { } void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) { - code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A64JitState, exclusive_state)], u8(0)); } void A64EmitX64::EmitA64ExclusiveReadMemory8(A64EmitContext& ctx, IR::Inst* inst) { @@ -416,14 +416,14 @@ void A64EmitX64::EmitCheckMemoryAbort(A64EmitContext&, IR::Inst* inst, Xbyak::La const A64::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}}; - code.test(dword[r15 + offsetof(A64JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort)); + code.test(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort)); if (end) { code.jz(*end, code.T_NEAR); } else { code.jz(skip, code.T_NEAR); } code.mov(rax, current_location.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.ForceReturnFromRunCode(); code.L(skip); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/abi.h b/src/dynarmic/src/dynarmic/backend/x64/abi.h index 32f2bdac67..307817a864 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/abi.h +++ b/src/dynarmic/src/dynarmic/backend/x64/abi.h @@ -17,6 +17,7 @@ namespace Dynarmic::Backend::X64 { class BlockOfCode; +constexpr HostLoc ABI_JIT_PTR = HostLoc::R15; #ifdef _WIN32 constexpr HostLoc ABI_RETURN = HostLoc::RAX;
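The two `ABI_JIT_PTR` declarations above are the whole mechanism: the register that holds the `JitState` pointer is named once, and every emitter references the name instead of a hardcoded `r15`. A minimal self-contained sketch of the pattern, assuming stock xbyak; the `JitState` layout here is illustrative, not dynarmic's:

```cpp
#include <cstddef>
#include <cstdint>
#include <xbyak/xbyak.h>

struct JitState {
    std::uint32_t halt_reason;  // stand-in field for illustration
};

// One definition site: change this line and every emitter below follows.
static const Xbyak::Reg64 ABI_JIT_PTR = Xbyak::util::r15;

struct Emitter : Xbyak::CodeGenerator {
    void EmitCheckHalt() {
        // Reads JitState::halt_reason through the named register rather than
        // spelling out r15 at the call site.
        cmp(dword[ABI_JIT_PTR + offsetof(JitState, halt_reason)], 0);
    }
};
```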
diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index 3116d0aeb1..dbd56de79b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -36,6 +36,7 @@ namespace Dynarmic::Backend::X64 { +const Xbyak::Reg64 BlockOfCode::ABI_JIT_PTR = HostLocToReg64(Dynarmic::Backend::X64::ABI_JIT_PTR); #ifdef _WIN32 const Xbyak::Reg64 BlockOfCode::ABI_RETURN = HostLocToReg64(Dynarmic::Backend::X64::ABI_RETURN); const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM1); @@ -322,7 +323,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) { // that the stack is appropriately aligned for CALLs. ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); - mov(r15, ABI_PARAM1); + mov(ABI_JIT_PTR, ABI_PARAM1); mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register if (cb.enable_cycle_counting) { @@ -335,7 +336,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) { // r13 = fastmem pointer rcp(*this); - cmp(dword[r15 + jsi.offsetof_halt_reason], 0); + cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited, T_NEAR); SwitchMxcsrOnEntry(); @@ -346,7 +347,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) { ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); - mov(r15, ABI_PARAM1); + mov(ABI_JIT_PTR, ABI_PARAM1); if (cb.enable_cycle_counting) { mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1); @@ -355,10 +356,10 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) { rcp(*this); - cmp(dword[r15 + jsi.offsetof_halt_reason], 0); + cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited, T_NEAR); lock(); - or_(dword[r15 + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step)); + or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step)); SwitchMxcsrOnEntry(); jmp(ABI_PARAM2); @@ -368,7 +369,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) { align(); return_from_run_code[0] = getCurr(); - cmp(dword[r15 + jsi.offsetof_halt_reason], 0); + cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller); if (cb.enable_cycle_counting) { cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); @@ -380,7 +381,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) { align(); return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr(); - cmp(dword[r15 + jsi.offsetof_halt_reason], 0); + cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited); if (cb.enable_cycle_counting) { cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); @@ -409,7 +410,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) { xor_(eax, eax); lock(); - xchg(dword[r15 + jsi.offsetof_halt_reason], eax); + xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax); ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); ret(); @@ -419,22 +420,22 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) { void BlockOfCode::SwitchMxcsrOnEntry() { stmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]); - ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]); + ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]); } void BlockOfCode::SwitchMxcsrOnExit() { - stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]); + stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]); ldmxcsr(dword[rsp +
ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]); } void BlockOfCode::EnterStandardASIMD() { - stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]); - ldmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]); + stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]); + ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_asimd_MXCSR]); } void BlockOfCode::LeaveStandardASIMD() { - stmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]); - ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]); + stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_asimd_MXCSR]); + ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]); } void BlockOfCode::UpdateTicks() { diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h index 4cc8663e11..095e75336b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h @@ -155,6 +155,7 @@ public: void SetCodePtr(CodePtr code_ptr); void EnsurePatchLocationSize(CodePtr begin, size_t size); + static const Xbyak::Reg64 ABI_JIT_PTR; // ABI registers #ifdef _WIN32 static const Xbyak::Reg64 ABI_RETURN; diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index 20e6144069..a13baa6a97 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -91,18 +91,18 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, I ? iter->second.entrypoint : code.GetReturnFromRunCodeAddress(); - code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]); + code.mov(index_reg.cvt32(), dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_rsb_ptr]); code.mov(loc_desc_reg, target.Value()); patch_information[target].mov_rcx.push_back(code.getCurr()); EmitPatchMovRcx(target_code_ptr); - code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg); - code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx); + code.mov(qword[code.ABI_JIT_PTR + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg); + code.mov(qword[code.ABI_JIT_PTR + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx); // Byte size hack DEBUG_ASSERT(code.GetJitStateInfo().rsb_ptr_mask <= 0xFF); code.add(index_reg.cvt32(), 1); //flags trashed, 1 single byte, haswell doesn't care code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask)); //trashes flags // Results ready and sort by least needed: give OOO some break - code.mov(dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32()); + code.mov(dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32()); } void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) { @@ -284,7 +284,7 @@ void EmitX64::EmitAddCycles(size_t cycles) { Xbyak::Label EmitX64::EmitCond(IR::Cond cond) { Xbyak::Label pass; - code.mov(eax, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]); + code.mov(eax, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]); code.LoadRequiredFlagsForCondFromRax(cond); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp index 842a8612ee..9d7c57cb57 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp @@ -18,24 +18,20 @@ namespace CRC32 = 
Common::Crypto::CRC32; static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if (code.HasHostFeature(HostFeature::SSE42)) { const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size); - if (data_size != 64) { code.crc32(crc, value); } else { code.crc32(crc.cvt64(), value); } - ctx.reg_alloc.DefineValue(inst, crc); - return; + } else { + ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); + code.mov(code.ABI_PARAM3.cvt32(), data_size / CHAR_BIT); //zext + code.CallFunction(&CRC32::ComputeCRC32Castagnoli); } - - ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); - code.mov(code.ABI_PARAM3, data_size / CHAR_BIT); - code.CallFunction(&CRC32::ComputeCRC32Castagnoli); } static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) { @@ -69,10 +65,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co code.pextrd(crc, xmm_value, 2); ctx.reg_alloc.DefineValue(inst, crc); - return; - } - - if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) { + } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) { const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32(); const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(); @@ -90,10 +83,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co code.pextrd(crc, xmm_value, 2); ctx.reg_alloc.DefineValue(inst, crc); - return; - } - - if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) { + } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) { const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(); @@ -111,12 +101,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co code.pextrd(crc, xmm_value, 2); ctx.reg_alloc.DefineValue(inst, crc); - return; + } else { + ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); + code.mov(code.ABI_PARAM3, data_size / CHAR_BIT); + code.CallFunction(&CRC32::ComputeCRC32ISO); } - - ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); - code.mov(code.ABI_PARAM3, data_size / CHAR_BIT); - code.CallFunction(&CRC32::ComputeCRC32ISO); } void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) { diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp index 71ee9de57a..515f0743d0 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp @@ -143,7 +143,7 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst* const Xbyak::Reg then_ = ctx.reg_alloc.UseGpr(args[1]).changeBit(bitsize); const Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(args[2]).changeBit(bitsize); - code.mov(nzcv, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]); + code.mov(nzcv, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]); code.LoadRequiredFlagsForCondFromRax(args[0].GetImmediateCond()); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc 
index d28f513988..d18e650100 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -227,7 +227,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, {}, args[1]); - code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1)); code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf)); if (ordered) { code.mfence(); @@ -245,7 +245,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); - code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1)); code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf)); ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); @@ -285,9 +285,9 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) { Xbyak::Label end; code.mov(code.ABI_RETURN, u32(1)); - code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); code.je(end); - code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf)); if constexpr (bitsize != 128) { using T = mcl::unsigned_integer_of_size<bitsize>; @@ -355,7 +355,7 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in EmitExclusiveLock(code, conf, tmp, tmp2.cvt32()); - code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1)); code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); code.mov(qword[tmp], vaddr); @@ -439,14 +439,14 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); code.mov(status, u32(1)); - code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); code.je(*end, code.T_NEAR); code.cmp(qword[tmp], vaddr); code.jne(*end, code.T_NEAR); EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax); - code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id))); if constexpr (bitsize == 128) { @@ -501,7 +501,6 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i } code.setnz(status.cvt8()); - ctx.deferred_emits.emplace_back([=, this] { code.L(*abort); code.call(wrapped_fn); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h index ebdf011559..75a47c6a80 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h @@ -46,26 +46,25 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext&
ctx, size_t bitsi code.test(vaddr, align_mask); - if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { + if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { + const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask; + + SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel(); + + code.jnz(*detect_boundary, code.T_NEAR); + code.L(*resume); + + ctx.deferred_emits.emplace_back([=, &code] { + code.L(*detect_boundary); + code.mov(tmp, vaddr); + code.and_(tmp, page_align_mask); + code.cmp(tmp, page_align_mask); + code.jne(*resume, code.T_NEAR); + // NOTE: We expect to fallthrough into abort code here. + }); + } else { code.jnz(abort, code.T_NEAR); - return; } - - const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask; - - SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel(); - - code.jnz(*detect_boundary, code.T_NEAR); - code.L(*resume); - - ctx.deferred_emits.emplace_back([=, &code] { - code.L(*detect_boundary); - code.mov(tmp, vaddr); - code.and_(tmp, page_align_mask); - code.cmp(tmp, page_align_mask); - code.jne(*resume, code.T_NEAR); - // NOTE: We expect to fallthrough into abort code here. - }); } template @@ -244,29 +243,29 @@ const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::Reg ASSERT_FALSE("Invalid bitsize"); } return fastmem_location; + } else { + const void* fastmem_location = code.getCurr(); + switch (bitsize) { + case 8: + code.movzx(Xbyak::Reg64(value_idx).cvt32(), code.byte[addr]); + break; + case 16: + code.movzx(Xbyak::Reg64(value_idx).cvt32(), word[addr]); + break; + case 32: + code.mov(Xbyak::Reg64(value_idx).cvt32(), dword[addr]); + break; + case 64: + code.mov(Xbyak::Reg64(value_idx), qword[addr]); + break; + case 128: + code.movups(Xbyak::Xmm(value_idx), xword[addr]); + break; + default: + ASSERT_FALSE("Invalid bitsize"); + } + return fastmem_location; } - - const void* fastmem_location = code.getCurr(); - switch (bitsize) { - case 8: - code.movzx(Xbyak::Reg64(value_idx).cvt32(), code.byte[addr]); - break; - case 16: - code.movzx(Xbyak::Reg64(value_idx).cvt32(), word[addr]); - break; - case 32: - code.mov(Xbyak::Reg64(value_idx).cvt32(), dword[addr]); - break; - case 64: - code.mov(Xbyak::Reg64(value_idx), qword[addr]); - break; - case 128: - code.movups(Xbyak::Xmm(value_idx), xword[addr]); - break; - default: - ASSERT_FALSE("Invalid bitsize"); - } - return fastmem_location; } template @@ -312,29 +311,29 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int ASSERT_FALSE("Invalid bitsize"); } return fastmem_location; + } else { + const void* fastmem_location = code.getCurr(); + switch (bitsize) { + case 8: + code.mov(code.byte[addr], Xbyak::Reg64(value_idx).cvt8()); + break; + case 16: + code.mov(word[addr], Xbyak::Reg64(value_idx).cvt16()); + break; + case 32: + code.mov(dword[addr], Xbyak::Reg64(value_idx).cvt32()); + break; + case 64: + code.mov(qword[addr], Xbyak::Reg64(value_idx)); + break; + case 128: + code.movups(xword[addr], Xbyak::Xmm(value_idx)); + break; + default: + ASSERT_FALSE("Invalid bitsize"); + } + return fastmem_location; } - - const void* fastmem_location = code.getCurr(); - switch (bitsize) { - case 8: - code.mov(code.byte[addr], Xbyak::Reg64(value_idx).cvt8()); - break; - case 16: - code.mov(word[addr], Xbyak::Reg64(value_idx).cvt16()); - break; - case 32: - code.mov(dword[addr], Xbyak::Reg64(value_idx).cvt32()); - break; - case 64: - code.mov(qword[addr], Xbyak::Reg64(value_idx)); - break;
- case 128: - code.movups(xword[addr], Xbyak::Xmm(value_idx)); - break; - default: - ASSERT_FALSE("Invalid bitsize"); - } - return fastmem_location; } template diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index 12b6010aa8..e99e337090 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -245,11 +245,11 @@ private: HostLoc FindFreeSpill() const noexcept; inline HostLocInfo& LocInfo(const HostLoc loc) noexcept { - ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15); + ASSERT(loc != HostLoc::RSP); return hostloc_info[static_cast<size_t>(loc)]; } inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept { - ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15); + ASSERT(loc != HostLoc::RSP); return hostloc_info[static_cast<size_t>(loc)]; }
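For completeness, the way `GenRunCode` pins the pointer on entry (`mov(ABI_JIT_PTR, ABI_PARAM1)` above) can be exercised end to end. A usage sketch under stated assumptions: stock xbyak, the System V AMD64 calling convention (first integer argument in `rdi`; on Windows x64 it would be `rcx`), and a toy `JitState` rather than dynarmic's:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <xbyak/xbyak.h>

struct JitState {
    std::uint32_t halt_reason;  // illustrative only
};

static const Xbyak::Reg64 ABI_JIT_PTR = Xbyak::util::r15;

struct RunCode : Xbyak::CodeGenerator {
    RunCode() {
        using namespace Xbyak::util;
        push(r15);              // r15 is callee-saved, so preserve the caller's value
        mov(ABI_JIT_PTR, rdi);  // pin the JitState* on entry, as GenRunCode does
        mov(eax, dword[ABI_JIT_PTR + offsetof(JitState, halt_reason)]);
        pop(r15);
        ret();
    }
};

int main() {
    JitState state{42};
    RunCode gen;
    const auto fn = gen.getCode<std::uint32_t (*)(JitState*)>();
    std::printf("%u\n", fn(&state));  // prints 42
    return 0;
}
```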