diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index 3559c4fa87..fb306336cf 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -233,7 +233,7 @@ void A32EmitX64::GenTerminalHandlers() { terminal_handler_pop_rsb_hint = code.getCurr(); calculate_location_descriptor(); code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)]); - code.dec(eax); + code.sub(eax, 1); code.and_(eax, u32(A32JitState::RSBPtrMask)); code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)], eax); code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]); diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index a25ecd4d23..1e673338a8 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -208,7 +208,7 @@ void A64EmitX64::GenTerminalHandlers() { terminal_handler_pop_rsb_hint = code.getCurr(); calculate_location_descriptor(); code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)]); - code.dec(eax); + code.sub(eax, 1); code.and_(eax, u32(A64JitState::RSBPtrMask)); code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)], eax); code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]); diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 581faedfb7..47e51acb03 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -1206,7 +1206,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i } // a > 0 && a < 0x00800000; - code.dec(tmp); + code.sub(tmp, 1); code.cmp(tmp, 0x007FFFFF); code.jb(fallback, code.T_NEAR); //within -127,128 needs_fallback = true; diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index 6bea3c2c95..e1b9e54df8 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -3326,7 +3326,7 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) { code.paddb(mask, mask); code.paddb(xmm_a, xmm_a); code.pblendvb(result, alternate); - code.dec(counter); + code.sub(counter, 1); code.jnz(loop); ctx.reg_alloc.DefineValue(inst, result); @@ -3370,7 +3370,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst code.paddw(mask, mask); code.paddw(xmm_a, xmm_a); code.pblendvb(result, alternate); - code.dec(counter); + code.sub(counter, 1); code.jnz(loop); ctx.reg_alloc.DefineValue(inst, result); diff --git a/externals/dynarmic/src/dynarmic/backend/x64/hostloc.h b/externals/dynarmic/src/dynarmic/backend/x64/hostloc.h index a39826cfe0..1f0dfa80a6 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/hostloc.h +++ b/externals/dynarmic/src/dynarmic/backend/x64/hostloc.h @@ -78,16 +78,16 @@ inline bool HostLocIsFlag(HostLoc reg) { inline HostLoc HostLocRegIdx(int idx) { ASSERT(idx >= 0 && idx <= 15); - return static_cast(idx); + return HostLoc(idx); } inline HostLoc HostLocXmmIdx(int idx) { ASSERT(idx >= 0 && idx <= 15); - return static_cast(static_cast(HostLoc::XMM0) + idx); + return HostLoc(size_t(HostLoc::XMM0) + idx); } inline HostLoc HostLocSpill(size_t i) { - return static_cast(static_cast(HostLoc::FirstSpill) + i); + return HostLoc(size_t(HostLoc::FirstSpill) + i); } inline bool HostLocIsSpill(HostLoc reg) { diff --git a/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 910425f8ac..f8323e7ded 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -440,10 +440,13 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector= HostLoc::R13 && *it <= HostLoc::R15) { // skip, do not touch + // Intel recommends to reuse registers as soon as they're overwritable (DO NOT SPILL) + } else if (loc_info.IsEmpty()) { + it_empty_candidate = it; + break; + // No empty registers for some reason (very evil) - just do normal LRU } else { if (loc_info.lru_counter < min_lru_counter) { - if (loc_info.IsEmpty()) - it_empty_candidate = it; // Otherwise a "quasi"-LRU min_lru_counter = loc_info.lru_counter; if (*it >= HostLoc::R8 && *it <= HostLoc::R15) { @@ -454,9 +457,6 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector(HostLoc::FirstSpill); i < hostloc_info.size(); i++) { - const auto loc = static_cast(i); - if (LocInfo(loc).IsEmpty()) { - return loc; - } +HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { + // Do not spill XMM into other XMM silly + if (!is_xmm) { + // TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY + // Intel recommends to spill GPR onto XMM registers IF POSSIBLE + for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM0); --i) + if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) + return loc; } - + // Otherwise go to stack spilling + for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i) + if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) + return loc; ASSERT_FALSE("All spill locations are full"); -} - -inline static Xbyak::RegExp SpillToOpArg_Helper1(HostLoc loc, size_t reserved_stack_space) noexcept { - ASSERT(HostLocIsSpill(loc)); - size_t i = static_cast(loc) - static_cast(HostLoc::FirstSpill); - ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations"); - return Xbyak::util::rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]); -} +}; void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept { + auto const spill_to_op_arg_helper = [&](HostLoc loc, size_t reserved_stack_space) { + ASSERT(HostLocIsSpill(loc)); + size_t i = size_t(loc) - size_t(HostLoc::FirstSpill); + ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations"); + return Xbyak::util::rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]); + }; + auto const spill_xmm_to_op = [&](const HostLoc loc) { + return Xbyak::util::xword[spill_to_op_arg_helper(loc, reserved_stack_space)]; + }; if (HostLocIsXMM(to) && HostLocIsXMM(from)) { MAYBE_AVX(movaps, HostLocToXmm(to), HostLocToXmm(from)); } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) { @@ -613,7 +618,7 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc MAYBE_AVX(movd, HostLocToReg64(to).cvt32(), HostLocToXmm(from)); } } else if (HostLocIsXMM(to) && HostLocIsSpill(from)) { - const Xbyak::Address spill_addr = SpillToOpArg(from); + const Xbyak::Address spill_addr = spill_xmm_to_op(from); ASSERT(spill_addr.getBit() >= bit_width); switch (bit_width) { case 128: @@ -631,7 +636,7 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc UNREACHABLE(); } } else if (HostLocIsSpill(to) && HostLocIsXMM(from)) { - const Xbyak::Address spill_addr = SpillToOpArg(to); + const Xbyak::Address spill_addr = spill_xmm_to_op(to); ASSERT(spill_addr.getBit() >= bit_width); switch (bit_width) { case 128: @@ -651,16 +656,16 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) { ASSERT(bit_width != 128); if (bit_width == 64) { - code->mov(HostLocToReg64(to), Xbyak::util::qword[SpillToOpArg_Helper1(from, reserved_stack_space)]); + code->mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]); } else { - code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[SpillToOpArg_Helper1(from, reserved_stack_space)]); + code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]); } } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) { ASSERT(bit_width != 128); if (bit_width == 64) { - code->mov(Xbyak::util::qword[SpillToOpArg_Helper1(to, reserved_stack_space)], HostLocToReg64(from)); + code->mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from)); } else { - code->mov(Xbyak::util::dword[SpillToOpArg_Helper1(to, reserved_stack_space)], HostLocToReg64(from).cvt32()); + code->mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32()); } } else { ASSERT_FALSE("Invalid RegAlloc::EmitMove"); @@ -677,8 +682,4 @@ void RegAlloc::EmitExchange(const HostLoc a, const HostLoc b) noexcept { } } -Xbyak::Address RegAlloc::SpillToOpArg(const HostLoc loc) noexcept { - return Xbyak::util::xword[SpillToOpArg_Helper1(loc, reserved_stack_space)]; -} - } // namespace Dynarmic::Backend::X64 diff --git a/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index 37adbb32e7..0637d738aa 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -243,7 +243,7 @@ private: void MoveOutOfTheWay(HostLoc reg) noexcept; void SpillRegister(HostLoc loc) noexcept; - HostLoc FindFreeSpill() const noexcept; + HostLoc FindFreeSpill(bool is_xmm) const noexcept; inline HostLocInfo& LocInfo(const HostLoc loc) noexcept { ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR); @@ -256,7 +256,6 @@ private: void EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept; void EmitExchange(const HostLoc a, const HostLoc b) noexcept; - Xbyak::Address SpillToOpArg(const HostLoc loc) noexcept; //data alignas(64) boost::container::static_vector gpr_order; diff --git a/externals/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.cpp b/externals/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.cpp index 3378786c46..b3a02005eb 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.cpp @@ -22,7 +22,7 @@ void PrintVerboseDebuggingOutputLine(RegisterData& reg_data, HostLoc hostloc, si } else if (HostLocIsXMM(hostloc)) { return reg_data.xmms[HostLocToXmm(hostloc).getIdx()]; } else if (HostLocIsSpill(hostloc)) { - return (*reg_data.spill)[static_cast(hostloc) - static_cast(HostLoc::FirstSpill)]; + return (*reg_data.spill)[size_t(hostloc) - size_t(HostLoc::FirstSpill)]; } else { fmt::print("invalid hostloc! "); return {0, 0};