diff --git a/docs/Development.md b/docs/Development.md
index c28c3aa32e..3e1ff62ae6 100644
--- a/docs/Development.md
+++ b/docs/Development.md
@@ -1,11 +1,11 @@
 # Development
 
-* **Windows**: [Windows Building Guide](./build/Windows.md)
-* **Linux**: [Linux Building Guide](./build/Linux.md)
-* **Android**: [Android Building Guide](./build/Android.md)
-* **Solaris**: [Solaris Building Guide](./build/Solaris.md)
-* **FreeBSD**: [FreeBSD Building Guide](./build/FreeBSD.md)
-* **macOS**: [macOS Building Guide](./build/macOS.md)
+* **Windows**: [Windows Building Guide](./docs/build/Windows.md)
+* **Linux**: [Linux Building Guide](./docs/build/Linux.md)
+* **Android**: [Android Building Guide](./docs/build/Android.md)
+* **Solaris**: [Solaris Building Guide](./docs/build/Solaris.md)
+* **FreeBSD**: [FreeBSD Building Guide](./docs/build/FreeBSD.md)
+* **macOS**: [macOS Building Guide](./docs/build/macOS.md)
 
 # Guidelines
@@ -87,7 +87,7 @@ Then type `target remote localhost:1234` and type `c` (for continue) - and then
 
 ### gdb cheatsheet
 
-- `mo `: Monitor commands, `get info`, `get fastmem` and `get mappings` are available. Type `mo help` for more info.
+- `mo `: Monitor commands, `get info`, `get fastmem` and `get mappings` are available.
 - `detach`: Detach from remote (i.e. restarting the emulator).
 - `c`: Continue
 - `p `: Print variable, `p/x ` for hexadecimal.
diff --git a/externals/libusb/libusb b/externals/libusb/libusb
index 3dbfa16f0c..c060e9ce30 160000
--- a/externals/libusb/libusb
+++ b/externals/libusb/libusb
@@ -1 +1 @@
-Subproject commit 3dbfa16f0cd9e8ed4fec916c6c00f41c738cb8f4
+Subproject commit c060e9ce30ac2e3ffb49d94209c4dae77b6642f7
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 8aea5db583..67e6e63c85 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -47,7 +47,6 @@ constexpr std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
 constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
 constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);
 
-constexpr inline Xbyak::Reg ABI_JIT_REG = Xbyak::util::rbx;
 
 #ifdef _WIN32
 // Microsoft x64 ABI
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 2a388d4cd9..99a80644ad 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -136,7 +136,6 @@ public:
     case Dynarmic::A64::Exception::SendEvent:
     case Dynarmic::A64::Exception::SendEventLocal:
     case Dynarmic::A64::Exception::Yield:
-        LOG_TRACE(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast<std::size_t>(exception), pc, m_memory.Read32(pc));
         return;
     case Dynarmic::A64::Exception::NoExecuteFault:
         LOG_CRITICAL(Core_ARM, "Cannot execute instruction at unmapped address {:#016x}", pc);
@@ -145,10 +144,12 @@
     default:
         if (m_debugger_enabled) {
             ReturnException(pc, InstructionBreakpoint);
-        } else {
-            m_parent.LogBacktrace(m_process);
-            LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast<std::size_t>(exception), pc, m_memory.Read32(pc));
+            return;
         }
+
+        m_parent.LogBacktrace(m_process);
+        LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
+                     static_cast<std::size_t>(exception), pc, m_memory.Read32(pc));
     }
 }
diff --git a/src/core/debugger/gdbstub.cpp b/src/core/debugger/gdbstub.cpp
index fcb5787147..80091cc7e0 100644
--- a/src/core/debugger/gdbstub.cpp
+++ b/src/core/debugger/gdbstub.cpp
@@ -554,31 +554,32 @@ void GDBStub::HandleVCont(std::string_view command, std::vector<DebuggerAction>&
     }
 }
 
+constexpr std::array<std::pair<const char*, Kernel::Svc::MemoryState>, 22> MemoryStateNames{{
+    {"----- Free ------", Kernel::Svc::MemoryState::Free},
+    {"Io               ", Kernel::Svc::MemoryState::Io},
+    {"Static           ", Kernel::Svc::MemoryState::Static},
+    {"Code             ", Kernel::Svc::MemoryState::Code},
+    {"CodeData         ", Kernel::Svc::MemoryState::CodeData},
+    {"Normal           ", Kernel::Svc::MemoryState::Normal},
+    {"Shared           ", Kernel::Svc::MemoryState::Shared},
+    {"AliasCode        ", Kernel::Svc::MemoryState::AliasCode},
+    {"AliasCodeData    ", Kernel::Svc::MemoryState::AliasCodeData},
+    {"Ipc              ", Kernel::Svc::MemoryState::Ipc},
+    {"Stack            ", Kernel::Svc::MemoryState::Stack},
+    {"ThreadLocal      ", Kernel::Svc::MemoryState::ThreadLocal},
+    {"Transferred      ", Kernel::Svc::MemoryState::Transferred},
+    {"SharedTransferred", Kernel::Svc::MemoryState::SharedTransferred},
+    {"SharedCode       ", Kernel::Svc::MemoryState::SharedCode},
+    {"Inaccessible     ", Kernel::Svc::MemoryState::Inaccessible},
+    {"NonSecureIpc     ", Kernel::Svc::MemoryState::NonSecureIpc},
+    {"NonDeviceIpc     ", Kernel::Svc::MemoryState::NonDeviceIpc},
+    {"Kernel           ", Kernel::Svc::MemoryState::Kernel},
+    {"GeneratedCode    ", Kernel::Svc::MemoryState::GeneratedCode},
+    {"CodeOut          ", Kernel::Svc::MemoryState::CodeOut},
+    {"Coverage         ", Kernel::Svc::MemoryState::Coverage},
+}};
+
 static constexpr const char* GetMemoryStateName(Kernel::Svc::MemoryState state) {
-    constexpr std::array<std::pair<const char*, Kernel::Svc::MemoryState>, 22> MemoryStateNames{{
-        {"----- Free ------", Kernel::Svc::MemoryState::Free},
-        {"Io               ", Kernel::Svc::MemoryState::Io},
-        {"Static           ", Kernel::Svc::MemoryState::Static},
-        {"Code             ", Kernel::Svc::MemoryState::Code},
-        {"CodeData         ", Kernel::Svc::MemoryState::CodeData},
-        {"Normal           ", Kernel::Svc::MemoryState::Normal},
-        {"Shared           ", Kernel::Svc::MemoryState::Shared},
-        {"AliasCode        ", Kernel::Svc::MemoryState::AliasCode},
-        {"AliasCodeData    ", Kernel::Svc::MemoryState::AliasCodeData},
-        {"Ipc              ", Kernel::Svc::MemoryState::Ipc},
-        {"Stack            ", Kernel::Svc::MemoryState::Stack},
-        {"ThreadLocal      ", Kernel::Svc::MemoryState::ThreadLocal},
-        {"Transferred      ", Kernel::Svc::MemoryState::Transferred},
-        {"SharedTransferred", Kernel::Svc::MemoryState::SharedTransferred},
-        {"SharedCode       ", Kernel::Svc::MemoryState::SharedCode},
-        {"Inaccessible     ", Kernel::Svc::MemoryState::Inaccessible},
-        {"NonSecureIpc     ", Kernel::Svc::MemoryState::NonSecureIpc},
-        {"NonDeviceIpc     ", Kernel::Svc::MemoryState::NonDeviceIpc},
-        {"Kernel           ", Kernel::Svc::MemoryState::Kernel},
-        {"GeneratedCode    ", Kernel::Svc::MemoryState::GeneratedCode},
-        {"CodeOut          ", Kernel::Svc::MemoryState::CodeOut},
-        {"Coverage         ", Kernel::Svc::MemoryState::Coverage},
-    }};
     for (size_t i = 0; i < MemoryStateNames.size(); i++) {
         if (std::get<1>(MemoryStateNames[i]) == state) {
             return std::get<0>(MemoryStateNames[i]);
@@ -610,7 +611,13 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
     auto* process = GetProcess();
     auto& page_table = process->GetPageTable();
 
-    if (command_str == "fastmem" || command_str == "get fastmem") {
+
+    const char* commands = "Commands:\n"
+                           " get fastmem\n"
+                           " get info\n"
+                           " get mappings\n";
+
+    if (command_str == "get fastmem") {
         if (Settings::IsFastmemEnabled()) {
             const auto& impl = page_table.GetImpl();
             const auto region = reinterpret_cast<uintptr_t>(impl.fastmem_arena);
@@ -623,7 +630,7 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
         } else {
             reply = "Fastmem is not enabled.\n";
         }
-    } else if (command_str == "info" || command_str == "get info") {
+    } else if (command_str == "get info") {
         auto modules = Core::FindModules(process);
 
         reply = fmt::format("Process: {:#x} ({})\n"
@@ -641,7 +648,8 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
             GetInteger(page_table.GetHeapRegionStart()),
             GetInteger(page_table.GetHeapRegionStart()) + page_table.GetHeapRegionSize() - 1,
             GetInteger(page_table.GetAliasCodeRegionStart()),
-            GetInteger(page_table.GetAliasCodeRegionStart()) + page_table.GetAliasCodeRegionSize() - 1,
+            GetInteger(page_table.GetAliasCodeRegionStart()) + page_table.GetAliasCodeRegionSize() -
+                1,
             GetInteger(page_table.GetStackRegionStart()),
             GetInteger(page_table.GetStackRegionStart()) + page_table.GetStackRegionSize() - 1);
 
@@ -649,7 +657,7 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
         reply += fmt::format(" {:#012x} - {:#012x} {}\n", vaddr,
                              GetInteger(Core::GetModuleEnd(process, vaddr)), name);
         }
-    } else if (command_str == "mappings" || command_str == "get mappings") {
+    } else if (command_str == "get mappings") {
         reply = "Mappings:\n";
         VAddr cur_addr = 0;
 
@@ -667,11 +675,15 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
                    std::numeric_limits<u64>::max()) {
             const char* state = GetMemoryStateName(svc_mem_info.state);
             const char* perm = GetMemoryPermissionString(svc_mem_info);
+
             const char l = True(svc_mem_info.attribute & MemoryAttribute::Locked) ? 'L' : '-';
-            const char i = True(svc_mem_info.attribute & MemoryAttribute::IpcLocked) ? 'I' : '-';
-            const char d = True(svc_mem_info.attribute & MemoryAttribute::DeviceShared) ? 'D' : '-';
+            const char i =
+                True(svc_mem_info.attribute & MemoryAttribute::IpcLocked) ? 'I' : '-';
+            const char d =
+                True(svc_mem_info.attribute & MemoryAttribute::DeviceShared) ? 'D' : '-';
             const char u = True(svc_mem_info.attribute & MemoryAttribute::Uncached) ? 'U' : '-';
-            const char p = True(svc_mem_info.attribute & MemoryAttribute::PermissionLocked) ? 'P' : '-';
+            const char p =
+                True(svc_mem_info.attribute & MemoryAttribute::PermissionLocked) ? 'P' : '-';
 
             reply += fmt::format(
                 " {:#012x} - {:#012x} {} {} {}{}{}{}{} [{}, {}]\n", svc_mem_info.base_address,
@@ -686,8 +698,11 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
 
             cur_addr = next_address;
         }
+    } else if (command_str == "help") {
+        reply = commands;
     } else {
-        reply += "Commands: fastmem, info, mappings\n";
+        reply = "Unknown command.\n";
+        reply += commands;
     }
 
     std::span<const u8> reply_span{reinterpret_cast<const u8*>(&reply.front()), reply.size()};
diff --git a/src/dynarmic/docs/ReturnStackBufferOptimization.md b/src/dynarmic/docs/ReturnStackBufferOptimization.md
index 6ffe41bcc6..e5298cad92 100644
--- a/src/dynarmic/docs/ReturnStackBufferOptimization.md
+++ b/src/dynarmic/docs/ReturnStackBufferOptimization.md
@@ -79,7 +79,7 @@ contain a prediction with the same `UniqueHash`.
              ? u64(unique_hash_to_code_ptr[imm64])
              : u64(code->GetReturnFromRunCodeAddress());
 
-    code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
+    code->mov(index_reg, dword[r15 + offsetof(JitState, rsb_ptr)]);
     code->add(index_reg, 1);
     code->and_(index_reg, u32(JitState::RSBSize - 1));
 
@@ -91,13 +91,13 @@ contain a prediction with the same `UniqueHash`.
     Xbyak::Label label;
     for (size_t i = 0; i < JitState::RSBSize; ++i) {
-        code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+        code->cmp(loc_desc_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
         code->je(label, code->T_SHORT);
     }
 
-    code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
-    code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
-    code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
+    code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg);
+    code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
+    code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
     code->L(label);
 }
 
@@ -122,14 +122,14 @@ To check if a prediction is in the RSB, we linearly scan the RSB.
     // This calculation has to match up with IREmitter::PushRSB
     code->mov(ecx, MJitStateReg(Arm::Reg::PC));
     code->shl(rcx, 32);
-    code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
-    code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
+    code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
+    code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]);
     code->or_(rbx, rcx);
 
     code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
     for (size_t i = 0; i < JitState::RSBSize; ++i) {
-        code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
-        code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
+        code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+        code->cmove(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
     }
     code->jmp(rax);
diff --git a/src/dynarmic/src/dynarmic/backend/arm64/verbose_debugging_output.h b/src/dynarmic/src/dynarmic/backend/arm64/verbose_debugging_output.h
index b5187f6375..84beda4057 100644
--- a/src/dynarmic/src/dynarmic/backend/arm64/verbose_debugging_output.h
+++ b/src/dynarmic/src/dynarmic/backend/arm64/verbose_debugging_output.h
@@ -20,7 +20,7 @@ struct Label;
 } // namespace oaknut
 
 namespace Dynarmic::IR {
-enum class Type : u16;
+enum class Type;
 } // namespace Dynarmic::IR
 
 namespace Dynarmic::Backend::Arm64 {
diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp
index fb306336cf..43e0750d68 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp
@@ -44,21 +44,21 @@ namespace Dynarmic::Backend::X64 {
 using namespace Xbyak::util;
 
 static Xbyak::Address MJitStateReg(A32::Reg reg) {
-    return dword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
+    return dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
 }
 
 static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) {
     if (A32::IsSingleExtReg(reg)) {
         const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::S0);
-        return dword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + sizeof(u32) * index];
+        return dword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u32) * index];
     }
     if (A32::IsDoubleExtReg(reg)) {
         const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::D0);
-        return qword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + sizeof(u64) * index];
+        return qword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u64) * index];
     }
     if (A32::IsQuadExtReg(reg)) {
         const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::Q0);
-        return xword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + 2 * sizeof(u64) * index];
+        return xword[r15 + offsetof(A32JitState, ExtReg) + 2 * sizeof(u64) * index];
     }
     ASSERT_FALSE("Should never happen.");
 }
@@ -109,12 +109,12 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
     const boost::container::static_vector gpr_order = [this] {
         boost::container::static_vector gprs{any_gpr};
-        if (conf.fastmem_pointer) {
-            gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13));
-        }
         if (conf.page_table) {
             gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
         }
+        if (conf.fastmem_pointer) {
+            gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13));
+        }
         return gprs;
     }();
 
@@ -220,7 +220,7 @@ void A32EmitX64::GenTerminalHandlers() {
     // PC ends up in ebp, location_descriptor ends up in rbx
     const auto calculate_location_descriptor = [this] {
         // This calculation has to match up with IREmitter::PushRSB
-        code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
+        code.mov(ebx, dword[r15 + offsetof(A32JitState, upper_location_descriptor)]);
         code.shl(rbx, 32);
         code.mov(ecx, MJitStateReg(A32::Reg::PC));
         code.mov(ebp, ecx);
@@ -232,17 +232,17 @@ void A32EmitX64::GenTerminalHandlers() {
     code.align();
     terminal_handler_pop_rsb_hint = code.getCurr();
     calculate_location_descriptor();
-    code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)]);
-    code.sub(eax, 1);
+    code.mov(eax, dword[r15 + offsetof(A32JitState, rsb_ptr)]);
+    code.dec(eax);
     code.and_(eax, u32(A32JitState::RSBPtrMask));
-    code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)], eax);
-    code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
+    code.mov(dword[r15 + offsetof(A32JitState, rsb_ptr)], eax);
+    code.cmp(rbx, qword[r15 + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
     if (conf.HasOptimization(OptimizationFlag::FastDispatch)) {
         code.jne(rsb_cache_miss);
     } else {
         code.jne(code.GetReturnFromRunCodeAddress());
     }
-    code.mov(rax, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]);
+    code.mov(rax, qword[r15 + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]);
     code.jmp(rax);
     PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a32_terminal_handler_pop_rsb_hint");
@@ -392,17 +392,17 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
         // so we load them both at the same time with one 64-bit read. This allows us to
         // extract all of their bits together at once with one pext.
         static_assert(offsetof(A32JitState, upper_location_descriptor) + 4 == offsetof(A32JitState, cpsr_ge));
-        code.mov(result.cvt64(), qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
+        code.mov(result.cvt64(), qword[r15 + offsetof(A32JitState, upper_location_descriptor)]);
         code.mov(tmp.cvt64(), 0x80808080'00000003ull);
         code.pext(result.cvt64(), result.cvt64(), tmp.cvt64());
         code.mov(tmp, 0x000f0220);
         code.pdep(result, result, tmp);
     } else {
-        code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
+        code.mov(result, dword[r15 + offsetof(A32JitState, upper_location_descriptor)]);
         code.imul(result, result, 0x120);
         code.and_(result, 0x00000220);
-        code.mov(tmp, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
+        code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_ge)]);
         code.and_(tmp, 0x80808080);
         code.imul(tmp, tmp, 0x00204081);
         code.shr(tmp, 12);
@@ -410,11 +410,11 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
         code.or_(result, tmp);
     }
 
-    code.mov(tmp, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
+    code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_q)]);
     code.shl(tmp, 27);
     code.or_(result, tmp);
 
-    code.mov(tmp2, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]);
+    code.mov(tmp2, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
     if (code.HasHostFeature(HostFeature::FastBMI2)) {
         code.mov(tmp, NZCV::x64_mask);
         code.pext(tmp2, tmp2, tmp);
@@ -426,7 +426,7 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
     }
     code.or_(result, tmp2);
 
-    code.or_(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)]);
+    code.or_(result, dword[r15 + offsetof(A32JitState, cpsr_jaifm)]);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
@@ -444,7 +444,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
 
     // cpsr_q
     code.bt(cpsr, 27);
-    code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
+    code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
 
     // cpsr_nzcv
     code.mov(tmp, cpsr);
@@ -456,12 +456,12 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
         code.imul(tmp, tmp, NZCV::to_x64_multiplier);
         code.and_(tmp, NZCV::x64_mask);
     }
-    code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], tmp);
+    code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], tmp);
 
     // cpsr_jaifm
     code.mov(tmp, cpsr);
     code.and_(tmp, 0x010001DF);
-    code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)], tmp);
+    code.mov(dword[r15 + offsetof(A32JitState, cpsr_jaifm)], tmp);
 
     if (code.HasHostFeature(HostFeature::FastBMI2)) {
         // cpsr_et and cpsr_ge
@@ -469,7 +469,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
         // This mask is 0x7FFF0000, because we do not want the MSB to be sign extended to the upper dword.
         static_assert((A32::LocationDescriptor::FPSCR_MODE_MASK & ~0x7FFF0000) == 0);
-        code.and_(qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], u32(0x7FFF0000));
+        code.and_(qword[r15 + offsetof(A32JitState, upper_location_descriptor)], u32(0x7FFF0000));
         code.mov(tmp, 0x000f0220);
         code.pext(cpsr, cpsr, tmp);
         code.mov(tmp.cvt64(), 0x01010101'00000003ull);
@@ -479,14 +479,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(tmp2.cvt64(), tmp.cvt64());
         code.sub(tmp.cvt64(), cpsr.cvt64());
         code.xor_(tmp.cvt64(), tmp2.cvt64());
-        code.or_(qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], tmp.cvt64());
+        code.or_(qword[r15 + offsetof(A32JitState, upper_location_descriptor)], tmp.cvt64());
     } else {
-        code.and_(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], u32(0xFFFF0000));
+        code.and_(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], u32(0xFFFF0000));
         code.mov(tmp, cpsr);
         code.and_(tmp, 0x00000220);
         code.imul(tmp, tmp, 0x00900000);
         code.shr(tmp, 28);
-        code.or_(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], tmp);
+        code.or_(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], tmp);
 
         code.and_(cpsr, 0x000f0000);
         code.shr(cpsr, 16);
@@ -495,14 +495,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(tmp, 0x80808080);
         code.sub(tmp, cpsr);
         code.xor_(tmp, 0x80808080);
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], tmp);
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], tmp);
     }
 }
 
 void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], to_store);
+    code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
 }
 
 void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
@@ -510,7 +510,7 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
     if (args[0].IsImmediate()) {
         const u32 imm = args[0].GetImmediateU32();
 
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
     } else if (code.HasHostFeature(HostFeature::FastBMI2)) {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
@@ -518,14 +518,14 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
         code.shr(a, 28);
         code.mov(b, NZCV::x64_mask);
         code.pdep(a, a, b);
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
     } else {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
 
         code.shr(a, 28);
         code.imul(a, a, NZCV::to_x64_multiplier);
         code.and_(a, NZCV::x64_mask);
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
     }
 }
 
@@ -534,25 +534,25 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
     if (args[0].IsImmediate()) {
         const u32 imm = args[0].GetImmediateU32();
 
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
-        code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
+        code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
     } else if (code.HasHostFeature(HostFeature::FastBMI2)) {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
 
         code.shr(a, 28);
-        code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
+        code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
         code.mov(b, NZCV::x64_mask);
         code.pdep(a, a, b);
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
     } else {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
 
         code.shr(a, 28);
-        code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
+        code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
         code.imul(a, a, NZCV::to_x64_multiplier);
         code.and_(a, NZCV::x64_mask);
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
     }
 }
 
@@ -562,10 +562,10 @@ void A32EmitX64::EmitA32SetCpsrNZ(A32EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(args[0]).cvt32();
     const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
 
-    code.movzx(tmp, code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1]);
+    code.movzx(tmp, code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1]);
     code.and_(tmp, 1);
     code.or_(tmp, nz);
-    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], tmp.cvt8());
+    code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], tmp.cvt8());
 }
 
 void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
@@ -575,11 +575,11 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
         if (args[1].IsImmediate()) {
             const bool c = args[1].GetImmediateU1();
 
-            code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
+            code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], c);
         } else {
             const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(args[1]).cvt8();
 
-            code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
+            code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], c);
         }
     } else {
         const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
@@ -588,19 +588,19 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
             const bool c = args[1].GetImmediateU1();
 
             code.or_(nz, c);
-            code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
+            code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
         } else {
             const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(args[1]).cvt32();
 
             code.or_(nz, c);
-            code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
+            code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
         }
     }
 }
 
 static void EmitGetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) {
     const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]);
+    code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
     if (flag_bit != 0) {
         code.shr(result, static_cast<int>(flag_bit));
     }
@@ -616,18 +616,18 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     if (args[0].IsImmediate()) {
         if (args[0].GetImmediateU1()) {
-            code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], 1);
+            code.mov(dword[r15 + offsetof(A32JitState, cpsr_q)], 1);
         }
     } else {
         const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
 
-        code.or_(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], to_store);
+        code.or_(code.byte[r15 + offsetof(A32JitState, cpsr_q)], to_store);
     }
 }
 
 void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-    code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
+    code.movd(result, dword[r15 + offsetof(A32JitState, cpsr_ge)]);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -637,10 +637,10 @@ void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
     if (args[0].IsInXmm()) {
         const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
-        code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
+        code.movd(dword[r15 + offsetof(A32JitState, cpsr_ge)], to_store);
     } else {
         const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32();
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], to_store);
     }
 }
 
@@ -654,7 +654,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
         ge |= mcl::bit::get_bit<17>(imm) ? 0x0000FF00 : 0;
         ge |= mcl::bit::get_bit<16>(imm) ? 0x000000FF : 0;
 
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], ge);
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], ge);
     } else if (code.HasHostFeature(HostFeature::FastBMI2)) {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
@@ -663,7 +663,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
         code.shr(a, 16);
         code.pdep(a, a, b);
         code.imul(a, a, 0xFF);
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a);
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], a);
     } else {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
 
@@ -672,7 +672,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
         code.imul(a, a, 0x00204081);
         code.and_(a, 0x01010101);
         code.imul(a, a, 0xFF);
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a);
+        code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], a);
     }
 }
 
@@ -716,7 +716,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
         const u32 new_upper = upper_without_t | (mcl::bit::get_bit<0>(new_pc) ? 1 : 0);
 
         code.mov(MJitStateReg(A32::Reg::PC), new_pc & mask);
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
+        code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
     } else {
         const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32();
         const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();
@@ -728,7 +728,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
         code.lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
         code.and_(new_pc, mask);
         code.mov(MJitStateReg(A32::Reg::PC), new_pc);
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
+        code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
     }
 }
 
@@ -798,9 +798,9 @@ static u32 GetFpscrImpl(A32JitState* jit_state) {
 void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.HostCall(inst);
-    code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
+    code.mov(code.ABI_PARAM1, code.r15);
 
-    code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
+    code.stmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]);
     code.CallFunction(&GetFpscrImpl);
 }
 
@@ -811,15 +811,15 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) {
 void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(nullptr, args[0]);
-    code.mov(code.ABI_PARAM2, code.ABI_JIT_PTR);
+    code.mov(code.ABI_PARAM2, code.r15);
 
     code.CallFunction(&SetFpscrImpl);
 
-    code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
+    code.ldmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]);
 }
 
 void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)]);
+    code.mov(result, dword[r15 + offsetof(A32JitState, fpsr_nzcv)]);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -833,7 +833,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(tmp, NZCV::x64_mask);
         code.pext(tmp, value, tmp);
         code.shl(tmp, 28);
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)], tmp);
+        code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], tmp);
 
         return;
     }
@@ -843,7 +843,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
     code.and_(value, NZCV::x64_mask);
     code.imul(value, value, NZCV::from_x64_multiplier);
     code.and_(value, NZCV::arm_mask);
-    code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)], value);
+    code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], value);
 }
 
 static void EmitCoprocessorException() {
@@ -1155,7 +1155,7 @@ void A32EmitX64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_locat
     }();
 
     if (old_upper != new_upper) {
-        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
+        code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
     }
 }
 
@@ -1165,28 +1165,32 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
     if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
         code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
         code.ReturnFromRunCode();
-    } else {
-        if (conf.enable_cycle_counting) {
-            code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
-            patch_information[terminal.next].jg.push_back(code.getCurr());
-            if (const auto next_bb = GetBasicBlock(terminal.next)) {
-                EmitPatchJg(terminal.next, next_bb->entrypoint);
-            } else {
-                EmitPatchJg(terminal.next);
-            }
-        } else {
-            code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0);
-            patch_information[terminal.next].jz.push_back(code.getCurr());
-            if (const auto next_bb = GetBasicBlock(terminal.next)) {
-                EmitPatchJz(terminal.next, next_bb->entrypoint);
-            } else {
-                EmitPatchJz(terminal.next);
-            }
-        }
-        code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
-        PushRSBHelper(rax, rbx, terminal.next);
-        code.ForceReturnFromRunCode();
+        return;
     }
+
+    if (conf.enable_cycle_counting) {
+        code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
+
+        patch_information[terminal.next].jg.push_back(code.getCurr());
+        if (const auto next_bb = GetBasicBlock(terminal.next)) {
+            EmitPatchJg(terminal.next, next_bb->entrypoint);
+        } else {
+            EmitPatchJg(terminal.next);
+        }
+    } else {
+        code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
+
+        patch_information[terminal.next].jz.push_back(code.getCurr());
+        if (const auto next_bb = GetBasicBlock(terminal.next)) {
+            EmitPatchJz(terminal.next, next_bb->entrypoint);
+        } else {
+            EmitPatchJz(terminal.next);
+        }
+    }
+
+    code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
+    PushRSBHelper(rax, rbx, terminal.next);
+    code.ForceReturnFromRunCode();
 }
 
 void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
@@ -1195,13 +1199,14 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::Location
     if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
         code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
         code.ReturnFromRunCode();
+        return;
+    }
+
+    patch_information[terminal.next].jmp.push_back(code.getCurr());
+    if (const auto next_bb = GetBasicBlock(terminal.next)) {
+        EmitPatchJmp(terminal.next, next_bb->entrypoint);
     } else {
-        patch_information[terminal.next].jmp.push_back(code.getCurr());
-        if (const auto next_bb = GetBasicBlock(terminal.next)) {
-            EmitPatchJmp(terminal.next, next_bb->entrypoint);
-        } else {
-            EmitPatchJmp(terminal.next);
-        }
+        EmitPatchJmp(terminal.next);
     }
 }
 
@@ -1240,7 +1245,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr
 }
 
 void A32EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
-    code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0);
+    code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
     code.jne(code.GetForceReturnFromRunCodeAddress());
     EmitTerminal(terminal.else_, initial_location, is_single_step);
 }
diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp
index a1fca21f47..f2919485be 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp
@@ -168,7 +168,7 @@ void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
 }
 
 void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) {
-    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, exclusive_state)], u8(0));
+    code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0));
 }
 
 void A32EmitX64::EmitA32ExclusiveReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {
@@ -244,14 +244,14 @@ void A32EmitX64::EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak
 
     const A32::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};
 
-    code.test(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
+    code.test(dword[r15 + offsetof(A32JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
     if (end) {
         code.jz(*end, code.T_NEAR);
     } else {
         code.jz(skip, code.T_NEAR);
     }
     EmitSetUpperLocationDescriptor(current_location, ctx.Location());
-    code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC());
+    code.mov(dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC());
     code.ForceReturnFromRunCode();
     code.L(skip);
 }
diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
index 1e673338a8..47a2236a87 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
@@ -80,12 +80,12 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
     const boost::container::static_vector gpr_order = [this] {
         boost::container::static_vector gprs{any_gpr};
-        if (conf.fastmem_pointer) {
-            gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13));
-        }
         if (conf.page_table) {
             gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
         }
+        if (conf.fastmem_pointer) {
+            gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13));
+        }
         return gprs;
     }();
 
@@ -192,10 +192,10 @@ void A64EmitX64::GenTerminalHandlers() {
     const auto calculate_location_descriptor = [this] {
         // This calculation has to match up with A64::LocationDescriptor::UniqueHash
        // TODO: Optimization is available here based on known state of fpcr.
-        code.mov(rbp, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]);
+        code.mov(rbp, qword[r15 + offsetof(A64JitState, pc)]);
         code.mov(rcx, A64::LocationDescriptor::pc_mask);
         code.and_(rcx, rbp);
-        code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
+        code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]);
         code.and_(ebx, A64::LocationDescriptor::fpcr_mask);
         code.shl(rbx, A64::LocationDescriptor::fpcr_shift);
         code.or_(rbx, rcx);
@@ -207,17 +207,17 @@ void A64EmitX64::GenTerminalHandlers() {
     code.align();
     terminal_handler_pop_rsb_hint = code.getCurr();
     calculate_location_descriptor();
-    code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)]);
-    code.sub(eax, 1);
+    code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]);
+    code.dec(eax);
     code.and_(eax, u32(A64JitState::RSBPtrMask));
-    code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)], eax);
-    code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
+    code.mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax);
+    code.cmp(rbx, qword[r15 + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
     if (conf.HasOptimization(OptimizationFlag::FastDispatch)) {
         code.jne(rsb_cache_miss, code.T_NEAR);
     } else {
         code.jne(code.GetReturnFromRunCodeAddress());
     }
-    code.mov(rax, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]);
+    code.mov(rax, qword[r15 + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]);
     code.jmp(rax);
     PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a64_terminal_handler_pop_rsb_hint");
 
@@ -272,7 +272,7 @@ void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);
+    code.mov(result, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]);
     code.shr(result, NZCV::x64_c_flag_bit);
     code.and_(result, 1);
     ctx.reg_alloc.DefineValue(inst, result);
@@ -281,7 +281,7 @@ void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
 void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr().cvt32();
 
-    code.mov(nzcv_raw, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);
+    code.mov(nzcv_raw, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]);
 
     if (code.HasHostFeature(HostFeature::FastBMI2)) {
         const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
@@ -310,20 +310,20 @@ void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
         code.imul(nzcv_raw, nzcv_raw, NZCV::to_x64_multiplier);
         code.and_(nzcv_raw, NZCV::x64_mask);
     }
-    code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw);
+    code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw);
 }
 
 void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], to_store);
+    code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], to_store);
 }
 
 void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) {
     const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
     const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
 
-    code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
+    code.mov(result, dword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -331,13 +331,13 @@ void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) {
     const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
     const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
 
-    code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
+    code.mov(result, qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
 void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
     const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
-    const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
+    const auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
 
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     code.movd(result, addr);
@@ -346,7 +346,7 @@ void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
     const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
-    const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
+    const auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
 
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     code.movq(result, addr);
@@ -355,7 +355,7 @@ void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
     const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
-    const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
+    const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
 
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     code.movaps(result, addr);
@@ -364,13 +364,13 @@ void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
-    code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]);
+    code.mov(result, qword[r15 + offsetof(A64JitState, sp)]);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
 void A64EmitX64::EmitA64GetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
+    code.mov(result, dword[r15 + offsetof(A64JitState, fpcr)]);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -380,15 +380,15 @@ static u32 GetFPSRImpl(A64JitState* jit_state) {
 
 void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.HostCall(inst);
-    code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
-    code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
+    code.mov(code.ABI_PARAM1, code.r15);
+    code.stmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
     code.CallFunction(GetFPSRImpl);
 }
 
 void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
-    const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
+    const auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
     if (args[1].FitsInImmediateS32()) {
         code.mov(addr, args[1].GetImmediateS32());
     } else {
@@ -402,7 +402,7 @@ void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
 void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
-    const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
+    const auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
     if (args[1].FitsInImmediateS32()) {
         code.mov(addr, args[1].GetImmediateS32());
     } else if (args[1].IsInXmm()) {
@@ -417,7 +417,7 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
 void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
-    const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
+    const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
 
     const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
     const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
@@ -430,7 +430,7 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
-    const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
+    const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
 
     const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]);
     code.movq(to_store, to_store); // TODO: Remove when able
@@ -440,7 +440,7 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
-    const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
+    const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
 
     const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
     code.movaps(addr, to_store);
@@ -448,7 +448,7 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)];
+    const auto addr = qword[r15 + offsetof(A64JitState, sp)];
     if (args[0].FitsInImmediateS32()) {
         code.mov(addr, args[0].GetImmediateS32());
     } else if (args[0].IsInXmm()) {
@@ -467,9 +467,9 @@ static void SetFPCRImpl(A64JitState* jit_state, u32 value) {
 void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
-    code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
+    code.mov(code.ABI_PARAM1, code.r15);
     code.CallFunction(SetFPCRImpl);
-    code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
+    code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
 }
 
 static void SetFPSRImpl(A64JitState* jit_state, u32 value) {
@@ -479,14 +479,14 @@ static void SetFPSRImpl(A64JitState* jit_state, u32 value) {
 void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
-    code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
+    code.mov(code.ABI_PARAM1, code.r15);
     code.CallFunction(SetFPSRImpl);
-    code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
+    code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
 }
 
 void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)];
+    const auto addr = qword[r15 + offsetof(A64JitState, pc)];
     if (args[0].FitsInImmediateS32()) {
         code.mov(addr, args[0].GetImmediateS32());
     } else if (args[0].IsInXmm()) {
@@ -507,7 +507,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
         code.mov(param[0], imm);
     });
     // The kernel would have to execute ERET to get here, which would clear exclusive state.
-    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A64JitState, exclusive_state)], u8(0));
+    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
 }
 
 void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
@@ -621,7 +621,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDesc
     code.SwitchMxcsrOnExit();
     Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, [&](RegList param) {
         code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC());
-        code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], param[0]);
+        code.mov(qword[r15 + offsetof(A64JitState, pc)], param[0]);
         code.mov(param[1].cvt32(), terminal.num_instructions);
     });
     code.ReturnFromRunCode(true); // TODO: Check cycles
@@ -632,56 +632,61 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescri
 }
 
 void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) {
-    // Used for patches and linking
-    if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
-        if (conf.enable_cycle_counting) {
-            code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
-            patch_information[terminal.next].jg.push_back(code.getCurr());
-            if (const auto next_bb = GetBasicBlock(terminal.next)) {
-                EmitPatchJg(terminal.next, next_bb->entrypoint);
-            } else {
-                EmitPatchJg(terminal.next);
-            }
-        } else {
-            code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0);
-            patch_information[terminal.next].jz.push_back(code.getCurr());
-            if (const auto next_bb = GetBasicBlock(terminal.next)) {
-                EmitPatchJz(terminal.next, next_bb->entrypoint);
-            } else {
-                EmitPatchJz(terminal.next);
-            }
-        }
+    if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
         code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
-        code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
-        code.ForceReturnFromRunCode();
-    } else {
-        code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
-        code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
+        code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
         code.ReturnFromRunCode();
+        return;
     }
+
+    if (conf.enable_cycle_counting) {
+        code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
+
+        patch_information[terminal.next].jg.push_back(code.getCurr());
+        if (const auto next_bb = GetBasicBlock(terminal.next)) {
+            EmitPatchJg(terminal.next, next_bb->entrypoint);
+        } else {
+            EmitPatchJg(terminal.next);
+        }
+    } else {
+        code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
+
+        patch_information[terminal.next].jz.push_back(code.getCurr());
+        if (const auto next_bb = GetBasicBlock(terminal.next)) {
+            EmitPatchJz(terminal.next, next_bb->entrypoint);
+        } else {
+            EmitPatchJz(terminal.next);
+        }
+    }
+
+    code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
+    code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
+    code.ForceReturnFromRunCode();
 }
 
 void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
-    if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
-        patch_information[terminal.next].jmp.push_back(code.getCurr());
-        if (auto next_bb = GetBasicBlock(terminal.next)) {
-            EmitPatchJmp(terminal.next, next_bb->entrypoint);
-        } else {
-            EmitPatchJmp(terminal.next);
-        }
-    } else {
+    if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
is_single_step) { code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); - code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); + code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); code.ReturnFromRunCode(); + return; + } + + patch_information[terminal.next].jmp.push_back(code.getCurr()); + if (auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJmp(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJmp(terminal.next); } } void A64EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) { - if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step) { - code.jmp(terminal_handler_pop_rsb_hint); - } else { + if (!conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) || is_single_step) { code.ReturnFromRunCode(); + return; } + + code.jmp(terminal_handler_pop_rsb_hint); } void A64EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor, bool is_single_step) { @@ -718,7 +723,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr } void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0); + code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0); code.jne(code.GetForceReturnFromRunCodeAddress()); EmitTerminal(terminal.else_, initial_location, is_single_step); } @@ -729,7 +734,7 @@ void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr code.jg(target_code_ptr); } else { code.mov(rax, A64::LocationDescriptor{target_desc}.PC()); - code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); + code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); code.jg(code.GetReturnFromRunCodeAddress()); } code.EnsurePatchLocationSize(patch_location, 23); @@ -741,7 +746,7 @@ void A64EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr code.jz(target_code_ptr); } else { code.mov(rax, A64::LocationDescriptor{target_desc}.PC()); - code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); + code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); code.jz(code.GetReturnFromRunCodeAddress()); } code.EnsurePatchLocationSize(patch_location, 23); @@ -753,7 +758,7 @@ void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr code.jmp(target_code_ptr); } else { code.mov(rax, A64::LocationDescriptor{target_desc}.PC()); - code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); + code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); code.jmp(code.GetReturnFromRunCodeAddress()); } code.EnsurePatchLocationSize(patch_location, 22); diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h index a1917a3594..f26723092f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h @@ -127,10 +127,10 @@ protected: BlockRangeInformation block_ranges; std::array fast_dispatch_table; ankerl::unordered_dense::map fastmem_patch_info; - ankerl::unordered_dense::map, void (*)()> read_fallbacks; - ankerl::unordered_dense::map, void (*)()> write_fallbacks; - ankerl::unordered_dense::map, void (*)()> exclusive_write_fallbacks; - ankerl::unordered_dense::set do_not_fastmem; + std::map, void (*)()> read_fallbacks; + std::map, void (*)()> write_fallbacks; + std::map, void (*)()> exclusive_write_fallbacks; + std::set do_not_fastmem; 
const void* terminal_handler_pop_rsb_hint = nullptr; const void* terminal_handler_fast_dispatch_hint = nullptr; FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr; diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp index 8fd6777542..fe7dfa011f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp @@ -324,7 +324,7 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { } void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) { - code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A64JitState, exclusive_state)], u8(0)); + code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); } void A64EmitX64::EmitA64ExclusiveReadMemory8(A64EmitContext& ctx, IR::Inst* inst) { @@ -416,14 +416,14 @@ void A64EmitX64::EmitCheckMemoryAbort(A64EmitContext&, IR::Inst* inst, Xbyak::La const A64::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}}; - code.test(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], static_cast(HaltReason::MemoryAbort)); + code.test(dword[r15 + offsetof(A64JitState, halt_reason)], static_cast(HaltReason::MemoryAbort)); if (end) { code.jz(*end, code.T_NEAR); } else { code.jz(skip, code.T_NEAR); } code.mov(rax, current_location.PC()); - code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); + code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); code.ForceReturnFromRunCode(); code.L(skip); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp index 235e2e227d..e8eaddcbac 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp @@ -119,20 +119,6 @@ void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE); } -// Windows ABI registers are not in the same allocation algorithm as unix's -#ifdef _MSC_VER -void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) { - std::vector regs; - std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception); - ABI_PushRegistersAndAdjustStack(code, 0, regs); -} - -void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) { - std::vector regs; - std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception); - ABI_PopRegistersAndAdjustStack(code, 0, regs); -} -#else static consteval size_t ABI_AllCallerSaveSize() noexcept { return ABI_ALL_CALLER_SAVE.max_size(); } @@ -180,14 +166,24 @@ alignas(64) static constinit std::array AB }; void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) { +#ifdef _MSC_VER + std::vector regs; + std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception); + ABI_PushRegistersAndAdjustStack(code, 0, regs); +#else ASSUME(size_t(exception) < 32); ABI_PushRegistersAndAdjustStack(code, 0, ABI_CALLER_SAVED_EXCEPT_TABLE[size_t(exception)]); +#endif } void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) { +#ifdef _MSC_VER + std::vector regs; + std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), 
exception); + ABI_PopRegistersAndAdjustStack(code, 0, regs); +#else ASSUME(size_t(exception) < 32); ABI_PopRegistersAndAdjustStack(code, 0, ABI_CALLER_SAVED_EXCEPT_TABLE[size_t(exception)]); -} #endif - +} } // namespace Dynarmic::Backend::X64 diff --git a/src/dynarmic/src/dynarmic/backend/x64/abi.h b/src/dynarmic/src/dynarmic/backend/x64/abi.h index 307817a864..32f2bdac67 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/abi.h +++ b/src/dynarmic/src/dynarmic/backend/x64/abi.h @@ -17,7 +17,6 @@ namespace Dynarmic::Backend::X64 { class BlockOfCode; -constexpr HostLoc ABI_JIT_PTR = HostLoc::R15; #ifdef _WIN32 constexpr HostLoc ABI_RETURN = HostLoc::RAX; diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index 5a33ac7727..41603abf86 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -36,7 +36,6 @@ namespace Dynarmic::Backend::X64 { -const Xbyak::Reg64 BlockOfCode::ABI_JIT_PTR = HostLocToReg64(Dynarmic::Backend::X64::ABI_JIT_PTR); #ifdef _WIN32 const Xbyak::Reg64 BlockOfCode::ABI_RETURN = HostLocToReg64(Dynarmic::Backend::X64::ABI_RETURN); const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM1); @@ -323,8 +322,8 @@ void BlockOfCode::GenRunCode(std::function rcp) { // that the stack is appropriately aligned for CALLs. ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); - mov(ABI_JIT_PTR, ABI_PARAM1); - mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register + mov(r15, ABI_PARAM1); + mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register if (cb.enable_cycle_counting) { cb.GetTicksRemaining->EmitCall(*this); @@ -332,11 +331,9 @@ void BlockOfCode::GenRunCode(std::function rcp) { mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN); } - // r14 = page table - // r13 = fastmem pointer rcp(*this); - cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); + cmp(dword[r15 + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited, T_NEAR); SwitchMxcsrOnEntry(); @@ -347,7 +344,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); - mov(ABI_JIT_PTR, ABI_PARAM1); + mov(r15, ABI_PARAM1); if (cb.enable_cycle_counting) { mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1); @@ -356,10 +353,10 @@ void BlockOfCode::GenRunCode(std::function rcp) { rcp(*this); - cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); + cmp(dword[r15 + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited, T_NEAR); lock(); - or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); + or_(dword[r15 + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); SwitchMxcsrOnEntry(); jmp(ABI_PARAM2); @@ -369,7 +366,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { align(); return_from_run_code[0] = getCurr(); - cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); + cmp(dword[r15 + jsi.offsetof_halt_reason], 0); jne(return_to_caller); if (cb.enable_cycle_counting) { cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); @@ -381,7 +378,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { align(); return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr(); - cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); + cmp(dword[r15 + jsi.offsetof_halt_reason], 0); 
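// Illustration (an assumption about host-side usage, not code from the patch):
// the `cmp dword [r15 + jsi.offsetof_halt_reason], 0` / `jne` pairs emitted
// here poll a flag that another thread can raise. A host-side writer might
// look roughly like this (JitStateSketch/RequestHalt are invented names):
//
//   #include <atomic>
//   #include <cstdint>
//
//   struct JitStateSketch {
//       std::atomic<std::uint32_t> halt_reason{0};
//   };
//
//   void RequestHalt(JitStateSketch& js, std::uint32_t reason_bits) {
//       // An atomic RMW here pairs with the `lock or_` / `lock xchg`
//       // sequences emitted above; the JIT side needs only a plain aligned
//       // 32-bit load to observe the flag eventually.
//       js.halt_reason.fetch_or(reason_bits, std::memory_order_relaxed);
//   }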
jne(return_to_caller_mxcsr_already_exited); if (cb.enable_cycle_counting) { cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); @@ -410,7 +407,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { xor_(eax, eax); lock(); - xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax); + xchg(dword[r15 + jsi.offsetof_halt_reason], eax); ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); ret(); @@ -420,22 +417,22 @@ void BlockOfCode::GenRunCode(std::function rcp) { void BlockOfCode::SwitchMxcsrOnEntry() { stmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]); - ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]); + ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]); } void BlockOfCode::SwitchMxcsrOnExit() { - stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]); + stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]); ldmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]); } void BlockOfCode::EnterStandardASIMD() { - stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]); - ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_asimd_MXCSR]); + stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]); + ldmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]); } void BlockOfCode::LeaveStandardASIMD() { - stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_asimd_MXCSR]); - ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]); + stmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]); + ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]); } void BlockOfCode::UpdateTicks() { diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h index 095e75336b..4cc8663e11 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h @@ -155,7 +155,6 @@ public: void SetCodePtr(CodePtr code_ptr); void EnsurePatchLocationSize(CodePtr begin, size_t size); - static const Xbyak::Reg64 ABI_JIT_PTR; // ABI registers #ifdef _WIN32 static const Xbyak::Reg64 ABI_RETURN; diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index a13baa6a97..d428199585 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -91,18 +91,19 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, I ? 
iter->second.entrypoint : code.GetReturnFromRunCodeAddress(); - code.mov(index_reg.cvt32(), dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_rsb_ptr]); + code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]); + code.mov(loc_desc_reg, target.Value()); + patch_information[target].mov_rcx.push_back(code.getCurr()); EmitPatchMovRcx(target_code_ptr); - code.mov(qword[code.ABI_JIT_PTR + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg); - code.mov(qword[code.ABI_JIT_PTR + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx); - // Byte size hack - DEBUG_ASSERT(code.GetJitStateInfo().rsb_ptr_mask <= 0xFF); - code.add(index_reg.cvt32(), 1); //flags trashed, 1 single byte, haswell doesn't care - code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask)); //trashes flags - // Results ready and sort by least needed: give OOO some break - code.mov(dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32()); + + code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg); + code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx); + + code.add(index_reg.cvt32(), 1); + code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask)); + code.mov(dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32()); } void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) { @@ -118,7 +119,7 @@ void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) { code.movaps(xword[rsp + offsetof(RegisterData, xmms) + 2 * sizeof(u64) * i], Xbyak::Xmm{i}); } code.lea(rax, ptr[rsp + sizeof(RegisterData) + offsetof(StackLayout, spill)]); - code.mov(qword[rsp + offsetof(RegisterData, spill)], rax); + code.mov(xword[rsp + offsetof(RegisterData, spill)], rax); reg_alloc.EmitVerboseDebuggingOutput(); @@ -284,7 +285,7 @@ void EmitX64::EmitAddCycles(size_t cycles) { Xbyak::Label EmitX64::EmitCond(IR::Cond cond) { Xbyak::Label pass; - code.mov(eax, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]); + code.mov(eax, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]); code.LoadRequiredFlagsForCondFromRax(cond); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp index 9d7c57cb57..842a8612ee 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp @@ -18,20 +18,24 @@ namespace CRC32 = Common::Crypto::CRC32; static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (code.HasHostFeature(HostFeature::SSE42)) { const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size); + if (data_size != 64) { code.crc32(crc, value); } else { code.crc32(crc.cvt64(), value); } + ctx.reg_alloc.DefineValue(inst, crc); - } else { - ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); - code.mov(code.ABI_PARAM3.cvt32(), data_size / CHAR_BIT); //zext - code.CallFunction(&CRC32::ComputeCRC32Castagnoli); + return; } + + ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); + code.mov(code.ABI_PARAM3, data_size / CHAR_BIT); + code.CallFunction(&CRC32::ComputeCRC32Castagnoli); } static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) 
{ @@ -65,7 +69,10 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co code.pextrd(crc, xmm_value, 2); ctx.reg_alloc.DefineValue(inst, crc); - } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) { + return; + } + + if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) { const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32(); const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(); @@ -83,7 +90,10 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co code.pextrd(crc, xmm_value, 2); ctx.reg_alloc.DefineValue(inst, crc); - } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) { + return; + } + + if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) { const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(); @@ -101,11 +111,12 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co code.pextrd(crc, xmm_value, 2); ctx.reg_alloc.DefineValue(inst, crc); - } else { - ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); - code.mov(code.ABI_PARAM3, data_size / CHAR_BIT); - code.CallFunction(&CRC32::ComputeCRC32ISO); + return; } + + ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); + code.mov(code.ABI_PARAM3, data_size / CHAR_BIT); + code.CallFunction(&CRC32::ComputeCRC32ISO); } void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) { diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp index 7e03e3dcd1..4128ef1721 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp @@ -143,7 +143,7 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst* const Xbyak::Reg then_ = ctx.reg_alloc.UseGpr(args[1]).changeBit(bitsize); const Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(args[2]).changeBit(bitsize); - code.mov(nzcv, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]); + code.mov(nzcv, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]); code.LoadRequiredFlagsForCondFromRax(args[0].GetImmediateCond()); @@ -909,11 +909,11 @@ static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* ca } } -// AL contains flags (after LAHF + SETO sequence) static Xbyak::Reg64 DoNZCV(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* nzcv_out) { if (!nzcv_out) { return Xbyak::Reg64{-1}; } + const Xbyak::Reg64 nzcv = reg_alloc.ScratchGpr(HostLoc::RAX); code.xor_(nzcv.cvt32(), nzcv.cvt32()); return nzcv; @@ -1168,7 +1168,7 @@ void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) { code.xor_(eax, eax); code.test(divisor, divisor); - code.jz(end, code.T_NEAR); + code.jz(end); code.mov(eax, dividend); code.xor_(edx, edx); code.div(divisor); @@ -1189,7 +1189,7 @@ void EmitX64::EmitUnsignedDiv64(EmitContext& ctx, IR::Inst* inst) { code.xor_(eax, eax); code.test(divisor, divisor); - code.jz(end, code.T_NEAR); + code.jz(end); code.mov(rax, dividend); code.xor_(edx, edx); code.div(divisor); @@ -1568,14 +1568,14 @@ void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { } else { const Xbyak::Reg32 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg32 
result = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 temp = ctx.reg_alloc.ScratchGpr().cvt32(); // The result of a bsr of zero is undefined, but zf is set after it. code.bsr(result, source); - code.mov(temp, 32); - code.xor_(result, 31); - code.test(source, source); - code.cmove(result, temp); + code.mov(source, 0xFFFFFFFF); + code.cmovz(result, source); + code.neg(result); + code.add(result, 31); + ctx.reg_alloc.DefineValue(inst, result); } } @@ -1592,14 +1592,14 @@ void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) { } else { const Xbyak::Reg64 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt64(); const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); - const Xbyak::Reg64 temp = ctx.reg_alloc.ScratchGpr().cvt64(); // The result of a bsr of zero is undefined, but zf is set after it. code.bsr(result, source); - code.mov(temp.cvt32(), 64); - code.xor_(result.cvt32(), 63); - code.test(source, source); - code.cmove(result.cvt32(), temp.cvt32()); + code.mov(source.cvt32(), 0xFFFFFFFF); + code.cmovz(result.cvt32(), source.cvt32()); + code.neg(result.cvt32()); + code.add(result.cvt32(), 63); + ctx.reg_alloc.DefineValue(inst, result); } } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 47e51acb03..63b9659618 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -712,12 +712,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); #ifdef _WIN32 code.lea(rsp, ptr[rsp - (16 + ABI_SHADOW_SPACE)]); - code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE], rax); code.CallFunction(fallback_fn); code.add(rsp, 16 + ABI_SHADOW_SPACE); #else - code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(fallback_fn); #endif code.movq(result, code.ABI_RETURN); @@ -821,12 +821,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); #ifdef _WIN32 ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); - code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE], rax); code.CallFunction(fallback_fn); ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); #else - code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(fallback_fn); #endif } @@ -945,7 +945,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipEstimate); } @@ -968,7 
+968,7 @@ static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* i auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipExponent); } @@ -1026,7 +1026,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.movq(code.ABI_PARAM1, operand1); code.movq(code.ABI_PARAM2, operand2); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipStepFused); code.movq(result, code.ABI_RETURN); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); @@ -1055,7 +1055,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* ctx.reg_alloc.HostCall(inst, args[0], args[1]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipStepFused); } @@ -1119,7 +1119,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); - code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.CallFunction(lut.at(std::make_tuple(fsize, rounding_mode, exact))); } @@ -1206,7 +1206,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i } // a > 0 && a < 0x00800000; - code.sub(tmp, 1); + code.dec(tmp); code.cmp(tmp, 0x007FFFFF); code.jb(fallback, code.T_NEAR); //within -127,128 needs_fallback = true; @@ -1284,7 +1284,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); code.movq(code.ABI_PARAM1, operand); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtEstimate); code.movq(result, rax); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); @@ -1298,7 +1298,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtEstimate); } } @@ -1368,7 +1368,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.movq(code.ABI_PARAM1, operand1); 
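// Note (signature inferred from the call setup in these hunks; treat it as an
// assumption, not the library's documented API): each soft-float fallback is
// called with the same convention, roughly
//
//   template<typename FPT>
//   FPT FPRSqrtStepFused(FPT op1, FPT op2, FP::FPCR fpcr, FP::FPSR& fpsr);
//
// so the emitter movq's the raw operand bits into ABI_PARAM1/ABI_PARAM2,
// loads the guest FPCR value into ABI_PARAM3, and lea's the address of the
// in-state fpsr_exc word into ABI_PARAM4 before CallFunction(...), letting
// the helper accumulate exception flags directly into JIT state.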
code.movq(code.ABI_PARAM2, operand2); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtStepFused); code.movq(result, code.ABI_RETURN); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); @@ -1398,7 +1398,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* ctx.reg_alloc.HostCall(inst, args[0], args[1]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtStepFused); } @@ -1511,7 +1511,7 @@ void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1535,7 +1535,7 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1556,7 +1556,7 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } } @@ -1581,7 +1581,7 @@ void EmitX64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1595,7 +1595,7 @@ void EmitX64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1616,7 +1616,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - 
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } } @@ -1757,7 +1757,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { mp::cartesian_product{}); ctx.reg_alloc.HostCall(inst, args[0]); - code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.CallFunction(lut.at(std::make_tuple(fbits, rounding_mode))); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index 34f77b0446..272b896ae3 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -28,24 +28,27 @@ std::optional AxxEmitX64::ShouldFastmem(AxxEmitC FakeCall AxxEmitX64::FastmemCallback(u64 rip_) { const auto iter = fastmem_patch_info.find(rip_); - if (iter != fastmem_patch_info.end()) { - FakeCall result{ - .call_rip = iter->second.callback, - .ret_rip = iter->second.resume_rip, - }; - if (iter->second.recompile) { - const auto marker = iter->second.marker; - do_not_fastmem.insert(marker); - InvalidateBasicBlocks({std::get<0>(marker)}); - } - return result; - } else { + + if (iter == fastmem_patch_info.end()) { fmt::print("dynarmic: Segfault happened within JITted code at rip = {:016x}\n", rip_); fmt::print("Segfault wasn't at a fastmem patch location!\n"); fmt::print("Now dumping code.......\n\n"); Common::DumpDisassembledX64((void*)(rip_ & ~u64(0xFFF)), 0x1000); ASSERT_FALSE("iter != fastmem_patch_info.end()"); } + + FakeCall result{ + .call_rip = iter->second.callback, + .ret_rip = iter->second.resume_rip, + }; + + if (iter->second.recompile) { + const auto marker = iter->second.marker; + do_not_fastmem.insert(marker); + InvalidateBasicBlocks({std::get<0>(marker)}); + } + + return result; } template @@ -92,7 +95,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) { if (fastmem_marker) { // Use fastmem - bool require_abort_handling = false; + bool require_abort_handling; const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling); const auto location = EmitReadMemoryMov(code, value_idx, src_ptr, ordered); @@ -179,7 +182,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) { if (fastmem_marker) { // Use fastmem - bool require_abort_handling = false; + bool require_abort_handling; const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling); const auto location = EmitWriteMemoryMov(code, dest_ptr, value_idx, ordered); @@ -227,7 +230,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, {}, args[1]); - code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1)); + code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1)); code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); if (ordered) { code.mfence(); @@ -245,7 +248,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); - code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1)); + 
code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1)); code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); @@ -285,9 +288,9 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) { Xbyak::Label end; code.mov(code.ABI_RETURN, u32(1)); - code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); code.je(end); - code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); if constexpr (bitsize != 128) { using T = mcl::unsigned_integer_of_size; @@ -355,7 +358,7 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in EmitExclusiveLock(code, conf, tmp, tmp2.cvt32()); - code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1)); + code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1)); code.mov(tmp, mcl::bit_cast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); code.mov(qword[tmp], vaddr); @@ -439,14 +442,14 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i code.mov(tmp, mcl::bit_cast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); code.mov(status, u32(1)); - code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); code.je(*end, code.T_NEAR); code.cmp(qword[tmp], vaddr); code.jne(*end, code.T_NEAR); EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax); - code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); code.mov(tmp, mcl::bit_cast(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id))); if constexpr (bitsize == 128) { @@ -501,6 +504,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i } code.setnz(status.cvt8()); + ctx.deferred_emits.emplace_back([=, this] { code.L(*abort); code.call(wrapped_fn); @@ -514,21 +518,24 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i conf.recompile_on_exclusive_fastmem_failure, }); - code.xor_(status.cvt32(), status.cvt32()); //dep-break - code.test(code.al, code.al); + code.cmp(al, 0); code.setz(status.cvt8()); + code.movzx(status.cvt32(), status.cvt8()); code.jmp(*end, code.T_NEAR); }); } else { code.call(wrapped_fn); - code.xor_(status.cvt32(), status.cvt32()); //dep-break - code.test(code.al, code.al); + code.cmp(al, 0); code.setz(status.cvt8()); + code.movzx(status.cvt32(), status.cvt8()); } code.L(*end); + EmitExclusiveUnlock(code, conf, tmp, eax); + ctx.reg_alloc.DefineValue(inst, status); + EmitCheckMemoryAbort(ctx, inst); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h index 75a47c6a80..b25b33101c 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h @@ -46,25 +46,26 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi code.test(vaddr, align_mask); - if 
(ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { - const u32 page_align_mask = static_cast(page_size - 1) & ~align_mask; - - SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel(); - - code.jnz(*detect_boundary, code.T_NEAR); - code.L(*resume); - - ctx.deferred_emits.emplace_back([=, &code] { - code.L(*detect_boundary); - code.mov(tmp, vaddr); - code.and_(tmp, page_align_mask); - code.cmp(tmp, page_align_mask); - code.jne(*resume, code.T_NEAR); - // NOTE: We expect to fallthrough into abort code here. - }); - } else { + if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { code.jnz(abort, code.T_NEAR); + return; } + + const u32 page_align_mask = static_cast(page_size - 1) & ~align_mask; + + SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel(); + + code.jnz(*detect_boundary, code.T_NEAR); + code.L(*resume); + + ctx.deferred_emits.emplace_back([=, &code] { + code.L(*detect_boundary); + code.mov(tmp, vaddr); + code.and_(tmp, page_align_mask); + code.cmp(tmp, page_align_mask); + code.jne(*resume, code.T_NEAR); + // NOTE: We expect to fallthrough into abort code here. + }); } template @@ -201,7 +202,7 @@ template const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::RegExp& addr, bool ordered) { if (ordered) { if constexpr (bitsize != 128) { - code.xor_(Xbyak::Reg32(value_idx), Xbyak::Reg32(value_idx)); + code.xor_(Xbyak::Reg32{value_idx}, Xbyak::Reg32{value_idx}); } else { code.xor_(eax, eax); code.xor_(ebx, ebx); @@ -213,59 +214,59 @@ const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::Reg switch (bitsize) { case 8: code.lock(); - code.xadd(code.byte[addr], Xbyak::Reg32(value_idx).cvt8()); + code.xadd(code.byte[addr], Xbyak::Reg32{value_idx}.cvt8()); break; case 16: code.lock(); - code.xadd(word[addr], Xbyak::Reg64(value_idx).cvt16()); + code.xadd(word[addr], Xbyak::Reg16{value_idx}); break; case 32: code.lock(); - code.xadd(dword[addr], Xbyak::Reg64(value_idx).cvt32()); + code.xadd(dword[addr], Xbyak::Reg32{value_idx}); break; case 64: code.lock(); - code.xadd(qword[addr], Xbyak::Reg64(value_idx)); + code.xadd(qword[addr], Xbyak::Reg64{value_idx}); break; case 128: code.lock(); code.cmpxchg16b(xword[addr]); if (code.HasHostFeature(HostFeature::SSE41)) { - code.movq(Xbyak::Xmm(value_idx), rax); - code.pinsrq(Xbyak::Xmm(value_idx), rdx, 1); + code.movq(Xbyak::Xmm{value_idx}, rax); + code.pinsrq(Xbyak::Xmm{value_idx}, rdx, 1); } else { - code.movq(Xbyak::Xmm(value_idx), rax); + code.movq(Xbyak::Xmm{value_idx}, rax); code.movq(xmm0, rdx); - code.punpcklqdq(Xbyak::Xmm(value_idx), xmm0); + code.punpcklqdq(Xbyak::Xmm{value_idx}, xmm0); } break; default: ASSERT_FALSE("Invalid bitsize"); } return fastmem_location; - } else { - const void* fastmem_location = code.getCurr(); - switch (bitsize) { - case 8: - code.movzx(Xbyak::Reg64(value_idx).cvt32(), code.byte[addr]); - break; - case 16: - code.movzx(Xbyak::Reg64(value_idx).cvt32(), word[addr]); - break; - case 32: - code.mov(Xbyak::Reg64(value_idx).cvt32(), dword[addr]); - break; - case 64: - code.mov(Xbyak::Reg64(value_idx), qword[addr]); - break; - case 128: - code.movups(Xbyak::Xmm(value_idx), xword[addr]); - break; - default: - ASSERT_FALSE("Invalid bitsize"); - } - return fastmem_location; } + + const void* fastmem_location = code.getCurr(); + switch (bitsize) { + case 8: + code.movzx(Xbyak::Reg32{value_idx}, code.byte[addr]); + break; + case 16: + code.movzx(Xbyak::Reg32{value_idx}, word[addr]); + break; + case 32: + 
code.mov(Xbyak::Reg32{value_idx}, dword[addr]); + break; + case 64: + code.mov(Xbyak::Reg64{value_idx}, qword[addr]); + break; + case 128: + code.movups(Xbyak::Xmm{value_idx}, xword[addr]); + break; + default: + ASSERT_FALSE("Invalid bitsize"); + } + return fastmem_location; } template @@ -275,10 +276,10 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int code.xor_(eax, eax); code.xor_(edx, edx); if (code.HasHostFeature(HostFeature::SSE41)) { - code.movq(rbx, Xbyak::Xmm(value_idx)); - code.pextrq(rcx, Xbyak::Xmm(value_idx), 1); + code.movq(rbx, Xbyak::Xmm{value_idx}); + code.pextrq(rcx, Xbyak::Xmm{value_idx}, 1); } else { - code.movaps(xmm0, Xbyak::Xmm(value_idx)); + code.movaps(xmm0, Xbyak::Xmm{value_idx}); code.movq(rbx, xmm0); code.punpckhqdq(xmm0, xmm0); code.movq(rcx, xmm0); @@ -288,16 +289,16 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int const void* fastmem_location = code.getCurr(); switch (bitsize) { case 8: - code.xchg(code.byte[addr], Xbyak::Reg64(value_idx).cvt8()); + code.xchg(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8()); break; case 16: - code.xchg(word[addr], Xbyak::Reg64(value_idx).cvt16()); + code.xchg(word[addr], Xbyak::Reg16{value_idx}); break; case 32: - code.xchg(dword[addr], Xbyak::Reg64(value_idx).cvt32()); + code.xchg(dword[addr], Xbyak::Reg32{value_idx}); break; case 64: - code.xchg(qword[addr], Xbyak::Reg64(value_idx)); + code.xchg(qword[addr], Xbyak::Reg64{value_idx}); break; case 128: { Xbyak::Label loop; @@ -311,29 +312,29 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int ASSERT_FALSE("Invalid bitsize"); } return fastmem_location; - } else { - const void* fastmem_location = code.getCurr(); - switch (bitsize) { - case 8: - code.mov(code.byte[addr], Xbyak::Reg64(value_idx).cvt8()); - break; - case 16: - code.mov(word[addr], Xbyak::Reg64(value_idx).cvt16()); - break; - case 32: - code.mov(dword[addr], Xbyak::Reg64(value_idx).cvt32()); - break; - case 64: - code.mov(qword[addr], Xbyak::Reg64(value_idx)); - break; - case 128: - code.movups(xword[addr], Xbyak::Xmm(value_idx)); - break; - default: - ASSERT_FALSE("Invalid bitsize"); - } - return fastmem_location; } + + const void* fastmem_location = code.getCurr(); + switch (bitsize) { + case 8: + code.mov(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8()); + break; + case 16: + code.mov(word[addr], Xbyak::Reg16{value_idx}); + break; + case 32: + code.mov(dword[addr], Xbyak::Reg32{value_idx}); + break; + case 64: + code.mov(qword[addr], Xbyak::Reg64{value_idx}); + break; + case 128: + code.movups(xword[addr], Xbyak::Xmm{value_idx}); + break; + default: + ASSERT_FALSE("Invalid bitsize"); + } + return fastmem_location; } template diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp index e795181872..d36a75426a 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp @@ -69,7 +69,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) ctx.reg_alloc.DefineValue(overflow_inst, overflow); } } else { - code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); + code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); } ctx.reg_alloc.DefineValue(inst, result); @@ -98,7 +98,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* 
inst const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(); code.setb(overflow.cvt8()); - code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); + code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); ctx.reg_alloc.DefineValue(inst, addend); } @@ -226,7 +226,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, code.cmovns(y, tmp); code.sets(tmp.cvt8()); - code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); + code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); ctx.reg_alloc.DefineValue(inst, y); } @@ -250,7 +250,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, code.cmovns(y.cvt32(), tmp.cvt32()); code.sets(tmp.cvt8()); - code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); + code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); ctx.reg_alloc.DefineValue(inst, y); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index e1b9e54df8..e9b8866b52 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -25,7 +25,6 @@ #include "dynarmic/backend/x64/constants.h" #include "dynarmic/backend/x64/emit_x64.h" #include "dynarmic/common/math_util.h" -#include "dynarmic/interface/optimization_flags.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -110,7 +109,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); - code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); + code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } @@ -138,7 +137,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); - code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); + code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } @@ -165,7 +164,7 @@ static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code, ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); - code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); + code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } @@ -1010,7 +1009,10 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) { code.gf2p8affineqb(result, code.BConst<64>(xword, 0xaaccf0ff'00000000), 8); ctx.reg_alloc.DefineValue(inst, result); - } else if (code.HasHostFeature(HostFeature::SSSE3)) { + return; + } + + if (code.HasHostFeature(HostFeature::SSSE3)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -1032,9 +1034,10 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) { code.paddb(data, tmp1); ctx.reg_alloc.DefineValue(inst, data); - } else { - 
EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); + return; } + + EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); } void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { @@ -1067,7 +1070,10 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { code.vpshufb(result, result, data); ctx.reg_alloc.DefineValue(inst, result); - } else if (code.HasHostFeature(HostFeature::SSSE3)) { + return; + } + + if (code.HasHostFeature(HostFeature::SSSE3)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -1100,33 +1106,24 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { code.pshufb(result, data); ctx.reg_alloc.DefineValue(inst, result); - } else { - EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); + return; } + + EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); } void EmitX64::EmitVectorCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512CD)) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); code.vplzcntd(data, data); + ctx.reg_alloc.DefineValue(inst, data); - // See https://stackoverflow.com/questions/58823140/count-leading-zero-bits-for-each-element-in-avx2-vector-emulate-mm256-lzcnt-ep/58827596#58827596 - } else if (code.HasHostFeature(HostFeature::AVX2)) { - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(); - code.vmovdqa(temp, data); - code.vpsrld(data, data, 8); - code.vpandn(data, data, temp); - code.vmovdqa(temp, code.Const(xword, 0x0000009E0000009E, 0x0000009E0000009E)); - code.vcvtdq2ps(data, data); - code.vpsrld(data, data, 23); - code.vpsubusw(data, temp, data); - code.vpminsw(data, data, code.Const(xword, 0x0000002000000020, 0x0000002000000020)); - ctx.reg_alloc.DefineValue(inst, data); - } else { - EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); + return; } + + EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); } void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) { @@ -3326,7 +3323,7 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) { code.paddb(mask, mask); code.paddb(xmm_a, xmm_a); code.pblendvb(result, alternate); - code.sub(counter, 1); + code.dec(counter); code.jnz(loop); ctx.reg_alloc.DefineValue(inst, result); @@ -3370,7 +3367,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst code.paddw(mask, mask); code.paddw(xmm_a, xmm_a); code.pblendvb(result, alternate); - code.sub(counter, 1); + code.dec(counter); code.jnz(loop); ctx.reg_alloc.DefineValue(inst, result); @@ -4261,7 +4258,7 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo UNREACHABLE(); } - code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, data); } @@ -4396,7 +4393,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(mask, xmm0); - code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], mask); + 
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask); if (code.HasHostFeature(HostFeature::SSE41)) { code.pblendvb(result, tmp); @@ -4482,7 +4479,7 @@ static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(bit, upper_tmp); - code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, result); } @@ -4533,7 +4530,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.vpcmpeqd(mask, result, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); code.vpxor(result, result, mask); code.pmovmskb(bit, mask); - code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.Release(mask); ctx.reg_alloc.Release(bit); @@ -4589,7 +4586,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.pcmpeqd(tmp, result); code.pxor(result, tmp); code.pmovmskb(bit, tmp); - code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, result); } @@ -4623,7 +4620,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(bit, y); - code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, x); } @@ -4676,7 +4673,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, code.pxor(x, y); code.pmovmskb(bit, y); } - code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, x); } @@ -4715,7 +4712,7 @@ static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, Block code.pcmpeqd(reconstructed, src); code.movmskps(bit, reconstructed); code.xor_(bit, 0b1111); - code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, dest); } @@ -4770,7 +4767,7 @@ static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, Blo code.pcmpeqd(reconstructed, src); code.movmskps(bit, reconstructed); code.xor_(bit, 0b1111); - code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, dest); } @@ -4873,7 +4870,7 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo // Check if any elements matched the mask prior to performing saturation. If so, set the Q bit. 
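// Illustration (not part of the patch; scalar sketch with invented names):
// the pmovmskb/or_ sequences accumulate the sticky saturation (Q) flag, and
// any nonzero value OR'd into fpsr_qc counts as "saturation happened":
//
//   #include <cstdint>
//   #include <limits>
//
//   inline std::uint32_t fpsr_qc = 0;  // sticky; nonzero means Q is set
//
//   std::int32_t SaturatingAdd32(std::int32_t a, std::int32_t b) {
//       const std::int64_t wide = std::int64_t{a} + b;
//       const std::int64_t lo = std::numeric_limits<std::int32_t>::min();
//       const std::int64_t hi = std::numeric_limits<std::int32_t>::max();
//       const std::int64_t clamped = wide < lo ? lo : wide > hi ? hi : wide;
//       fpsr_qc |= static_cast<std::uint32_t>(clamped != wide);
//       return static_cast<std::int32_t>(clamped);
//   }
//
// Since only zero/nonzero matters, the emitter can OR the raw byte mask from
// pmovmskb into fpsr_qc without normalizing it to 0/1 first.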
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(bit, tmp); - code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, zero); } @@ -5644,7 +5641,6 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, break; } case 32: - // See https://stackoverflow.com/questions/3380785/compute-the-absolute-difference-between-unsigned-integers-using-sse/3527267#3527267 if (code.HasHostFeature(HostFeature::SSE41)) { const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); @@ -5656,33 +5652,16 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, } else { const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); - if (ctx.HasOptimization(OptimizationFlag::CodeSpeed)) { - // About 45 bytes - const Xbyak::Xmm temp_x = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm temp_y = ctx.reg_alloc.ScratchXmm(); - code.pcmpeqd(temp, temp); - code.pslld(temp, 31); - code.movdqa(temp_x, x); - code.movdqa(temp_y, y); - code.paddd(temp_x, x); - code.paddd(temp_y, y); - code.pcmpgtd(temp_y, temp_x); - code.psubd(x, y); - code.pandn(temp, temp_y); - code.pxor(x, y); - code.psubd(x, y); - } else { - // Smaller code size - about 36 bytes - code.movdqa(temp, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); - code.pxor(x, temp); - code.pxor(y, temp); - code.movdqa(temp, x); - code.psubd(temp, y); - code.pcmpgtd(y, x); - code.psrld(y, 1); - code.pxor(temp, y); - code.psubd(temp, y); - } + + code.movdqa(temp, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); + code.pxor(x, temp); + code.pxor(y, temp); + code.movdqa(temp, x); + code.psubd(temp, y); + code.pcmpgtd(y, x); + code.psrld(y, 1); + code.pxor(temp, y); + code.psubd(temp, y); } break; } @@ -5748,7 +5727,10 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { code.vpmulld(result, x, y); ctx.reg_alloc.DefineValue(lower_inst, result); - } else if (code.HasHostFeature(HostFeature::AVX)) { + return; + } + + if (code.HasHostFeature(HostFeature::AVX)) { const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); @@ -5767,33 +5749,39 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { code.shufps(result, x, 0b11011101); ctx.reg_alloc.DefineValue(upper_inst, result); - } else { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm upper_result = upper_inst ? ctx.reg_alloc.ScratchXmm() : Xbyak::Xmm{-1}; - const Xbyak::Xmm lower_result = lower_inst ? 
ctx.reg_alloc.ScratchXmm() : Xbyak::Xmm{-1}; + return; + } - // calculate unsigned multiply - code.movdqa(tmp, x); - code.pmuludq(tmp, y); - code.psrlq(x, 32); - code.psrlq(y, 32); - code.pmuludq(x, y); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(); - // put everything into place - only if needed - if (upper_inst) code.pcmpeqw(upper_result, upper_result); - if (lower_inst) code.pcmpeqw(lower_result, lower_result); - if (upper_inst) code.psllq(upper_result, 32); - if (lower_inst) code.psrlq(lower_result, 32); - if (upper_inst) code.pand(upper_result, x); - if (lower_inst) code.pand(lower_result, tmp); - if (upper_inst) code.psrlq(tmp, 32); - if (lower_inst) code.psllq(x, 32); - if (upper_inst) code.por(upper_result, tmp); - if (lower_inst) code.por(lower_result, x); - if (upper_inst) ctx.reg_alloc.DefineValue(upper_inst, upper_result); - if (lower_inst) ctx.reg_alloc.DefineValue(lower_inst, lower_result); + // calculate unsigned multiply + code.movdqa(tmp, x); + code.pmuludq(tmp, y); + code.psrlq(x, 32); + code.psrlq(y, 32); + code.pmuludq(x, y); + + // put everything into place + code.pcmpeqw(upper_result, upper_result); + code.pcmpeqw(lower_result, lower_result); + code.psllq(upper_result, 32); + code.psrlq(lower_result, 32); + code.pand(upper_result, x); + code.pand(lower_result, tmp); + code.psrlq(tmp, 32); + code.psllq(x, 32); + code.por(upper_result, tmp); + code.por(lower_result, x); + + if (upper_inst) { + ctx.reg_alloc.DefineValue(upper_inst, upper_result); + } + if (lower_inst) { + ctx.reg_alloc.DefineValue(lower_inst, lower_result); } } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index c8f0d9575c..b24120c346 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -450,7 +450,7 @@ void EmitTwoOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.mov(code.ABI_PARAM3.cvt32(), fpcr); - code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.movaps(xword[code.ABI_PARAM2], arg1); code.CallFunction(fn); @@ -487,7 +487,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]); code.mov(code.ABI_PARAM4.cvt32(), fpcr); - code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax); #else constexpr u32 stack_space = 3 * 16; @@ -496,7 +496,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.mov(code.ABI_PARAM4.cvt32(), fpcr); - code.lea(code.ABI_PARAM5, 
diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
index c8f0d9575c..b24120c346 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
@@ -450,7 +450,7 @@ void EmitTwoOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak
    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
    code.mov(code.ABI_PARAM3.cvt32(), fpcr);
-   code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
+   code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);

    code.movaps(xword[code.ABI_PARAM2], arg1);
    code.CallFunction(fn);
@@ -487,7 +487,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
    code.mov(code.ABI_PARAM4.cvt32(), fpcr);
-   code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
+   code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax);
#else
    constexpr u32 stack_space = 3 * 16;
@@ -496,7 +496,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
    code.mov(code.ABI_PARAM4.cvt32(), fpcr);
-   code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
+   code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
#endif

    code.movaps(xword[code.ABI_PARAM2], arg1);
@@ -545,7 +545,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
    code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 4 * 16]);
    code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], ctx.FPCR(fpcr_controlled).Value());
-   code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
+   code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.mov(qword[rsp + ABI_SHADOW_SPACE + 8], rax);
#else
    constexpr u32 stack_space = 4 * 16;
@@ -555,7 +555,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
    code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
    code.mov(code.ABI_PARAM5.cvt32(), ctx.FPCR(fpcr_controlled).Value());
-   code.lea(code.ABI_PARAM6, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
+   code.lea(code.ABI_PARAM6, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
#endif

    if constexpr (load_previous_result == LoadPreviousResult::Yes) {
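The thunks above marshal operands into 16-byte stack slots and pass FPCR plus a pointer into the JIT state. A sketch of the call shape they set up; the real fallback signature lives elsewhere in dynarmic, so treat the names and exact parameter list here as illustrative assumptions:

#include <array>
#include <cstdint>

using Vector = std::array<std::uint64_t, 2>; // one 16-byte stack slot

// Parameter order mirrors EmitTwoOpFallbackWithoutRegAlloc's setup: result
// and operand buffers on the stack, FPCR by value, then &jit_state.fpsr_exc
// (reached via r15) so the fallback can accumulate FP exception bits.
static void TwoOpFallbackShape(Vector* result, const Vector* operand,
                               std::uint32_t fpcr, std::uint32_t* fpsr_exc) {
    (void)fpcr;
    *result = *operand; // a real fallback performs the FP operation...
    *fpsr_exc |= 0;     // ...and ORs any raised exception flags in here
}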
diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
index 580a32dec8..88bd41a47e 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
@@ -62,7 +62,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
        code.test(overflow.cvt32(), overflow.cvt32());
    }
    code.setnz(overflow);
-   code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
+   code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);

    ctx.reg_alloc.DefineValue(inst, result);
}
@@ -104,7 +104,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
        code.ktestb(k1, k1);
        code.setnz(overflow);
-       code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
+       code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);

        ctx.reg_alloc.DefineValue(inst, result);
        return;
@@ -160,7 +160,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
        code.test(overflow.cvt32(), overflow.cvt32());
    }
    code.setnz(overflow);
-   code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
+   code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);

    if (code.HasHostFeature(HostFeature::SSE41)) {
        FCODE(blendvp)(result, tmp);
@@ -204,7 +204,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
        code.ktestb(k1, k1);
        code.setnz(overflow);
-       code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
+       code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);

        ctx.reg_alloc.DefineValue(inst, result);
        return;
@@ -263,7 +263,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
    }
    code.setnz(overflow);
-   code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
+   code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);

    if constexpr (op == Op::Add) {
        code.por(result, tmp);
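All four saturation emitters end the same way: setnz materialises "did anything saturate" and the OR keeps FPSR.QC sticky in the JIT state block. A C-level model of that tail (the field layout is illustrative):

#include <cstdint>

// The JIT state block is addressed through r15; fpsr_qc stands in for the
// byte at offsetof_fpsr_qc.
struct JitStateModel {
    std::uint8_t fpsr_qc;
};

// Mirrors: setnz(overflow); or_(byte[r15 + offsetof_fpsr_qc], overflow);
inline void AccumulateQC(JitStateModel& state, bool saturated) {
    state.fpsr_qc |= saturated ? 1 : 0; // sticky: only ever ORed in
}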
diff --git a/src/dynarmic/src/dynarmic/backend/x64/hostloc.h b/src/dynarmic/src/dynarmic/backend/x64/hostloc.h
index c96a18628a..1b27edbdee 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/hostloc.h
+++ b/src/dynarmic/src/dynarmic/backend/x64/hostloc.h
@@ -78,16 +78,16 @@ inline bool HostLocIsFlag(HostLoc reg) {

inline HostLoc HostLocRegIdx(int idx) {
    ASSERT(idx >= 0 && idx <= 15);
-   return HostLoc(idx);
+   return static_cast<HostLoc>(idx);
}

inline HostLoc HostLocXmmIdx(int idx) {
    ASSERT(idx >= 0 && idx <= 15);
-   return HostLoc(size_t(HostLoc::XMM0) + idx);
+   return static_cast<HostLoc>(static_cast<size_t>(HostLoc::XMM0) + idx);
}

inline HostLoc HostLocSpill(size_t i) {
-   return HostLoc(size_t(HostLoc::FirstSpill) + i);
+   return static_cast<HostLoc>(static_cast<size_t>(HostLoc::FirstSpill) + i);
}

inline bool HostLocIsSpill(HostLoc reg) {
@@ -109,8 +109,6 @@ inline size_t HostLocBitWidth(HostLoc loc) {
using HostLocList = std::initializer_list<HostLoc>;

// RSP is preserved for function calls
-// R13 contains fastmem pointer if any
-// R14 contains the pagetable pointer
// R15 contains the JitState pointer
const HostLocList any_gpr = {
    HostLoc::RAX,
@@ -127,16 +125,12 @@ const HostLocList any_gpr = {
    HostLoc::R12,
    HostLoc::R13,
    HostLoc::R14,
-   //HostLoc::R15,
};

// XMM0 is reserved for use by instructions that implicitly use it as an argument
-// XMM1 is used by 128 mem accessors
-// XMM2 is also used by that (and other stuff)
-// Basically dont use either XMM0, XMM1 or XMM2 ever; they're left for the regsel
const HostLocList any_xmm = {
-   //HostLoc::XMM1,
-   //HostLoc::XMM2,
+   HostLoc::XMM1,
+   HostLoc::XMM2,
    HostLoc::XMM3,
    HostLoc::XMM4,
    HostLoc::XMM5,
diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
index c42a0c43b5..916c74193c 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
@@ -429,22 +429,13 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector
-       if (*it >= HostLoc::R13 && *it <= HostLoc::R15) {
-           // skip, do not touch
-           // Intel recommends to reuse registers as soon as they're overwritable (DO NOT SPILL)
-       } else if (loc_info.IsEmpty()) {
-           it_empty_candidate = it;
-           break;
-           // No empty registers for some reason (very evil) - just do normal LRU
        } else {
            if (loc_info.lru_counter < min_lru_counter) {
+               if (loc_info.IsEmpty())
+                   it_empty_candidate = it;
                // Otherwise a "quasi"-LRU
                min_lru_counter = loc_info.lru_counter;
                if (*it >= HostLoc::R8 && *it <= HostLoc::R15) {
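The reworked scan folds the old three-way branch into a single pass. A standalone model of the selection shape (a sketch of the idea, not the allocator's exact policy):

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

struct LocModel {
    bool empty;
    std::uint32_t lru_counter;
};

// Prefer an empty location discovered along the LRU walk; otherwise evict
// the least-recently-used one.
static std::size_t SelectQuasiLRU(const std::vector<LocModel>& locs) {
    std::optional<std::size_t> empty_candidate;
    std::size_t victim = 0;
    std::uint32_t min_lru = UINT32_MAX;
    for (std::size_t i = 0; i < locs.size(); ++i) {
        if (locs[i].lru_counter < min_lru) {
            if (locs[i].empty)
                empty_candidate = i;
            min_lru = locs[i].lru_counter;
            victim = i;
        }
    }
    return empty_candidate.value_or(victim);
}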
"); return {0, 0}; diff --git a/src/dynarmic/src/dynarmic/common/crypto/crc32.cpp b/src/dynarmic/src/dynarmic/common/crypto/crc32.cpp index 6b9c129a44..c2821fa2c3 100644 --- a/src/dynarmic/src/dynarmic/common/crypto/crc32.cpp +++ b/src/dynarmic/src/dynarmic/common/crypto/crc32.cpp @@ -152,9 +152,11 @@ constexpr CRC32Table iso_table{ static u32 ComputeCRC32(const CRC32Table& table, u32 crc, const u64 value, int length) { const auto* data = reinterpret_cast(&value); + while (length-- > 0) { crc = (crc >> 8) ^ table[(crc ^ (*data++)) & 0xFF]; } + return crc; } diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp index f0e44dc62e..fdea94f4be 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp @@ -16,14 +16,15 @@ namespace Dynarmic { void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { Xbyak::Label start, loop; - code.jmp(start, code.T_NEAR); + code.jmp(start); code.L(loop); code.pause(); code.L(start); code.mov(tmp, 1); - /*code.lock();*/ code.xchg(code.dword[ptr], tmp); + code.lock(); + code.xchg(code.dword[ptr], tmp); code.test(tmp, tmp); - code.jnz(loop, code.T_NEAR); + code.jnz(loop); } void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp index 6a25eb97c6..7ef8b7e890 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp @@ -109,11 +109,13 @@ bool TranslatorVisitor::arm_LDR_imm(Cond cond, bool P, bool U, bool W, Reg n, Re if (t == Reg::PC) { ir.LoadWritePC(data); + if (!P && W && n == Reg::R13) { ir.SetTerm(IR::Term::PopRSBHint{}); } else { ir.SetTerm(IR::Term::FastDispatchHint{}); } + return false; } @@ -143,11 +145,7 @@ bool TranslatorVisitor::arm_LDR_reg(Cond cond, bool P, bool U, bool W, Reg n, Re if (t == Reg::PC) { ir.LoadWritePC(data); - if (!P && W && n == Reg::R13) { - ir.SetTerm(IR::Term::PopRSBHint{}); - } else { - ir.SetTerm(IR::Term::FastDispatchHint{}); - } + ir.SetTerm(IR::Term::FastDispatchHint{}); return false; } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp index faf0686231..01cc1390c7 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp @@ -21,7 +21,6 @@ bool TranslatorVisitor::B_uncond(Imm<26> imm26) { const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend(); const u64 target = ir.PC() + offset; - //ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); return false; } diff --git a/src/dynarmic/src/dynarmic/interface/optimization_flags.h b/src/dynarmic/src/dynarmic/interface/optimization_flags.h index 743d902767..2f65f0bfa4 100644 --- a/src/dynarmic/src/dynarmic/interface/optimization_flags.h +++ b/src/dynarmic/src/dynarmic/interface/optimization_flags.h @@ -32,8 +32,6 @@ enum class OptimizationFlag : std::uint32_t { ConstProp = 0x00000010, /// This is enables miscellaneous safe IR optimizations. 
diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h
index e673f40263..12b6010aa8 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h
+++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h
@@ -22,7 +22,6 @@
#include "dynarmic/backend/x64/hostloc.h"
#include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/backend/x64/oparg.h"
-#include "dynarmic/backend/x64/abi.h"
#include "dynarmic/ir/cond.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/value.h"
@@ -243,19 +242,20 @@ private:
    void MoveOutOfTheWay(HostLoc reg) noexcept;

    void SpillRegister(HostLoc loc) noexcept;
-   HostLoc FindFreeSpill(bool is_xmm) const noexcept;
+   HostLoc FindFreeSpill() const noexcept;

    inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
-       ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
+       ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
        return hostloc_info[static_cast<size_t>(loc)];
    }
    inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
-       ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
+       ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
        return hostloc_info[static_cast<size_t>(loc)];
    }

    void EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
    void EmitExchange(const HostLoc a, const HostLoc b) noexcept;
+   Xbyak::Address SpillToOpArg(const HostLoc loc) noexcept;

    //data
    alignas(64) boost::container::static_vector gpr_order;
diff --git a/src/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.cpp b/src/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.cpp
index b3a02005eb..3378786c46 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.cpp
@@ -22,7 +22,7 @@ void PrintVerboseDebuggingOutputLine(RegisterData& reg_data, HostLoc hostloc, si
    } else if (HostLocIsXMM(hostloc)) {
        return reg_data.xmms[HostLocToXmm(hostloc).getIdx()];
    } else if (HostLocIsSpill(hostloc)) {
-       return (*reg_data.spill)[size_t(hostloc) - size_t(HostLoc::FirstSpill)];
+       return (*reg_data.spill)[static_cast<size_t>(hostloc) - static_cast<size_t>(HostLoc::FirstSpill)];
    } else {
        fmt::print("invalid hostloc!\n");
        return {0, 0};
diff --git a/src/dynarmic/src/dynarmic/common/crypto/crc32.cpp b/src/dynarmic/src/dynarmic/common/crypto/crc32.cpp
index 6b9c129a44..c2821fa2c3 100644
--- a/src/dynarmic/src/dynarmic/common/crypto/crc32.cpp
+++ b/src/dynarmic/src/dynarmic/common/crypto/crc32.cpp
@@ -152,9 +152,11 @@ constexpr CRC32Table iso_table{

static u32 ComputeCRC32(const CRC32Table& table, u32 crc, const u64 value, int length) {
    const auto* data = reinterpret_cast<const u8*>(&value);
+
    while (length-- > 0) {
        crc = (crc >> 8) ^ table[(crc ^ (*data++)) & 0xFF];
    }
+
    return crc;
}
diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp
index f0e44dc62e..fdea94f4be 100644
--- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp
+++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp
@@ -16,14 +16,15 @@ namespace Dynarmic {

void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
    Xbyak::Label start, loop;

-   code.jmp(start, code.T_NEAR);
+   code.jmp(start);
    code.L(loop);
    code.pause();
    code.L(start);
    code.mov(tmp, 1);
-   /*code.lock();*/ code.xchg(code.dword[ptr], tmp);
+   code.lock();
+   code.xchg(code.dword[ptr], tmp);
    code.test(tmp, tmp);
-   code.jnz(loop, code.T_NEAR);
+   code.jnz(loop);
}

void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
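Behaviourally, the emitted lock loop is a test-and-set spin lock. A C++ model of the same protocol, with std::atomic standing in for the raw xchg (note that x86 xchg with a memory operand is implicitly locked, so the explicit lock prefix above is belt-and-braces):

#include <atomic>

// mov tmp, 1; xchg [ptr], tmp; test tmp, tmp; jnz loop -- with pause inside
// the retry loop, exactly as EmitSpinLockLock lays it out.
inline void SpinLockLock(std::atomic<int>& lock) {
    while (lock.exchange(1, std::memory_order_acquire) != 0) {
        // spin; the emitted code executes `pause` here
    }
}

inline void SpinLockUnlock(std::atomic<int>& lock) {
    lock.store(0, std::memory_order_release);
}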
diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp
index 6a25eb97c6..7ef8b7e890 100644
--- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp
+++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp
@@ -109,11 +109,13 @@ bool TranslatorVisitor::arm_LDR_imm(Cond cond, bool P, bool U, bool W, Reg n, Re

    if (t == Reg::PC) {
        ir.LoadWritePC(data);
+
        if (!P && W && n == Reg::R13) {
            ir.SetTerm(IR::Term::PopRSBHint{});
        } else {
            ir.SetTerm(IR::Term::FastDispatchHint{});
        }
+
        return false;
    }

@@ -143,11 +145,7 @@ bool TranslatorVisitor::arm_LDR_reg(Cond cond, bool P, bool U, bool W, Reg n, Re
    if (t == Reg::PC) {
        ir.LoadWritePC(data);
-       if (!P && W && n == Reg::R13) {
-           ir.SetTerm(IR::Term::PopRSBHint{});
-       } else {
-           ir.SetTerm(IR::Term::FastDispatchHint{});
-       }
+       ir.SetTerm(IR::Term::FastDispatchHint{});
        return false;
    }
diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp
index faf0686231..01cc1390c7 100644
--- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp
+++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp
@@ -21,7 +21,6 @@ bool TranslatorVisitor::B_uncond(Imm<26> imm26) {
    const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend();
    const u64 target = ir.PC() + offset;

-   //ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
    ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
    return false;
}
diff --git a/src/dynarmic/src/dynarmic/interface/optimization_flags.h b/src/dynarmic/src/dynarmic/interface/optimization_flags.h
index 743d902767..2f65f0bfa4 100644
--- a/src/dynarmic/src/dynarmic/interface/optimization_flags.h
+++ b/src/dynarmic/src/dynarmic/interface/optimization_flags.h
@@ -32,8 +32,6 @@ enum class OptimizationFlag : std::uint32_t {
    ConstProp = 0x00000010,
    /// This is enables miscellaneous safe IR optimizations.
    MiscIROpt = 0x00000020,
-   /// Optimize for code speed rather than for code size (this serves well for tight loops)
-   CodeSpeed = 0x00000040,

    /// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations.
    /// This unfuses fused instructions to improve performance on host CPUs without FMA support.
diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp
index b00ab3cb20..12765e26a8 100644
--- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp
+++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp
@@ -86,9 +86,11 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept {
}

std::string DumpBlock(const IR::Block& block) noexcept {
-   std::string ret = fmt::format("Block: location={}-{}\n", block.Location(), block.EndLocation())
-       + fmt::format("cycles={}", block.CycleCount())
-       + fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
+   std::string ret;
+
+   ret += fmt::format("Block: location={}\n", block.Location());
+   ret += fmt::format("cycles={}", block.CycleCount());
+   ret += fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
    if (block.GetCondition() != Cond::AL) {
        ret += fmt::format(", cond_fail={}", block.ConditionFailedLocation());
    }
@@ -114,8 +116,6 @@ std::string DumpBlock(const IR::Block& block) noexcept {
            return fmt::format("#{:#x}", arg.GetU32());
        case Type::U64:
            return fmt::format("#{:#x}", arg.GetU64());
-       case Type::U128:
-           return fmt::format("#");
        case Type::A32Reg:
            return A32::RegToString(arg.GetA32RegRef());
        case Type::A32ExtReg:
@@ -124,18 +124,8 @@ std::string DumpBlock(const IR::Block& block) noexcept {
            return A64::RegToString(arg.GetA64RegRef());
        case Type::A64Vec:
            return A64::VecToString(arg.GetA64VecRef());
-       case Type::CoprocInfo:
-           return fmt::format("#");
-       case Type::NZCVFlags:
-           return fmt::format("#");
-       case Type::Cond:
-           return fmt::format("#", A32::CondToString(arg.GetCond()));
-       case Type::Table:
-           return fmt::format("#");
-       case Type::AccType:
-           return fmt::format("#", u32(arg.GetAccType()));
        default:
-           return fmt::format("", arg.GetType());
+           return "";
        }
    };
diff --git a/src/dynarmic/src/dynarmic/ir/microinstruction.h b/src/dynarmic/src/dynarmic/ir/microinstruction.h
index 6651aab7c5..bc5a355793 100644
--- a/src/dynarmic/src/dynarmic/ir/microinstruction.h
+++ b/src/dynarmic/src/dynarmic/ir/microinstruction.h
@@ -19,7 +19,7 @@ namespace Dynarmic::IR {

enum class Opcode;
-enum class Type : u16;
+enum class Type;

constexpr size_t max_arg_count = 4;
diff --git a/src/dynarmic/src/dynarmic/ir/opcodes.cpp b/src/dynarmic/src/dynarmic/ir/opcodes.cpp
index e2059f1ef2..e7e73b7032 100644
--- a/src/dynarmic/src/dynarmic/ir/opcodes.cpp
+++ b/src/dynarmic/src/dynarmic/ir/opcodes.cpp
@@ -16,6 +16,12 @@ namespace Dynarmic::IR {

namespace OpcodeInfo {

+struct Meta {
+   std::vector<Type> arg_types;
+   const char* name;
+   Type type;
+};
+
constexpr Type Void = Type::Void;
constexpr Type A32Reg = Type::A32Reg;
constexpr Type A32ExtReg = Type::A32ExtReg;
@@ -34,22 +40,10 @@ constexpr Type Cond = Type::Cond;
constexpr Type Table = Type::Table;
constexpr Type AccType = Type::AccType;

-struct Meta {
-   std::array<Type, max_arg_count> arg_types;
-   Type type;
-   uint8_t count;
-};
-
-// Evil macro magic for Intel C++ compiler
-// Helper macro to force expanding __VA_ARGS__ to satisfy MSVC compiler.
-#define PP_EXPAND(x) x
-#define PP_NARGS(...) PP_EXPAND(PP_ARG_N(__VA_ARGS__, 5, 4, 3, 2, 1, 0))
-#define PP_ARG_N(_1, _2, _3, _4, _5, N, ...) N
-
-alignas(64) static const Meta opcode_info[] = {
-#define OPCODE(name, type, ...) Meta{{__VA_ARGS__}, type, PP_EXPAND(PP_NARGS(__VA_ARGS__))},
-#define A32OPC(name, type, ...) Meta{{__VA_ARGS__}, type, PP_EXPAND(PP_NARGS(__VA_ARGS__))},
-#define A64OPC(name, type, ...) Meta{{__VA_ARGS__}, type, PP_EXPAND(PP_NARGS(__VA_ARGS__))},
+alignas(64) static const std::array opcode_info{
+#define OPCODE(name, type, ...) Meta{{__VA_ARGS__}, #name, type},
+#define A32OPC(name, type, ...) Meta{{__VA_ARGS__}, #name, type},
+#define A64OPC(name, type, ...) Meta{{__VA_ARGS__}, #name, type},
#include "./opcodes.inc"
#undef OPCODE
#undef A32OPC
@@ -60,31 +54,22 @@ alignas(64) static const Meta opcode_info[] = {

/// @brief Get return type of an opcode
Type GetTypeOf(Opcode op) noexcept {
-   return OpcodeInfo::opcode_info[size_t(op)].type;
+   return OpcodeInfo::opcode_info.at(size_t(op)).type;
}

/// @brief Get the number of arguments an opcode accepts
size_t GetNumArgsOf(Opcode op) noexcept {
-   return OpcodeInfo::opcode_info[size_t(op)].count;
+   return OpcodeInfo::opcode_info.at(size_t(op)).arg_types.size();
}

/// @brief Get the required type of an argument of an opcode
Type GetArgTypeOf(Opcode op, size_t arg_index) noexcept {
-   return OpcodeInfo::opcode_info[size_t(op)].arg_types[arg_index];
+   return OpcodeInfo::opcode_info.at(size_t(op)).arg_types.at(arg_index);
}

/// @brief Get the name of an opcode.
-std::string_view GetNameOf(Opcode op) noexcept {
-   static const std::string_view opcode_names[] = {
-#define OPCODE(name, type, ...) #name,
-#define A32OPC(name, type, ...) #name,
-#define A64OPC(name, type, ...) #name,
-#include "./opcodes.inc"
-#undef OPCODE
-#undef A32OPC
-#undef A64OPC
-   };
-   return opcode_names[size_t(op)];
+std::string GetNameOf(Opcode op) noexcept {
+   return OpcodeInfo::opcode_info.at(size_t(op)).name;
}

} // namespace Dynarmic::IR
diff --git a/src/dynarmic/src/dynarmic/ir/opcodes.h b/src/dynarmic/src/dynarmic/ir/opcodes.h
index a231365fa7..c11ad549da 100644
--- a/src/dynarmic/src/dynarmic/ir/opcodes.h
+++ b/src/dynarmic/src/dynarmic/ir/opcodes.h
@@ -15,7 +15,7 @@

namespace Dynarmic::IR {

-enum class Type : u16;
+enum class Type;

/// @brief The Opcodes of our intermediate representation.
/// Type signatures for each opcode can be found in opcodes.inc
@@ -35,7 +35,7 @@ constexpr size_t OpcodeCount = static_cast<size_t>(Opcode::NUM_OPCODE);

Type GetTypeOf(Opcode op) noexcept;
size_t GetNumArgsOf(Opcode op) noexcept;
Type GetArgTypeOf(Opcode op, size_t arg_index) noexcept;
-std::string_view GetNameOf(Opcode op) noexcept;
+std::string GetNameOf(Opcode op) noexcept;

/// @brief Determines whether or not this instruction performs an arithmetic shift.
constexpr bool IsArithmeticShift(const Opcode op) noexcept {
diff --git a/src/dynarmic/src/dynarmic/ir/type.h b/src/dynarmic/src/dynarmic/ir/type.h
index e223513367..0aaf9d9414 100644
--- a/src/dynarmic/src/dynarmic/ir/type.h
+++ b/src/dynarmic/src/dynarmic/ir/type.h
@@ -18,7 +18,7 @@ namespace Dynarmic::IR {
/**
 * The intermediate representation is typed. These are the used by our IR.
 */
-enum class Type : u16 {
+enum class Type {
    Void = 0,
    A32Reg = 1 << 0,
    A32ExtReg = 1 << 1,
diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp
index b3f388a938..9498f86d9b 100644
--- a/src/dynarmic/tests/A32/fuzz_arm.cpp
+++ b/src/dynarmic/tests/A32/fuzz_arm.cpp
@@ -445,9 +445,6 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
        }
    }
-
-   // TODO: Why the difference? QEMU what are you doing???
- jit.Regs()[15] = uni.GetRegisters()[15]; - REQUIRE(uni.GetRegisters() == jit.Regs()); REQUIRE(uni.GetExtRegs() == jit.ExtRegs()); REQUIRE((uni.GetCpsr() & 0xFFFFFDDF) == (jit.Cpsr() & 0xFFFFFDDF)); diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index fefb8a32a2..801b01d555 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -79,6 +79,7 @@ TEST_CASE("A64: CLZ", "[a64]") { jit.SetVector(0, {0xeff0fafbfcfdfeff, 0xff7f3f1f0f070301}); jit.SetVector(1, {0xfffcfffdfffeffff, 0x000F000700030001}); jit.SetVector(2, {0xfffffffdfffffffe, 0x0000000300000001}); + env.ticks_left = env.code_mem.size(); jit.Run(); @@ -1574,67 +1575,15 @@ TEST_CASE("A64: rand1", "[a64]") { REQUIRE(jit.GetRegister(30) == 0x9a5d96aa066e5c39); } -TEST_CASE("A64: rand3", "[a64]") { - constexpr size_t address_width = 12; - constexpr size_t memory_size = 1ull << address_width; // 4K - constexpr size_t page_size = 4 * 1024; - constexpr size_t buffer_size = 2 * page_size; - char buffer[buffer_size]; - - void* buffer_ptr = reinterpret_cast(buffer); - size_t buffer_size_nconst = buffer_size; - char* backing_memory = reinterpret_cast(std::align(page_size, memory_size, buffer_ptr, buffer_size_nconst)); - - A64FastmemTestEnv env{backing_memory}; - Dynarmic::A64::UserConfig config{}; - config.callbacks = &env; - config.fastmem_pointer = reinterpret_cast(backing_memory); - config.fastmem_address_space_bits = address_width; - config.recompile_on_fastmem_failure = false; - config.silently_mirror_fastmem = true; - config.processor_id = 0; - A64::Jit jit{config}; - memset(backing_memory, 0, memory_size); - - // cat rand2.txt | awk '{print "env.code_mem.emplace_back(0x"$2"); // "$0}' > rand2-out.txt - env.MemoryWrite32(100, 0x58028edd); // 0000000000000084 58028edd ldr x29, #20952 - env.MemoryWrite32(104, 0x14000000); // 0000000000000ea4 14000000 b #0 - - jit.SetPC(100); - jit.SetPstate(0xb0000000); - jit.SetFpcr(0x01000000); - env.ticks_left = 110; - //jit.DumpDisassembly(); - jit.Run(); -} - TEST_CASE("A64: rand2", "[a64][.]") { - constexpr size_t address_width = 12; - constexpr size_t memory_size = 1ull << address_width; // 4K - constexpr size_t page_size = 4 * 1024; - constexpr size_t buffer_size = 2 * page_size; - char buffer[buffer_size]; + A64TestEnv env; + A64::UserConfig jit_user_config{}; + jit_user_config.callbacks = &env; + jit_user_config.fastmem_pointer = 0xffffffff00000000; + A64::Jit jit{jit_user_config}; - void* buffer_ptr = reinterpret_cast(buffer); - size_t buffer_size_nconst = buffer_size; - char* backing_memory = reinterpret_cast(std::align(page_size, memory_size, buffer_ptr, buffer_size_nconst)); - - A64FastmemTestEnv env{backing_memory}; - Dynarmic::A64::UserConfig config{}; - config.callbacks = &env; - config.fastmem_pointer = reinterpret_cast(backing_memory); - config.fastmem_address_space_bits = address_width; - config.recompile_on_fastmem_failure = false; - config.silently_mirror_fastmem = true; - config.processor_id = 0; - A64::Jit jit{config}; - memset(backing_memory, 0, memory_size); - - // cat rand2.txt | awk '{print "env.code_mem.emplace_back(0x"$2"); // "$0}' > rand2-out.txt - const std::array code32 = {0xea80f352, 0x6e65e59d, 0x1e20c343, 0x2e3a7192, 0x2e267249, 0xd500405f, 0x6f01f461, 0x6eb684fc, 0x58028edd, 0x0ea5f5b6, 0x0ea069fb, 0x2e769517, 0x5e066063, 0x1e65c3f5, 0x4f00ff52, 0x93401cf6, 0x1e274248, 0x6f67aaf5, 0x5e0c0782, 0x5ef43f3c, 0x2e6595b7, 0x4e20590f, 0xb35aa451, 0x6ee2c5ed, 0x4e32bf46, 0x2ea1ba8f, 0x2f68a85e, 0x9237d90a, 0x5e23dd10, 
0x0e762e32, 0x4e31a8cf, 0xce1f3360, 0x781a4ac0, 0x13834066, 0x5fa8101c, 0x6f7c5594, 0x0e71bb68, 0xbc0b3e8f, 0x785dbbda, 0x6f51e794, 0xce50af75, 0x1ad728ec, 0x6ee0da4c, 0xb84efa14, 0x2eb3f613, 0x4e287ade, 0x4eb8c734, 0x2e83f4e8, 0x0e397c80, 0xd08f93f8, 0xce718e48, 0x0f672a0d, 0x2e9edd40, 0x0e14128b, 0x6f5942e6, 0x8b3a0f03, 0x3c5d16b9, 0x7f7e3743, 0x4f4c54e4, 0x0ea0a9e9, 0x9e59dbe6, 0x6e7ddcd3, 0xcec08377, 0x9ba759f8, 0x2ea5046e, 0x0e24c569, 0xb8979780, 0x4e31b98c, 0x4efe4f46, 0x4ea7c762, 0x7e61c9c6, 0x6e30c880, 0x1ada0c25, 0x4e603a2f, 0xda9d7218, 0x0d40c5d9, 0x5e214b05, 0x9ba9efc5, 0x5e61b81e, 0x6e7bc31c, 0x0e61a163, 0x9e5832d2, 0x4e772248, 0x4e3d17c8, 0x92624f60, 0x7a1a02dc, 0x79891f65, 0x6eb45036, 0x0e321ee8, 0x4e2566f0, 0x4ea02b9b, 0x0f9dcb3d, 0x2e21b9f9, 0x0e21a8c3, 0xda1700bd, 0x6ea0fb38, 0x7e607a0b, 0x72845817, 0x7f61068e, 0x0d60e529, 0x4ea0ca5c, 0x1a94b20f, 0x8b87419d, 0x7ea9ed71, 0x2ea1a86e, 0x4d40c4da, 0x5ea0eada, 0x784ba96e, 0x7eb6ee02, 0x3db1c710, 0x0e217836, 0x7ee0bb96, 0x4e786c08, 0x4e976a08, 0x489ffe86, 0x4e79fc9b, 0x0e21cbce, 0x5ef7fc65, 0x4ea1286d, 0xd29c771e, 0x6f5c2839, 0x0ea00a9d, 0x6ee44c06, 0x5ee1d858, 0x5ef2fda6, 0x7eb0c9fe, 0x7f762791, 0x2e212ae6, 0x4e61c9db, 0x13003c57, 0x5ee1b8f8, 0x0f2396d2, 0x6ea0db1e, 0x0e71ba82, 0xab29c807, 0x6ef8f8b3, 0x1f18d4a1, 0x0e261d15, 0x1e290081, 0x1b0c7d12, 0x4e7771c3, 0xf845f1e4, 0x4d40c9e8, 0xce778452, 0x6eb9879d, 0x6e21c93d, 0xcec0829f, 0x52a0969f, 0x1e772b4f, 0x7ee1da88, 0x5f52fe0a, 0x7f3387b1, 0x5e214850, 0x1e65c025, 0x0e2ca294, 0x2e614829, 0x1e640077, 0x9e240048, 0x4ebe9537, 0x9bb7925e, 0x38b669c5, 0x2840d089, 0x6f43e648, 0x2e662d28, 0x4eabaff3, 0x6e734cc7, 0x0e31baee, 0x7ee0d93c, 0x5e282bde, 0x7e21bba4, 0x4e6c75fa, 0x5ac01217, 0x7f4304af, 0x1e7878ed, 0x1ada2196, 0x7ee1aba3, 0x93407f3c, 0x4f6c34eb, 0x6e3447a9, 0x7e7ae545, 0x5e0802bb, 0x6eeae63a, 0x7ee1da62, 0x5e280bb3, 0xf81d4009, 0x1e603b21, 0x5e281a14, 0x6eb0a99b, 0x1e266a25, 0x0d60cafe, 0x0e0b6194, 0x7a4ed2c5, 0x92b762ec, 0x4e6b5749, 0x3c16a6e5, 0x4ea0a92b, 0x0fa58b6a, 0x5f76148c, 0x6e30c95f, 0x1e6540fd, 0x5e28e40f, 0x0d403fd4, 0x7e30da36, 0x7fda9b51, 0x2ea04bde, 0x1e25c3d2, 0x1ee0434c, 0x5e21d8e7, 0x5ee1ba51, 0x5e61aba9, 0x4e2849fb, 0x5ee098ea, 0x4e60f63d, 0x0f280443, 0x5ee0da27, 0x2e78a6ce, 0x78054afc, 0x4e14286b, 0x4e218bd8, 0x2a3d2551, 0x3a04017a, 0x5f4317cd, 0x0e604a37, 0x9a834614, 0x0e2edf4d, 0x7a51a0a0, 0x5f8e9043, 0x6ea06bb2, 0xaa2857dd, 0x7a1903fc, 0x301ba9ba, 0x9ac929cd, 0x4e061ff0, 0x2e38fcfc, 0x0e2f614a, 0x7ee0d8e4, 0x6e73afda, 0x7f4156f7, 0x0e6078bf, 0x4ee1d9ed, 0x93403fbe, 0xce6f8640, 0x4e3855e3, 0x6f76fe23, 0x112466e8, 0x1e358a90, 0x7f45272c, 0x6ea19a9d, 0x8a696350, 0x1e3900f6, 0x5e61c866, 0x0e3fbfd0, 0x5ee09ad0, 0x0e651d27, 0x4dffc35e, 0x2e20c6ce, 0x0fbe118d, 0x1e656a15, 0xd1357365, 0x0e20a847, 0xce4a835c, 0x4e203905, 0x2e60090d, 0x7f4a27bb, 0x1e64c316, 0xce7d86a4, 0x7ebded2d, 0x6e70a97e, 0x4eb9a42b, 0x0e209bef, 0x6f151730, 0x0e7e30f7, 0x4e724509, 0xd503375f, 0xce58b6ae, 0x5e21a9b8, 0xcb2ca538, 0x5ac01131, 0x6ea19a24, 0xeb40c8b3, 0xc8df7d65, 0x78108341, 0x3218ab9b, 0x0f3da7dd, 0x2e003089, 0x4e21cab5, 0x8aa5c924, 0x1a94950c, 0x123e506f, 0x13117e37, 0x1ee6005b, 0x5ac00647, 0x5eec8cd5, 0x7ef0fb3d, 0x9223272a, 0x5ee0cb02, 0x6e66071d, 0x6ea1dbbf, 0x5e61c903, 0x5ac015ea, 0x93db6206, 0x7e62b5e3, 0x6ea0c87b, 0xdac0090e, 0x48df7d90, 0x6e206ba5, 0x9e2503c2, 0x6e25fc89, 0x4d60e2db, 0x1e3e22a0, 0x2eb81c19, 0x7856ea00, 0x5fbfb22d, 0x1e630244, 0x4e202a83, 0x1f50a722, 0x7f7b55d2, 0x0fae89b9, 0x4e781d73, 0xce738c3a, 0x4f15a591, 0x6e21c7e1, 0x586ff77e, 0x8a5d3592, 0x93401c67, 
0x5e61cb86, 0xce6bc2c1, 0x6e393f10, 0x9bb70ec3, 0xdac0098c, 0x4da84b95, 0x7f494476, 0x9ace5c11, 0x7e61ca14, 0x4f7a60ef, 0x1ad32b39, 0x0ea3777f, 0x5e61da7f, 0x4f1404e2, 0x4e3244e2, 0x6e1b1ceb, 0x0dee5aac, 0x4e2f9dc4, 0x5ea1b8c3, 0x1e59f863, 0xd500403f, 0x4e3ae7d0, 0x4ef5c6ea, 0x08dffe3b, 0x6e36f4f6, 0x2e764f29, 0x0e726f23, 0x5f42375b, 0x7f71fc40, 0x6e618aad, 0x93403e5b, 0x0e205976, 0x0e7250c4, 0x6eb0abc9, 0x2e2049f0, 0x5f14754d, 0x7f6ce468, 0x6f950bbe, 0x6e31aa47, 0x4eb83396, 0x0dccc952, 0x2ea1ca90, 0xce69c701, 0xb0bed69e, 0x7c5dec39, 0x4e2868a2, 0x0e591b08, 0x5f34e6dd, 0x3a449184, 0x5e3ce6de, 0x4ea149b7, 0x4e7ad29b, 0xba198503, 0x1f683e8f, 0xfa52f2a7, 0x6e30dffc, 0x4e6c3d17, 0x2eae3248, 0xd503349f, 0x1e60002c, 0x0f180680, 0x9e240049, 0x6f75774e, 0xa90d8678, 0x9ad924c4, 0x7eb0f85b, 0x0e205aaf, 0x7ee08899, 0x5f4bffd8, 0x1b0ff5f3, 0x4ee11dcd, 0x2e218948, 0x0dcb2733, 0x4eac107c, 0x4ea04a53, 0x4e287b44, 0x0e60b82a, 0x5ee0ebbc, 0xce454ff1, 0x5e1761e7, 0x5e09202f, 0x0e0c0754, 0x1e72e6b9, 0x7e21da70, 0x0fbdb20c, 0x5efb8c84, 0xd500401f, 0x3a47526e, 0x1e680acf, 0x7f7375fc, 0xf80522da, 0x4ee60c02, 0x4d40c2e7, 0x6f89096b, 0x7ee1bb6e, 0x5e280b4a, 0x1e3120c8, 0x7eb2ef96, 0x4fd012dd, 0x0f3027ef, 0x4e2078a8, 0xd503201f, 0x2e2312d9, 0x6ebf1c6e, 0x5ee1f8df, 0x4e607a46, 0x6e30c877, 0x6c09d2d1, 0x4e61abd8, 0x0e35267e, 0x6ac17728, 0x0e861aa0, 0x6f63fe26, 0x6f157628, 0x6f30a5f9, 0x4d60cc0c, 0x4e21cb59, 0x2e68a3fb, 0x7efae601, 0x6ea0f82c, 0x9b25ec12, 0x1a1a0305, 0x0e043fe1, 0x6e73c0ed, 0x6ea1b8c0, 0x7e20380b, 0x0f0534e8, 0x1f56bc7d, 0xba0c0128, 0x1e672160, 0x6e7b259b, 0x7ee07b5d, 0x9a820443, 0x4e040581, 0x2f1d87e8, 0x1acd2f5b, 0x6e20794f, 0x2e6a3c93, 0xc8dffe13, 0xce5ab1c6, 0x6eea55f6, 0x4ea039b3, 0x0d602fec, 0x2e246e2f, 0x7857be39, 0xb80608fb, 0x1e67c017, 0x9bcf7f63, 0x0f92d857, 0x5e0812f7, 0x1e210172, 0x7e6128e9, 0x7ea94d41, 0x981179e1, 0x1effb018, 0x2e600828, 0x0eb9c6b2, 0x6ee1baae, 0x4ea0db28, 0x2ea1487b, 0x4ea6c7f0, 0x2e2374c7, 0x7e30d8dd, 0xb9991fa7, 0x4e791e3e, 0x889f7c4b, 0x0e6c753c, 0x1e740ad1, 0x1e244324, 0x1ef33010, 0x5ac01102, 0x9bd97fba, 0x6e290143, 0x1e2220d8, 0x4d8d5aee, 0x6f28570b, 0xfa4ab0c1, 0xdac00b14, 0x7ea1a90e, 0x2e3027d8, 0x6f25a733, 0x4e61a96e, 0x4e1a2fcb, 0x0e22fe0a, 0xc8df7cd0, 0x5e280a55, 0x4e012b20, 0x7e70dbf4, 0x520c5a4e, 0x6ea6c57f, 0x0e861af8, 0xd503233f, 0x889ffe3c, 0x5e274ea9, 0x4e21a89a, 0x0e170c02, 0x6efd4c0b, 0xd5033ebf, 0x6e61a92c, 0x2e205b72, 0x789fb828, 0x0e626e94, 0x2ea6724c, 0x9a10028b, 0x2c6c51fc, 0x5a9de6b9, 0x6e6881f3, 0x5ee0ea6b, 0x0faec36e, 0x0e955bca, 0x1acf206d, 0x7f6f571b, 0x4e286930, 0x12b41ceb, 0x1e770b7a, 0x0ea18ac2, 0x5e282aaf, 0xf2b7fa1e, 0x1ac34311, 0x13167d11, 0x4ea63412, 0x6e758038, 0x2f1d85d6, 0x0f275480, 0x0ead6c71, 0x6e204b69, 0x1e6303f4, 0x5e0031ef, 0x13001e40, 0x7a16006f, 0x6e6ae4c0, 0x0f0f242f, 0x6e674f50, 0x4e606b7a, 0x7e6ee684, 0x1e6b5957, 0x7ea1bbab, 0x7ea0b6cb, 0xce4da241, 0x0ea1b953, 0x0eb2af4b, 0x9ac309d0, 0x6e61d8bd, 0x5ea0d890, 0x5f47d1e7, 0xfa5981ca, 0x1e7f7959, 0x6ef24dd8, 0x0e0a41d1, 0x5ee0e898, 0x4e6038e2, 0x13097d65, 0x6f839088, 0x9e290265, 0x0e208824, 0x2e65af79, 0x6f36a561, 0x9ad3204b, 0x0e21482e, 0x1e24431d, 0xd50330bf, 0x0df641aa, 0x6e602a83, 0xce30505f, 0x5e025238, 0xd503201f, 0x4e608880, 0x4de9c38d, 0x5e0f5348, 0x6eb48ca9, 0x50fda31b, 0x2e251eec, 0x7842ba50, 0xd8a1cd86, 0x2ea09862, 0x0ea09983, 0x2ea333b0, 0x0ea6032c, 0x4f94801b, 0x7e3ee57d, 0x38135e4f, 0xd8fdd9dd, 0x5ee0fcde, 0x9e64033d, 0x6e37f547, 0x6e3dd7ef, 0x13003f3d, 0x0e602f9f, 0x4e7ad014, 0x9b3b6857, 0x5ea0cb67, 0x0eb31c9f, 0x4e7c5372, 0x5e61b8c0, 0x0ea19b23, 0x0ee6e1df, 
0x6e63a626, 0x2f139405, 0x7eb0f96d, 0x9e588c63, 0x2e714c3a, 0x6e8c941e, 0x0f61b331, 0x6f01f625, 0x4e78d4ea, 0x6f403709, 0x1a0300da, 0xda0102c8, 0x7e61d9fd, 0xb89469bb, 0x0c838780, 0x2e60a590, 0x4dfd29e1, 0x4e150f2e, 0xce2810bc, 0x5f541591, 0x9ee60259, 0x2eb40e56, 0x5e014027, 0x2ef71faf, 0x4e2d452f, 0x5ee0a813, 0x4eb03301, 0x38443acf, 0x6eabd502, 0x0e2ee71e, 0x5a960364, 0xce7ec596, 0x7efbed09, 0x4ef42ea2, 0x0eb30ea5, 0x5ee0d9f8, 0x6f513552, 0xf89eb3fa, 0x7ea2eca6, 0x9b00cc19, 0xf897409e, 0x1e73485f, 0x381afa77, 0x0f169f3b, 0x5ee1aa70, 0x5e1803ee, 0x0dbf5a4c, 0xce78c7a6, 0x9b0b260c, 0x2ef8fa19, 0x6e70aa4b, 0xce45b805, 0x2ea08e86, 0x4ee0bafd, 0x2ea09a1f, 0x4e218900, 0x6e744f13, 0xce518653, 0xf81b7a68, 0xce45ac5e, 0x7e62e416, 0x1a1b02b6, 0x7e21db48, 0x381daaaf, 0x6b2c0987, 0x0e2ec651, 0x4eae8502, 0x9bde7ca0, 0x6f47201f, 0x7e61a8a3, 0x6e60d5db, 0x4e2879de, 0xf81d194e, 0x4f1b8d05, 0x4d0048b2, 0x6e203be9, 0x4e3e7eb1, 0x0e260ef8, 0x2e688518, 0x7e3fec46, 0xdac00843, 0xf85c8917, 0x2e212a0f, 0x0e8196da, 0xd503359f, 0xce4c81f2, 0x6ee19992, 0x6e21ca79, 0x4d40c1d2, 0x4f5816ef, 0x4e34c3ea, 0x4df7c283, 0x7ef7eeb6, 0x18e276ce, 0xab0d21c0, 0xd5032f7f, 0x4ea00dbf, 0x5ac01251, 0xd0121955, 0x7f1495e4, 0x7ef0fa11, 0x5e24dd9c, 0x9add25b5, 0x0eb2bdef, 0x9e1977c7, 0x6f4b26bd, 0x0e200a9c, 0x9b4f7c00, 0x0ea0392e, 0x7e212a2c, 0x0b248b90, 0x1acc27a1, 0x2e701c90, 0x5ee1b870, 0x5e280aba, 0x5ea0780e, 0x1e264246, 0x4e052d04, 0x0e731dc4, 0xce461997, 0x9a9e9413, 0x3d462048, 0x5ea1fac5, 0x2ea0c8c4, 0x9a030280, 0x2ebda4b8, 0x5eef8614, 0x6eadc4e0, 0xbd035a8f, 0x4e606b84, 0x4eb1aba1, 0x4e286928, 0x4e2858cc, 0x9add0ce9, 0x4e070d65, 0x5fd399d5, 0x0f03fde7, 0x6ee90c74, 0x4ef8e31e, 0x381d986a, 0x5ea0ebf4, 0x5ea0d87e, 0x2e76ac9e, 0x6eb36cd4, 0x2e6e1c4c, 0x2e2feebc, 0x1ace4b03, 0x5ee0db12, 0x5ea0e9b1, 0x2e1c32d5, 0x5fa49a09, 0x0e258737, 0x7e21ca8e, 0xce4f9988, 0x5f7f56a6, 0x0e739766, 0x4e28586c, 0x6e619908, 0xd500401f, 0xf88b9252, 0x6e251c8e, 0x9e20015b, 0x7f1486b9, 0x717c339b, 0x1f31ff70, 0x4ea0eb62, 0x9acb0926, 0x489f7d85, 0x4e209b54, 0x2e84cf03, 0x2e65946c, 0x0e7d80cd, 0xc8dffecc, 0xce668bd8, 0x6e2188af, 0xeb4ada34, 0x2b25ec33, 0x0d40e6e7, 0x4eb2c757, 0x4ec82ad0, 0x7e21cb0a, 0x0e21a847, 0x4e0b1ec0, 0x381e6ac0, 0x6e61c8f5, 0x0f10071c, 0x2ee21daa, 0x5e61ab31, 0x6e218892, 0x2e7e7cb5, 0x6f2826aa, 0x7f6b54df, 0x4eaa2620, 0xdac00034, 0x4f6477be, 0x7e6148ea, 0x4eef1f57, 0x78459aeb, 0x2ebc3f10, 0x2e35f4eb, 0x4fbf19ce, 0xd8d0e58e, 0x2e21bbc7, 0x6ee0cab6, 0x9bc57e3f, 0x2f854037, 0x4e92181c, 0x6e6d1f89, 0x0f305545, 0x4ee19a57, 0x0e887bdf, 0x5e1a4185, 0x7ef0c821, 0x2eb6607c, 0x2ea0d9b8, 0x9e0380f4, 0x2ebf1c83, 0x1e62597d, 0x7f6e2548, 0x5ac00205, 0x4e616adb, 0xce638b8c, 0x5e1653cf, 0x2e6069be, 0x0e2ac641, 0x1e33c76f, 0xce44956d, 0x9bb90d31, 0x1e24c20a, 0x7ee038c1, 0x93407e5e, 0x4e280127, 0xc8df7f7d, 0xba42f263, 0x1e6f199c, 0x6e212889, 0x6e92f60e, 0x6ebdc499, 0x8b9acbf8, 0x4d40c581, 0x3a020250, 0x6e6a6716, 0x9248403b, 0x9081ffea, 0x4e603856, 0x9ad1242b, 0x6f270579, 0x1a070349, 0xcec08133, 0xd503305f, 0x5a1a00ca, 0x2e60b8a2, 0x0e5f28fd, 0x0e31a3da, 0x7e61cbc1, 0xd503399f, 0x5f5e54aa, 0x0eb8bdea, 0x4eba8f10, 0x4e2a2e60, 0x2f3da7d6, 0x1e58e297, 0x6e71aa3e, 0x6b86701a, 0xce4fa5e6, 0x4ee7c463, 0x8a79307f, 0x0ebea541, 0x2e218af4, 0x4e774f8a, 0xb9b95dc5, 0x6e61abd5, 0x4dd1e814, 0x4da72098, 0x98307582, 0x3a512101, 0x7ef95497, 0x1ace5535, 0x5a0c0349, 0x4e28581b, 0x6ebf1c02, 0x5ea1da23, 0x1e274314, 0x5e25dd29, 0x6e75f594, 0x6eaf6ed5, 0x4e214abe, 0x4e064172, 0x2e21c8f4, 0xf84c5b08, 0x1e244312, 0x14000000}; - for (size_t i = 0; i < code32.size(); ++i) - 
env.MemoryWrite32(100 + i, code32[i]); - env.ignore_invalid_insn = true; + env.code_mem = {0xea80f352, 0x6e65e59d, 0x1e20c343, 0x2e3a7192, 0x2e267249, 0xd500405f, 0x6f01f461, 0x6eb684fc, 0x58028edd, 0x0ea5f5b6, 0x0ea069fb, 0x2e769517, 0x5e066063, 0x1e65c3f5, 0x4f00ff52, 0x93401cf6, 0x1e274248, 0x6f67aaf5, 0x5e0c0782, 0x5ef43f3c, 0x2e6595b7, 0x4e20590f, 0xb35aa451, 0x6ee2c5ed, 0x4e32bf46, 0x2ea1ba8f, 0x2f68a85e, 0x9237d90a, 0x5e23dd10, 0x0e762e32, 0x4e31a8cf, 0xce1f3360, 0x781a4ac0, 0x13834066, 0x5fa8101c, 0x6f7c5594, 0x0e71bb68, 0xbc0b3e8f, 0x785dbbda, 0x6f51e794, 0xce50af75, 0x1ad728ec, 0x6ee0da4c, 0xb84efa14, 0x2eb3f613, 0x4e287ade, 0x4eb8c734, 0x2e83f4e8, 0x0e397c80, 0xd08f93f8, 0xce718e48, 0x0f672a0d, 0x2e9edd40, 0x0e14128b, 0x6f5942e6, 0x8b3a0f03, 0x3c5d16b9, 0x7f7e3743, 0x4f4c54e4, 0x0ea0a9e9, 0x9e59dbe6, 0x6e7ddcd3, 0xcec08377, 0x9ba759f8, 0x2ea5046e, 0x0e24c569, 0xb8979780, 0x4e31b98c, 0x4efe4f46, 0x4ea7c762, 0x7e61c9c6, 0x6e30c880, 0x1ada0c25, 0x4e603a2f, 0xda9d7218, 0x0d40c5d9, 0x5e214b05, 0x9ba9efc5, 0x5e61b81e, 0x6e7bc31c, 0x0e61a163, 0x9e5832d2, 0x4e772248, 0x4e3d17c8, 0x92624f60, 0x7a1a02dc, 0x79891f65, 0x6eb45036, 0x0e321ee8, 0x4e2566f0, 0x4ea02b9b, 0x0f9dcb3d, 0x2e21b9f9, 0x0e21a8c3, 0xda1700bd, 0x6ea0fb38, 0x7e607a0b, 0x72845817, 0x7f61068e, 0x0d60e529, 0x4ea0ca5c, 0x1a94b20f, 0x8b87419d, 0x7ea9ed71, 0x2ea1a86e, 0x4d40c4da, 0x5ea0eada, 0x784ba96e, 0x7eb6ee02, 0x3db1c710, 0x0e217836, 0x7ee0bb96, 0x4e786c08, 0x4e976a08, 0x489ffe86, 0x4e79fc9b, 0x0e21cbce, 0x5ef7fc65, 0x4ea1286d, 0xd29c771e, 0x6f5c2839, 0x0ea00a9d, 0x6ee44c06, 0x5ee1d858, 0x5ef2fda6, 0x7eb0c9fe, 0x7f762791, 0x2e212ae6, 0x4e61c9db, 0x13003c57, 0x5ee1b8f8, 0x0f2396d2, 0x6ea0db1e, 0x0e71ba82, 0xab29c807, 0x6ef8f8b3, 0x1f18d4a1, 0x0e261d15, 0x1e290081, 0x1b0c7d12, 0x4e7771c3, 0xf845f1e4, 0x4d40c9e8, 0xce778452, 0x6eb9879d, 0x6e21c93d, 0xcec0829f, 0x52a0969f, 0x1e772b4f, 0x7ee1da88, 0x5f52fe0a, 0x7f3387b1, 0x5e214850, 0x1e65c025, 0x0e2ca294, 0x2e614829, 0x1e640077, 0x9e240048, 0x4ebe9537, 0x9bb7925e, 0x38b669c5, 0x2840d089, 0x6f43e648, 0x2e662d28, 0x4eabaff3, 0x6e734cc7, 0x0e31baee, 0x7ee0d93c, 0x5e282bde, 0x7e21bba4, 0x4e6c75fa, 0x5ac01217, 0x7f4304af, 0x1e7878ed, 0x1ada2196, 0x7ee1aba3, 0x93407f3c, 0x4f6c34eb, 0x6e3447a9, 0x7e7ae545, 0x5e0802bb, 0x6eeae63a, 0x7ee1da62, 0x5e280bb3, 0xf81d4009, 0x1e603b21, 0x5e281a14, 0x6eb0a99b, 0x1e266a25, 0x0d60cafe, 0x0e0b6194, 0x7a4ed2c5, 0x92b762ec, 0x4e6b5749, 0x3c16a6e5, 0x4ea0a92b, 0x0fa58b6a, 0x5f76148c, 0x6e30c95f, 0x1e6540fd, 0x5e28e40f, 0x0d403fd4, 0x7e30da36, 0x7fda9b51, 0x2ea04bde, 0x1e25c3d2, 0x1ee0434c, 0x5e21d8e7, 0x5ee1ba51, 0x5e61aba9, 0x4e2849fb, 0x5ee098ea, 0x4e60f63d, 0x0f280443, 0x5ee0da27, 0x2e78a6ce, 0x78054afc, 0x4e14286b, 0x4e218bd8, 0x2a3d2551, 0x3a04017a, 0x5f4317cd, 0x0e604a37, 0x9a834614, 0x0e2edf4d, 0x7a51a0a0, 0x5f8e9043, 0x6ea06bb2, 0xaa2857dd, 0x7a1903fc, 0x301ba9ba, 0x9ac929cd, 0x4e061ff0, 0x2e38fcfc, 0x0e2f614a, 0x7ee0d8e4, 0x6e73afda, 0x7f4156f7, 0x0e6078bf, 0x4ee1d9ed, 0x93403fbe, 0xce6f8640, 0x4e3855e3, 0x6f76fe23, 0x112466e8, 0x1e358a90, 0x7f45272c, 0x6ea19a9d, 0x8a696350, 0x1e3900f6, 0x5e61c866, 0x0e3fbfd0, 0x5ee09ad0, 0x0e651d27, 0x4dffc35e, 0x2e20c6ce, 0x0fbe118d, 0x1e656a15, 0xd1357365, 0x0e20a847, 0xce4a835c, 0x4e203905, 0x2e60090d, 0x7f4a27bb, 0x1e64c316, 0xce7d86a4, 0x7ebded2d, 0x6e70a97e, 0x4eb9a42b, 0x0e209bef, 0x6f151730, 0x0e7e30f7, 0x4e724509, 0xd503375f, 0xce58b6ae, 0x5e21a9b8, 0xcb2ca538, 0x5ac01131, 0x6ea19a24, 0xeb40c8b3, 0xc8df7d65, 0x78108341, 0x3218ab9b, 0x0f3da7dd, 0x2e003089, 0x4e21cab5, 0x8aa5c924, 0x1a94950c, 
0x123e506f, 0x13117e37, 0x1ee6005b, 0x5ac00647, 0x5eec8cd5, 0x7ef0fb3d, 0x9223272a, 0x5ee0cb02, 0x6e66071d, 0x6ea1dbbf, 0x5e61c903, 0x5ac015ea, 0x93db6206, 0x7e62b5e3, 0x6ea0c87b, 0xdac0090e, 0x48df7d90, 0x6e206ba5, 0x9e2503c2, 0x6e25fc89, 0x4d60e2db, 0x1e3e22a0, 0x2eb81c19, 0x7856ea00, 0x5fbfb22d, 0x1e630244, 0x4e202a83, 0x1f50a722, 0x7f7b55d2, 0x0fae89b9, 0x4e781d73, 0xce738c3a, 0x4f15a591, 0x6e21c7e1, 0x586ff77e, 0x8a5d3592, 0x93401c67, 0x5e61cb86, 0xce6bc2c1, 0x6e393f10, 0x9bb70ec3, 0xdac0098c, 0x4da84b95, 0x7f494476, 0x9ace5c11, 0x7e61ca14, 0x4f7a60ef, 0x1ad32b39, 0x0ea3777f, 0x5e61da7f, 0x4f1404e2, 0x4e3244e2, 0x6e1b1ceb, 0x0dee5aac, 0x4e2f9dc4, 0x5ea1b8c3, 0x1e59f863, 0xd500403f, 0x4e3ae7d0, 0x4ef5c6ea, 0x08dffe3b, 0x6e36f4f6, 0x2e764f29, 0x0e726f23, 0x5f42375b, 0x7f71fc40, 0x6e618aad, 0x93403e5b, 0x0e205976, 0x0e7250c4, 0x6eb0abc9, 0x2e2049f0, 0x5f14754d, 0x7f6ce468, 0x6f950bbe, 0x6e31aa47, 0x4eb83396, 0x0dccc952, 0x2ea1ca90, 0xce69c701, 0xb0bed69e, 0x7c5dec39, 0x4e2868a2, 0x0e591b08, 0x5f34e6dd, 0x3a449184, 0x5e3ce6de, 0x4ea149b7, 0x4e7ad29b, 0xba198503, 0x1f683e8f, 0xfa52f2a7, 0x6e30dffc, 0x4e6c3d17, 0x2eae3248, 0xd503349f, 0x1e60002c, 0x0f180680, 0x9e240049, 0x6f75774e, 0xa90d8678, 0x9ad924c4, 0x7eb0f85b, 0x0e205aaf, 0x7ee08899, 0x5f4bffd8, 0x1b0ff5f3, 0x4ee11dcd, 0x2e218948, 0x0dcb2733, 0x4eac107c, 0x4ea04a53, 0x4e287b44, 0x0e60b82a, 0x5ee0ebbc, 0xce454ff1, 0x5e1761e7, 0x5e09202f, 0x0e0c0754, 0x1e72e6b9, 0x7e21da70, 0x0fbdb20c, 0x5efb8c84, 0xd500401f, 0x3a47526e, 0x1e680acf, 0x7f7375fc, 0xf80522da, 0x4ee60c02, 0x4d40c2e7, 0x6f89096b, 0x7ee1bb6e, 0x5e280b4a, 0x1e3120c8, 0x7eb2ef96, 0x4fd012dd, 0x0f3027ef, 0x4e2078a8, 0xd503201f, 0x2e2312d9, 0x6ebf1c6e, 0x5ee1f8df, 0x4e607a46, 0x6e30c877, 0x6c09d2d1, 0x4e61abd8, 0x0e35267e, 0x6ac17728, 0x0e861aa0, 0x6f63fe26, 0x6f157628, 0x6f30a5f9, 0x4d60cc0c, 0x4e21cb59, 0x2e68a3fb, 0x7efae601, 0x6ea0f82c, 0x9b25ec12, 0x1a1a0305, 0x0e043fe1, 0x6e73c0ed, 0x6ea1b8c0, 0x7e20380b, 0x0f0534e8, 0x1f56bc7d, 0xba0c0128, 0x1e672160, 0x6e7b259b, 0x7ee07b5d, 0x9a820443, 0x4e040581, 0x2f1d87e8, 0x1acd2f5b, 0x6e20794f, 0x2e6a3c93, 0xc8dffe13, 0xce5ab1c6, 0x6eea55f6, 0x4ea039b3, 0x0d602fec, 0x2e246e2f, 0x7857be39, 0xb80608fb, 0x1e67c017, 0x9bcf7f63, 0x0f92d857, 0x5e0812f7, 0x1e210172, 0x7e6128e9, 0x7ea94d41, 0x981179e1, 0x1effb018, 0x2e600828, 0x0eb9c6b2, 0x6ee1baae, 0x4ea0db28, 0x2ea1487b, 0x4ea6c7f0, 0x2e2374c7, 0x7e30d8dd, 0xb9991fa7, 0x4e791e3e, 0x889f7c4b, 0x0e6c753c, 0x1e740ad1, 0x1e244324, 0x1ef33010, 0x5ac01102, 0x9bd97fba, 0x6e290143, 0x1e2220d8, 0x4d8d5aee, 0x6f28570b, 0xfa4ab0c1, 0xdac00b14, 0x7ea1a90e, 0x2e3027d8, 0x6f25a733, 0x4e61a96e, 0x4e1a2fcb, 0x0e22fe0a, 0xc8df7cd0, 0x5e280a55, 0x4e012b20, 0x7e70dbf4, 0x520c5a4e, 0x6ea6c57f, 0x0e861af8, 0xd503233f, 0x889ffe3c, 0x5e274ea9, 0x4e21a89a, 0x0e170c02, 0x6efd4c0b, 0xd5033ebf, 0x6e61a92c, 0x2e205b72, 0x789fb828, 0x0e626e94, 0x2ea6724c, 0x9a10028b, 0x2c6c51fc, 0x5a9de6b9, 0x6e6881f3, 0x5ee0ea6b, 0x0faec36e, 0x0e955bca, 0x1acf206d, 0x7f6f571b, 0x4e286930, 0x12b41ceb, 0x1e770b7a, 0x0ea18ac2, 0x5e282aaf, 0xf2b7fa1e, 0x1ac34311, 0x13167d11, 0x4ea63412, 0x6e758038, 0x2f1d85d6, 0x0f275480, 0x0ead6c71, 0x6e204b69, 0x1e6303f4, 0x5e0031ef, 0x13001e40, 0x7a16006f, 0x6e6ae4c0, 0x0f0f242f, 0x6e674f50, 0x4e606b7a, 0x7e6ee684, 0x1e6b5957, 0x7ea1bbab, 0x7ea0b6cb, 0xce4da241, 0x0ea1b953, 0x0eb2af4b, 0x9ac309d0, 0x6e61d8bd, 0x5ea0d890, 0x5f47d1e7, 0xfa5981ca, 0x1e7f7959, 0x6ef24dd8, 0x0e0a41d1, 0x5ee0e898, 0x4e6038e2, 0x13097d65, 0x6f839088, 0x9e290265, 0x0e208824, 0x2e65af79, 0x6f36a561, 0x9ad3204b, 0x0e21482e, 
0x1e24431d, 0xd50330bf, 0x0df641aa, 0x6e602a83, 0xce30505f, 0x5e025238, 0xd503201f, 0x4e608880, 0x4de9c38d, 0x5e0f5348, 0x6eb48ca9, 0x50fda31b, 0x2e251eec, 0x7842ba50, 0xd8a1cd86, 0x2ea09862, 0x0ea09983, 0x2ea333b0, 0x0ea6032c, 0x4f94801b, 0x7e3ee57d, 0x38135e4f, 0xd8fdd9dd, 0x5ee0fcde, 0x9e64033d, 0x6e37f547, 0x6e3dd7ef, 0x13003f3d, 0x0e602f9f, 0x4e7ad014, 0x9b3b6857, 0x5ea0cb67, 0x0eb31c9f, 0x4e7c5372, 0x5e61b8c0, 0x0ea19b23, 0x0ee6e1df, 0x6e63a626, 0x2f139405, 0x7eb0f96d, 0x9e588c63, 0x2e714c3a, 0x6e8c941e, 0x0f61b331, 0x6f01f625, 0x4e78d4ea, 0x6f403709, 0x1a0300da, 0xda0102c8, 0x7e61d9fd, 0xb89469bb, 0x0c838780, 0x2e60a590, 0x4dfd29e1, 0x4e150f2e, 0xce2810bc, 0x5f541591, 0x9ee60259, 0x2eb40e56, 0x5e014027, 0x2ef71faf, 0x4e2d452f, 0x5ee0a813, 0x4eb03301, 0x38443acf, 0x6eabd502, 0x0e2ee71e, 0x5a960364, 0xce7ec596, 0x7efbed09, 0x4ef42ea2, 0x0eb30ea5, 0x5ee0d9f8, 0x6f513552, 0xf89eb3fa, 0x7ea2eca6, 0x9b00cc19, 0xf897409e, 0x1e73485f, 0x381afa77, 0x0f169f3b, 0x5ee1aa70, 0x5e1803ee, 0x0dbf5a4c, 0xce78c7a6, 0x9b0b260c, 0x2ef8fa19, 0x6e70aa4b, 0xce45b805, 0x2ea08e86, 0x4ee0bafd, 0x2ea09a1f, 0x4e218900, 0x6e744f13, 0xce518653, 0xf81b7a68, 0xce45ac5e, 0x7e62e416, 0x1a1b02b6, 0x7e21db48, 0x381daaaf, 0x6b2c0987, 0x0e2ec651, 0x4eae8502, 0x9bde7ca0, 0x6f47201f, 0x7e61a8a3, 0x6e60d5db, 0x4e2879de, 0xf81d194e, 0x4f1b8d05, 0x4d0048b2, 0x6e203be9, 0x4e3e7eb1, 0x0e260ef8, 0x2e688518, 0x7e3fec46, 0xdac00843, 0xf85c8917, 0x2e212a0f, 0x0e8196da, 0xd503359f, 0xce4c81f2, 0x6ee19992, 0x6e21ca79, 0x4d40c1d2, 0x4f5816ef, 0x4e34c3ea, 0x4df7c283, 0x7ef7eeb6, 0x18e276ce, 0xab0d21c0, 0xd5032f7f, 0x4ea00dbf, 0x5ac01251, 0xd0121955, 0x7f1495e4, 0x7ef0fa11, 0x5e24dd9c, 0x9add25b5, 0x0eb2bdef, 0x9e1977c7, 0x6f4b26bd, 0x0e200a9c, 0x9b4f7c00, 0x0ea0392e, 0x7e212a2c, 0x0b248b90, 0x1acc27a1, 0x2e701c90, 0x5ee1b870, 0x5e280aba, 0x5ea0780e, 0x1e264246, 0x4e052d04, 0x0e731dc4, 0xce461997, 0x9a9e9413, 0x3d462048, 0x5ea1fac5, 0x2ea0c8c4, 0x9a030280, 0x2ebda4b8, 0x5eef8614, 0x6eadc4e0, 0xbd035a8f, 0x4e606b84, 0x4eb1aba1, 0x4e286928, 0x4e2858cc, 0x9add0ce9, 0x4e070d65, 0x5fd399d5, 0x0f03fde7, 0x6ee90c74, 0x4ef8e31e, 0x381d986a, 0x5ea0ebf4, 0x5ea0d87e, 0x2e76ac9e, 0x6eb36cd4, 0x2e6e1c4c, 0x2e2feebc, 0x1ace4b03, 0x5ee0db12, 0x5ea0e9b1, 0x2e1c32d5, 0x5fa49a09, 0x0e258737, 0x7e21ca8e, 0xce4f9988, 0x5f7f56a6, 0x0e739766, 0x4e28586c, 0x6e619908, 0xd500401f, 0xf88b9252, 0x6e251c8e, 0x9e20015b, 0x7f1486b9, 0x717c339b, 0x1f31ff70, 0x4ea0eb62, 0x9acb0926, 0x489f7d85, 0x4e209b54, 0x2e84cf03, 0x2e65946c, 0x0e7d80cd, 0xc8dffecc, 0xce668bd8, 0x6e2188af, 0xeb4ada34, 0x2b25ec33, 0x0d40e6e7, 0x4eb2c757, 0x4ec82ad0, 0x7e21cb0a, 0x0e21a847, 0x4e0b1ec0, 0x381e6ac0, 0x6e61c8f5, 0x0f10071c, 0x2ee21daa, 0x5e61ab31, 0x6e218892, 0x2e7e7cb5, 0x6f2826aa, 0x7f6b54df, 0x4eaa2620, 0xdac00034, 0x4f6477be, 0x7e6148ea, 0x4eef1f57, 0x78459aeb, 0x2ebc3f10, 0x2e35f4eb, 0x4fbf19ce, 0xd8d0e58e, 0x2e21bbc7, 0x6ee0cab6, 0x9bc57e3f, 0x2f854037, 0x4e92181c, 0x6e6d1f89, 0x0f305545, 0x4ee19a57, 0x0e887bdf, 0x5e1a4185, 0x7ef0c821, 0x2eb6607c, 0x2ea0d9b8, 0x9e0380f4, 0x2ebf1c83, 0x1e62597d, 0x7f6e2548, 0x5ac00205, 0x4e616adb, 0xce638b8c, 0x5e1653cf, 0x2e6069be, 0x0e2ac641, 0x1e33c76f, 0xce44956d, 0x9bb90d31, 0x1e24c20a, 0x7ee038c1, 0x93407e5e, 0x4e280127, 0xc8df7f7d, 0xba42f263, 0x1e6f199c, 0x6e212889, 0x6e92f60e, 0x6ebdc499, 0x8b9acbf8, 0x4d40c581, 0x3a020250, 0x6e6a6716, 0x9248403b, 0x9081ffea, 0x4e603856, 0x9ad1242b, 0x6f270579, 0x1a070349, 0xcec08133, 0xd503305f, 0x5a1a00ca, 0x2e60b8a2, 0x0e5f28fd, 0x0e31a3da, 0x7e61cbc1, 0xd503399f, 0x5f5e54aa, 0x0eb8bdea, 0x4eba8f10, 
0x4e2a2e60, 0x2f3da7d6, 0x1e58e297, 0x6e71aa3e, 0x6b86701a, 0xce4fa5e6, 0x4ee7c463, 0x8a79307f, 0x0ebea541, 0x2e218af4, 0x4e774f8a, 0xb9b95dc5, 0x6e61abd5, 0x4dd1e814, 0x4da72098, 0x98307582, 0x3a512101, 0x7ef95497, 0x1ace5535, 0x5a0c0349, 0x4e28581b, 0x6ebf1c02, 0x5ea1da23, 0x1e274314, 0x5e25dd29, 0x6e75f594, 0x6eaf6ed5, 0x4e214abe, 0x4e064172, 0x2e21c8f4, 0xf84c5b08, 0x1e244312, 0x14000000}; + env.code_mem.emplace_back(0x14000000); // B . jit.SetRegister(0, 0x866524401a1d4e47); jit.SetRegister(1, 0x02ca8cec51301b60); @@ -1670,6 +1619,8 @@ TEST_CASE("A64: rand2", "[a64][.]") { jit.SetPC(100); jit.SetSP(0x000000cdfadeaff0); + env.code_mem_start_address = 100; + jit.SetVector(0, {0x4d5a180ac0ffdac8, 0xfc6eb113cd5ff2a8}); jit.SetVector(1, {0x39f8cecc9de9cefd, 0x3a6b35d333d89a6b}); jit.SetVector(2, {0x791fd8290bbdd2f4, 0xdc0e5e7aee311411}); @@ -2327,61 +2278,3 @@ TEST_CASE("A64: SQABS", "[a64]") { CHECK(jit.GetVector(13) == Vector{0x763E4B7043BC0AC5, 0x5FDD5D671D399E2}); CHECK(FP::FPSR{(uint32_t)jit.GetRegister(13)}.QC() == 0); } - -TEST_CASE("A64: RBIT{16b}", "[a64]") { - A64TestEnv env; - A64::UserConfig conf{}; - conf.callbacks = &env; - A64::Jit jit{conf}; - env.code_mem.emplace_back(0x6e605841); // rbit v1.16b, v2.16b - env.code_mem.emplace_back(0x6e605822); // rbit v2.16b, v1.16b - env.code_mem.emplace_back(0x14000000); // b . - jit.SetVector(2, { 0xcafedead, 0xbabebeef }); - jit.SetPC(0); // at _start - env.ticks_left = 4; - jit.Run(); - REQUIRE(jit.GetVector(1)[0] == 0x537f7bb5); - REQUIRE(jit.GetVector(1)[1] == 0x5d7d7df7); - REQUIRE(jit.GetVector(2)[0] == 0xcafedead); - REQUIRE(jit.GetVector(2)[1] == 0xbabebeef); -} - -TEST_CASE("A64: CLZ{X}", "[a64]") { - A64TestEnv env; - A64::UserConfig conf{}; - conf.callbacks = &env; - A64::Jit jit{conf}; - env.code_mem.emplace_back(0xdac01060); // clz x0, x3 - env.code_mem.emplace_back(0xdac01081); // clz x1, x4 - env.code_mem.emplace_back(0xdac010a2); // clz x2, x5 - env.code_mem.emplace_back(0x14000000); // b . - jit.SetRegister(3, 0xfffffffffffffff0); - jit.SetRegister(4, 0x0fffffff0ffffff0); - jit.SetRegister(5, 0x07fffffeffeffef0); - jit.SetPC(0); // at _start - env.ticks_left = 4; - jit.Run(); - REQUIRE(jit.GetRegister(0) == 0); - REQUIRE(jit.GetRegister(1) == 4); - REQUIRE(jit.GetRegister(2) == 5); -} - -TEST_CASE("A64: CLZ{W}", "[a64]") { - A64TestEnv env; - A64::UserConfig conf{}; - conf.callbacks = &env; - A64::Jit jit{conf}; - env.code_mem.emplace_back(0x5ac01060); // clz w0, w3 - env.code_mem.emplace_back(0x5ac01081); // clz w1, w4 - env.code_mem.emplace_back(0x5ac010a2); // clz w2, w5 - env.code_mem.emplace_back(0x14000000); // b . 
-   jit.SetRegister(3, 0xffff1110);
-   jit.SetRegister(4, 0x0fff1110);
-   jit.SetRegister(5, 0x07fffffe);
-   jit.SetPC(0); // at _start
-   env.ticks_left = 4;
-   jit.Run();
-   REQUIRE(jit.GetRegister(0) == 0);
-   REQUIRE(jit.GetRegister(1) == 4);
-   REQUIRE(jit.GetRegister(2) == 5);
-}
diff --git a/src/dynarmic/tests/A64/fibonacci.cpp b/src/dynarmic/tests/A64/fibonacci.cpp
index 713a48cab7..cbb02d1b01 100644
--- a/src/dynarmic/tests/A64/fibonacci.cpp
+++ b/src/dynarmic/tests/A64/fibonacci.cpp
@@ -8,7 +8,7 @@

#include
#include
-#include <unordered_map>
+#include <map>
#include

#include "dynarmic/common/common_types.h"
@@ -23,7 +23,7 @@ namespace {
class MyEnvironment final : public A64::UserCallbacks {
public:
    u64 ticks_left = 0;
-   std::unordered_map<u64, u8> memory{};
+   std::map<u64, u8> memory{};

    u8 MemoryRead8(u64 vaddr) override {
        return memory[vaddr];
diff --git a/src/dynarmic/tests/A64/real_world.cpp b/src/dynarmic/tests/A64/real_world.cpp
deleted file mode 100644
index b9eb1c464b..0000000000
--- a/src/dynarmic/tests/A64/real_world.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
-// SPDX-License-Identifier: GPL-3.0-or-later
-
-#include
-#include
-
-#include "./testenv.h"
-#include "dynarmic/interface/A64/a64.h"
-
-using namespace Dynarmic;
-/* Following C program:
-int M[64];
-int grob(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, int k, int l) {
-    M[a] += M[b]; // TOTAL GCC DESTRUCTION
-    return a * b * c * d * e * f * g * h * i * j * k * l;
-}
-int _start() {
-    return grob(
-        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
-        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
-        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
-        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
-        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
-        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
-        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
-        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
-        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
-        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
-        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
-        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12])
-    );
-}
-#ifdef __x86_64__
-#include
-int main() {
-    return printf("%i", start_e());
-}
-#endif
-
-cat < a64-linker.ld >> EOF
-ENTRY(_start);
-PHDRS { text PT_LOAD; rodata PT_LOAD; data PT_LOAD; }
-SECTIONS {
-   .
= 0; - .text : { *(.text .text.*) } :text - .rodata : { *(.rodata .rodata.*) } :rodata - .data : ALIGN(CONSTANT(MAXPAGESIZE)) { *(.data .data.*) } :data - .bss : { *(.bss .bss.*) *(COMMON) } :data - /DISCARD/ : { *(.eh_frame*) *(.note .note.*) } -} -EOF -aarch64-linux-gnu-gcc -Wl,-Ta64-linker.ld -Wall -Wextra -ffreestanding -nostdlib -fno-whole-program -O2 grob.c -o grob | aarch64-linux-gnu-objdump -SC grob | awk '{print "env.code_mem.emplace_back(0x"$2"); //" $0}' -aarch64-linux-gnu-gcc -Wl,-Ta64-linker.ld -Wall -Wextra -ffreestanding -nostdlib -fno-whole-program -O2 grob.c -o grob | aarch64-linux-gnu-objdump -SC grob | awk '{print $2", "}' -*/ -TEST_CASE("high register pressure proper handling with block linking 1", "[a64][c]") { - A64TestEnv env; - A64::UserConfig conf{}; - conf.callbacks = &env; - A64::Jit jit{conf}; - - REQUIRE(conf.HasOptimization(OptimizationFlag::BlockLinking)); - env.code_mem = { 0x90000008, 0x91230108, 0xb860d909, 0xb861d90a, 0x0b0a0129, 0xb820d909, 0x1b017c00, 0xb94003e1, 0x1b027c00, 0x1b037c00, 0x1b047c00, 0x1b057c00, 0x1b067c00, 0x1b077c00, 0x1b017c00, 0xb9400be1, 0x1b017c00, 0xb94013e1, 0x1b017c00, 0xb9401be1, 0x1b017c00, 0xd65f03c0, 0xd503201f, 0xd503201f, 0xa9a27bfd, 0x90000000, 0x91230000, 0x910003fd, 0xa90153f3, 0xa9025bf5, 0xa90363f7, 0xa9046bf9, 0xa90573fb, 0x29408c01, 0x2941b40e, 0xb863d804, 0xb861d802, 0x2942ac0c, 0x0b040042, 0x1b037c24, 0x2943a40a, 0x29449c08, 0x1b0e7c84, 0x29459406, 0xb821d802, 0x1b0d7c84, 0x29408c01, 0x2941b40e, 0x1b0c7c84, 0x1b0b7c84, 0x2942ac0c, 0x1b0a7c84, 0x1b097c84, 0x2943a40a, 0x1b087c84, 0x1b077c84, 0x29449c08, 0xb863d80f, 0x1b037c23, 0x1b067c84, 0xb861d802, 0x0b0f0042, 0x1b0e7c63, 0x1b057c84, 0x29459406, 0xb821d802, 0x1b0d7c63, 0x2943f002, 0x2940d801, 0x1b0c7c63, 0x2941e81b, 0x2942e019, 0x1b0b7c63, 0xb90067e2, 0x1b0a7c63, 0x1b097c63, 0x1b087c63, 0x1b077c63, 0x1b067c63, 0x1b057c63, 0x29449402, 0x290d17e2, 0xb876d805, 0xb861d802, 0x29459c06, 0x0b050042, 0xb821d802, 0x1b167c21, 0x290e1fe6, 0x2940d40c, 0x1b1b7c21, 0x2941a408, 0x290f27e8, 0x2942ac0a, 0x1b1a7c21, 0xb86cd802, 0xb875d805, 0x29102fea, 0x0b050042, 0x1b197c21, 0x2943b80d, 0x29113bed, 0x2944c00f, 0x291243ef, 0x1b187c21, 0x2945c811, 0xb82cd802, 0x29134bf1, 0x1b157d8c, 0x2941f813, 0x2940d00b, 0x29147bf3, 0x29429402, 0x291517e2, 0x29439c06, 0x29161fe6, 0x2944a408, 0xb874d805, 0xb86bd802, 0x291727e8, 0x0b050042, 0x2945b80a, 0xb82bd802, 0x29183bea, 0x1b147d6b, 0x2941c00f, 0x2940cc0a, 0x291943ef, 0x2942c811, 0x291a4bf1, 0x2943881e, 0x291b0bfe, 0x29449805, 0x291c1be5, 0x2945a007, 0x291d23e7, 0xb86ad802, 0xb873d805, 0x0b050042, 0xb82ad802, 0x1b137d4a, 0x2941b80d, 0x2940f809, 0x291e3bed, 0x2942c00f, 0x291f43ef, 0x2943c811, 0xb90103f1, 0xb90107f2, 0x29449402, 0xb9010be2, 0xb9010fe5, 0xb869d802, 0xb87ed805, 0x29459c06, 0x0b050042, 0xb829d802, 0x1b1e7d29, 0xb90113e6, 0xb90117e7, 0x2941bc0e, 0x2940c808, 0xb9011bee, 0xb9011fef, 0x2942c410, 0xb90123f0, 0xb90127f1, 0x29439402, 0xb9012be2, 0xb9012fe5, 0xb868d802, 0x29449c06, 0xb90133e6, 0xb90137e7, 0xb872d805, 0x2945b80d, 0x0b050042, 0xb828d802, 0x1b127d08, 0xb9013bed, 0xb9013fee, 0x2941c00f, 0x2940c407, 0xb90143ef, 0xb90147f0, 0x29429402, 0xb9014be2, 0xb9014fe5, 0x2943b806, 0xb90153e6, 0xb90157ee, 0x2944c00f, 0xb9015bef, 0xb9015ff0, 0x29459402, 0xb90163e2, 0xb867d802, 0xb90167e5, 0xb871d805, 0x0b050042, 0xb827d802, 0x1b117ce7, 0x2940c002, 0x2941b406, 0xb9016be6, 0xb9016fed, 0x2942bc0e, 0xb90173ee, 0xb90177ef, 0x29439805, 0xb9017be5, 0xb9017fe6, 0x2944bc0e, 0xb90183ee, 0xb90187ef, 0x29459805, 0xb9018be5, 0xb862d805, 0xb9018fe6, 
diff --git a/src/dynarmic/tests/A64/testenv.h b/src/dynarmic/tests/A64/testenv.h
index 8662fc7921..2c5a500f75 100644
--- a/src/dynarmic/tests/A64/testenv.h
+++ b/src/dynarmic/tests/A64/testenv.h
@@ -9,7 +9,7 @@
 #pragma once
 
 #include
-#include <unordered_map>
+#include <map>
 
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/common_types.h"
@@ -26,7 +26,7 @@ public:
     u64 code_mem_start_address = 0;
     std::vector<u32> code_mem;
-    std::unordered_map<u64, u8> modified_memory;
+    std::map<u64, u8> modified_memory;
     std::vector<std::string> interrupts;
 
     bool IsInCodeMem(u64 vaddr) const {
@@ -133,7 +133,6 @@ class A64FastmemTestEnv final : public Dynarmic::A64::UserCallbacks {
 public:
     u64 ticks_left = 0;
     char* backing_memory = nullptr;
-    bool ignore_invalid_insn = false;
 
     explicit A64FastmemTestEnv(char* addr) : backing_memory(addr) {}
@@ -206,7 +205,7 @@ public:
         return true;
     }
 
-    void InterpreterFallback(u64 pc, size_t num_instructions) override { ASSERT_MSG(ignore_invalid_insn, "InterpreterFallback({:016x}, {})", pc, num_instructions); }
+    void InterpreterFallback(u64 pc, size_t num_instructions) override { ASSERT_MSG(false, "InterpreterFallback({:016x}, {})", pc, num_instructions); }
 
     void CallSVC(std::uint32_t swi) override { ASSERT_MSG(false, "CallSVC({})", swi); }
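Note (illustration, not part of the patch): the ticks_left member changed above is what bounds each test run; dynarmic calls back into the environment to learn how many ticks remain and to charge executed ticks, and halts when the budget is exhausted. A sketch of that callback pair, assuming dynarmic's A64::UserCallbacks interface as used throughout these test environments:

```cpp
// Sketch: how a test environment's tick budget stops Jit::Run() (simplified).
#include <cstdint>

struct TickBudget {
    std::uint64_t ticks_left = 0;

    // dynarmic asks how long it may keep running...
    std::uint64_t GetTicksRemaining() /* override */ {
        return ticks_left;
    }
    // ...and reports back how many ticks the emitted code consumed.
    void AddTicks(std::uint64_t ticks) /* override */ {
        ticks_left = (ticks > ticks_left) ? 0 : ticks_left - ticks;
    }
};
```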
diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt
index 06b69d8510..fba74138c5 100644
--- a/src/dynarmic/tests/CMakeLists.txt
+++ b/src/dynarmic/tests/CMakeLists.txt
@@ -29,7 +29,6 @@ if ("A64" IN_LIST DYNARMIC_FRONTENDS)
         A64/fp_min_max.cpp
         A64/misaligned_page_table.cpp
         A64/test_invalidation.cpp
-        A64/real_world.cpp
         A64/testenv.h
     )
 endif()
diff --git a/src/dynarmic/tests/unicorn_emu/a64_unicorn.cpp b/src/dynarmic/tests/unicorn_emu/a64_unicorn.cpp
index aa66ff7f9a..42b72bdb91 100644
--- a/src/dynarmic/tests/unicorn_emu/a64_unicorn.cpp
+++ b/src/dynarmic/tests/unicorn_emu/a64_unicorn.cpp
@@ -173,7 +173,7 @@ void A64Unicorn::InterruptHook(uc_engine* uc, u32 int_number, void* user_data) {
     auto* this_ = static_cast<A64Unicorn*>(user_data);
 
     u32 esr;
-    //CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
+    CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
 
     auto ec = esr >> 26;
    auto iss = esr & 0xFFFFFF;
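Note (illustration, not part of the patch): the re-enabled read hands InterruptHook a real syndrome value to decode. An ESR register packs the exception class into its top six bits, which is what the `esr >> 26` above extracts; the remaining low bits carry the instruction-specific syndrome, masked here with 0xFFFFFF. A standalone sketch of the same decoding, with a purely illustrative syndrome value:

```cpp
// Sketch: decoding an ESR value the way InterruptHook does above.
#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t esr = 0x56000001; // illustrative value only
    const auto ec = esr >> 26;            // exception class, bits [31:26]
    const auto iss = esr & 0xFFFFFF;      // low syndrome bits, as masked above
    std::printf("EC=%#x ISS=%#x\n", ec, iss);
    return 0;
}
```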