diff --git a/docs/Development.md b/docs/Development.md
index 0db4131dbb..e60384e8ab 100644
--- a/docs/Development.md
+++ b/docs/Development.md
@@ -1,11 +1,11 @@
 # Development
 
-* **Windows**: [Windows Building Guide](./docs/build/Windows.md)
-* **Linux**: [Linux Building Guide](./docs/build/Linux.md)
-* **Android**: [Android Building Guide](./docs/build/Android.md)
-* **Solaris**: [Solaris Building Guide](./docs/build/Solaris.md)
-* **FreeBSD**: [FreeBSD Building Guide](./docs/build/FreeBSD.md)
-* **macOS**: [macOS Building Guide](./docs/build/macOS.md)
+* **Windows**: [Windows Building Guide](./build/Windows.md)
+* **Linux**: [Linux Building Guide](./build/Linux.md)
+* **Android**: [Android Building Guide](./build/Android.md)
+* **Solaris**: [Solaris Building Guide](./build/Solaris.md)
+* **FreeBSD**: [FreeBSD Building Guide](./build/FreeBSD.md)
+* **macOS**: [macOS Building Guide](./build/macOS.md)
 
 # CPM
 
@@ -104,7 +104,7 @@ Then type `target remote localhost:1234` and type `c` (for continue) - and then
 
 ### gdb cheatsheet
 
-- `mo <cmd>`: Monitor commands, `get info`, `get fastmem` and `get mappings` are available.
+- `mo <cmd>`: Monitor commands, `get info`, `get fastmem` and `get mappings` are available. Type `mo help` for more info.
 - `detach`: Detach from remote (i.e. restarting the emulator).
 - `c`: Continue
 - `p <var>`: Print variable, `p/x <var>` for hexadecimal.
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 64592a8855..07e9ae7a8f 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -151,6 +151,17 @@ if (ENABLE_WEB_SERVICE)
     )
 endif()
 
+# unordered_dense
+AddPackage(
+    NAME unordered_dense
+    REPO "Lizzie841/unordered_dense"
+    SHA e59d30b7b1
+    HASH 71eff7bd9ba4b9226967bacd56a8ff000946f8813167cb5664bb01e96fb79e4e220684d824fe9c59c4d1cc98c606f13aff05b7940a1ed8ab3c95d6974ee34fa0
+    FIND_PACKAGE_ARGUMENTS "CONFIG"
+    OPTIONS
+        "UNORDERED_DENSE_INSTALL OFF"
+)
+
 # FFMpeg
 if (YUZU_USE_BUNDLED_FFMPEG)
     add_subdirectory(ffmpeg)
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e9aed1d7af..cbe1d35fc5 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -262,13 +262,13 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
 endif()
 
 if (BOOST_NO_HEADERS)
-    target_link_libraries(common PUBLIC Boost::algorithm Boost::icl Boost::pool)
+    target_link_libraries(common PUBLIC Boost::algorithm Boost::icl Boost::pool)
 else()
-    target_link_libraries(common PUBLIC Boost::headers)
+    target_link_libraries(common PUBLIC Boost::headers)
 endif()
 
 if (lz4_ADDED)
-    target_include_directories(common PRIVATE ${lz4_SOURCE_DIR}/lib)
+    target_include_directories(common PRIVATE ${lz4_SOURCE_DIR}/lib)
 endif()
 
 target_link_libraries(common PUBLIC fmt::fmt stb::headers Threads::Threads)
@@ -280,6 +280,11 @@ else()
     target_link_libraries(common PRIVATE zstd)
 endif()
 
+if (TARGET unordered_dense::unordered_dense)
+    # weird quirk of system installs
+    target_link_libraries(common PUBLIC unordered_dense::unordered_dense)
+endif()
+
 if(ANDROID)
     # For ASharedMemory_create
     target_link_libraries(common PRIVATE android)
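Because `unordered_dense` is linked `PUBLIC` on `common`, any dependent target can include the header directly. For readers unfamiliar with the library, here is a minimal, self-contained sketch of the map operations the heap tracker below relies on (the key/value types are illustrative, not the PR's):

```cpp
#include <ankerl/unordered_dense.h>
#include <cstdint>
#include <cstdio>

int main() {
    // Keys are VAddr-like integers, values a small POD, as in HeapTracker.
    ankerl::unordered_dense::map<std::uint64_t, std::uint64_t> mappings;

    // insert_or_assign returns {iterator, inserted}; the diff relies on the
    // iterator (it.first) to update fields right after insertion.
    auto it = mappings.insert_or_assign(0x1000, 42);
    it.first->second = 43;

    // Values live in one contiguous vector, which is why lookups and scans are
    // cache friendly; erase() swap-removes, so iteration order is not stable.
    if (auto found = mappings.find(0x1000); found != mappings.end()) {
        std::printf("value: %llu\n", static_cast<unsigned long long>(found->second));
        mappings.erase(found); // erase(iterator) returns the next iterator
    }
    return 0;
}
```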
diff --git a/src/common/heap_tracker.cpp b/src/common/heap_tracker.cpp
index 6832087959..d509f2644c 100644
--- a/src/common/heap_tracker.cpp
+++ b/src/common/heap_tracker.cpp
@@ -1,3 +1,5 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -34,68 +36,60 @@ HeapTracker::~HeapTracker() = default;
 
 void HeapTracker::Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm, bool is_separate_heap) {
+    bool rebuild_required = false;
     // When mapping other memory, map pages immediately.
     if (!is_separate_heap) {
         m_buffer.Map(virtual_offset, host_offset, length, perm, false);
         return;
     }
-
-    {
-        // We are mapping part of a separate heap.
+    // We are mapping part of a separate heap; insert it into the mappings.
+    {
         std::scoped_lock lk{m_lock};
-
-        auto* const map = new SeparateHeapMap{
-            .vaddr = virtual_offset,
+        m_map_count++;
+        const auto it = m_mappings.insert_or_assign(virtual_offset, SeparateHeapMap{
             .paddr = host_offset,
             .size = length,
             .tick = m_tick++,
             .perm = perm,
             .is_resident = false,
-        };
-
-        // Insert into mappings.
-        m_map_count++;
-        m_mappings.insert(*map);
+        });
+        // Update tick before possible rebuild.
+        it.first->second.tick = m_tick++;
+        // Check if we need to rebuild.
+        if (m_resident_map_count >= m_max_resident_map_count)
+            rebuild_required = true;
+        // Map the area.
+        m_buffer.Map(it.first->first, it.first->second.paddr, it.first->second.size, it.first->second.perm, false);
+        // This map is now resident.
+        it.first->second.is_resident = true;
+        m_resident_map_count++;
+        m_resident_mappings.insert(*it.first);
     }
-
-    // Finally, map.
-    this->DeferredMapSeparateHeap(virtual_offset);
+    // A rebuild was required, so perform it now.
+    if (rebuild_required)
+        this->RebuildSeparateHeapAddressSpace();
 }
 
 void HeapTracker::Unmap(size_t virtual_offset, size_t size, bool is_separate_heap) {
     // If this is a separate heap...
     if (is_separate_heap) {
         std::scoped_lock lk{m_lock};
-
-        const SeparateHeapMap key{
-            .vaddr = virtual_offset,
-        };
-
         // Split at the boundaries of the region we are removing.
         this->SplitHeapMapLocked(virtual_offset);
         this->SplitHeapMapLocked(virtual_offset + size);
-
         // Erase all mappings in range.
-        auto it = m_mappings.find(key);
-        while (it != m_mappings.end() && it->vaddr < virtual_offset + size) {
-            // Get underlying item.
-            auto* const item = std::addressof(*it);
-
+        auto it = m_mappings.find(virtual_offset);
+        while (it != m_mappings.end() && it->first < virtual_offset + size) {
             // If resident, erase from resident map.
-            if (item->is_resident) {
+            if (it->second.is_resident) {
                 ASSERT(--m_resident_map_count >= 0);
-                m_resident_mappings.erase(m_resident_mappings.iterator_to(*item));
+                m_resident_mappings.erase(m_resident_mappings.find(it->first));
             }
-
             // Erase from map.
             ASSERT(--m_map_count >= 0);
             it = m_mappings.erase(it);
-
-            // Free the item.
-            delete item;
         }
     }
-
     // Unmap pages.
     m_buffer.Unmap(virtual_offset, size, false);
 }
 
@@ -117,110 +111,51 @@ void HeapTracker::Protect(size_t virtual_offset, size_t size, MemoryPermission p
         {
             std::scoped_lock lk2{m_lock};
-
-            const SeparateHeapMap key{
-                .vaddr = next,
-            };
-
             // Try to get the next mapping corresponding to this address.
-            const auto it = m_mappings.nfind(key);
-
+            const auto it = m_mappings.find(next);
             if (it == m_mappings.end()) {
                 // There are no separate heap mappings remaining.
                 next = end;
                 should_protect = true;
-            } else if (it->vaddr == cur) {
+            } else if (it->first == cur) {
                 // We are in range.
                 // Update permission bits.
-                it->perm = perm;
+                it->second.perm = perm;
                 // Determine next address and whether we should protect.
-                next = cur + it->size;
-                should_protect = it->is_resident;
+                next = cur + it->second.size;
+                should_protect = it->second.is_resident;
             } else /* if (it->vaddr > cur) */ {
                 // We weren't in range, but there is a block coming up that will be.
-                next = it->vaddr;
+                next = it->first;
                 should_protect = true;
             }
         }
 
         // Clamp to end.
         next = std::min(next, end);
-
-        // Reprotect, if we need to.
-        if (should_protect) {
+        if (should_protect)
             m_buffer.Protect(cur, next - cur, perm);
-        }
-
-        // Advance.
         cur = next;
     }
 }
 
-bool HeapTracker::DeferredMapSeparateHeap(u8* fault_address) {
-    if (m_buffer.IsInVirtualRange(fault_address)) {
-        return this->DeferredMapSeparateHeap(fault_address - m_buffer.VirtualBasePointer());
-    }
-
-    return false;
-}
-
-bool HeapTracker::DeferredMapSeparateHeap(size_t virtual_offset) {
-    bool rebuild_required = false;
-
-    {
-        std::scoped_lock lk{m_lock};
-
-        // Check to ensure this was a non-resident separate heap mapping.
-        const auto it = this->GetNearestHeapMapLocked(virtual_offset);
-        if (it == m_mappings.end() || it->is_resident) {
-            return false;
-        }
-
-        // Update tick before possible rebuild.
-        it->tick = m_tick++;
-
-        // Check if we need to rebuild.
-        if (m_resident_map_count > m_max_resident_map_count) {
-            rebuild_required = true;
-        }
-
-        // Map the area.
-        m_buffer.Map(it->vaddr, it->paddr, it->size, it->perm, false);
-
-        // This map is now resident.
-        it->is_resident = true;
-        m_resident_map_count++;
-        m_resident_mappings.insert(*it);
-    }
-
-    if (rebuild_required) {
-        // A rebuild was required, so perform it now.
-        this->RebuildSeparateHeapAddressSpace();
-    }
-
-    return true;
-}
-
 void HeapTracker::RebuildSeparateHeapAddressSpace() {
     std::scoped_lock lk{m_rebuild_lock, m_lock};
-
     ASSERT(!m_resident_mappings.empty());
-
-    // Dump half of the mappings.
-    //
     // Despite being worse in theory, this has proven to be better in practice than more
     // regularly dumping a smaller amount, because it significantly reduces average case
    // lock contention.
-    const size_t desired_count = std::min(m_resident_map_count, m_max_resident_map_count) / 2;
-    const size_t evict_count = m_resident_map_count - desired_count;
+    std::size_t const desired_count = std::min(m_resident_map_count, m_max_resident_map_count) / 2;
+    std::size_t const evict_count = m_resident_map_count - desired_count;
     auto it = m_resident_mappings.begin();
-
-    for (size_t i = 0; i < evict_count && it != m_resident_mappings.end(); i++) {
+    for (std::size_t i = 0; i < evict_count && it != m_resident_mappings.end(); i++) {
         // Unmark and unmap.
-        it->is_resident = false;
-        m_buffer.Unmap(it->vaddr, it->size, false);
-
+        it->second.is_resident = false;
+        m_buffer.Unmap(it->first, it->second.size, false);
         // Advance.
         ASSERT(--m_resident_map_count >= 0);
         it = m_resident_mappings.erase(it);
@@ -229,53 +164,32 @@
 void HeapTracker::SplitHeapMap(VAddr offset, size_t size) {
     std::scoped_lock lk{m_lock};
-
     this->SplitHeapMapLocked(offset);
     this->SplitHeapMapLocked(offset + size);
 }
 
 void HeapTracker::SplitHeapMapLocked(VAddr offset) {
-    const auto it = this->GetNearestHeapMapLocked(offset);
-    if (it == m_mappings.end() || it->vaddr == offset) {
-        // Not contained or no split required.
-        return;
+    auto it = this->GetNearestHeapMapLocked(offset);
+    if (it != m_mappings.end() && it->first != offset) {
+        // Adjust the left mapping.
+        auto const orig_size = it->second.size;
+        auto const left_size = offset - it->first;
+        it->second.size = left_size;
+        // Insert the new right map.
+        auto const right = SeparateHeapMap{
+            .paddr = it->second.paddr + left_size,
+            .size = orig_size - left_size,
+            .tick = it->second.tick,
+            .perm = it->second.perm,
+            .is_resident = it->second.is_resident,
+        };
+        m_map_count++;
+        auto rit = m_mappings.insert_or_assign(it->first + left_size, right);
+        if (rit.first->second.is_resident) {
+            m_resident_map_count++;
+            m_resident_mappings.insert(*rit.first);
+        }
     }
-
-    // Cache the original values.
-    auto* const left = std::addressof(*it);
-    const size_t orig_size = left->size;
-
-    // Adjust the left map.
-    const size_t left_size = offset - left->vaddr;
-    left->size = left_size;
-
-    // Create the new right map.
-    auto* const right = new SeparateHeapMap{
-        .vaddr = left->vaddr + left_size,
-        .paddr = left->paddr + left_size,
-        .size = orig_size - left_size,
-        .tick = left->tick,
-        .perm = left->perm,
-        .is_resident = left->is_resident,
-    };
-
-    // Insert the new right map.
-    m_map_count++;
-    m_mappings.insert(*right);
-
-    // If resident, also insert into resident map.
-    if (right->is_resident) {
-        m_resident_map_count++;
-        m_resident_mappings.insert(*right);
-    }
-}
-
-HeapTracker::AddrTree::iterator HeapTracker::GetNearestHeapMapLocked(VAddr offset) {
-    const SeparateHeapMap key{
-        .vaddr = offset,
-    };
-
-    return m_mappings.find(key);
 }
 
 } // namespace Common
diff --git a/src/common/heap_tracker.h b/src/common/heap_tracker.h
index ee5b0bf43a..14b5401c18 100644
--- a/src/common/heap_tracker.h
+++ b/src/common/heap_tracker.h
@@ -1,93 +1,55 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
-#include <atomic>
 #include <mutex>
-#include <set>
 #include <shared_mutex>
-
+#include <ankerl/unordered_dense.h>
 #include "common/host_memory.h"
-#include "common/intrusive_red_black_tree.h"
 
 namespace Common {
 
 struct SeparateHeapMap {
-    Common::IntrusiveRedBlackTreeNode addr_node{};
-    Common::IntrusiveRedBlackTreeNode tick_node{};
-    VAddr vaddr{};
-    PAddr paddr{};
-    size_t size{};
-    size_t tick{};
-    MemoryPermission perm{};
-    bool is_resident{};
-};
-
-struct SeparateHeapMapAddrComparator {
-    static constexpr int Compare(const SeparateHeapMap& lhs, const SeparateHeapMap& rhs) {
-        if (lhs.vaddr < rhs.vaddr) {
-            return -1;
-        } else if (lhs.vaddr <= (rhs.vaddr + rhs.size - 1)) {
-            return 0;
-        } else {
-            return 1;
-        }
-    }
-};
-
-struct SeparateHeapMapTickComparator {
-    static constexpr int Compare(const SeparateHeapMap& lhs, const SeparateHeapMap& rhs) {
-        if (lhs.tick < rhs.tick) {
-            return -1;
-        } else if (lhs.tick > rhs.tick) {
-            return 1;
-        } else {
-            return SeparateHeapMapAddrComparator::Compare(lhs, rhs);
-        }
-    }
+    PAddr paddr{};      // 8
+    std::size_t size{}; // 8 (16)
+    std::size_t tick{}; // 8 (24)
+    // 4 bits needed, sync with host_memory.h if needed
+    MemoryPermission perm : 4 = MemoryPermission::Read;
+    bool is_resident : 1 = false;
 };
+static_assert(sizeof(SeparateHeapMap) == 32); // half a cache line! good for coherency
 
 class HeapTracker {
 public:
     explicit HeapTracker(Common::HostMemory& buffer);
     ~HeapTracker();
-
-    void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm,
-             bool is_separate_heap);
+    void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm, bool is_separate_heap);
     void Unmap(size_t virtual_offset, size_t size, bool is_separate_heap);
     void Protect(size_t virtual_offset, size_t length, MemoryPermission perm);
-    u8* VirtualBasePointer() {
+    inline u8* VirtualBasePointer() noexcept {
        return m_buffer.VirtualBasePointer();
     }
-
-    bool DeferredMapSeparateHeap(u8* fault_address);
-    bool DeferredMapSeparateHeap(size_t virtual_offset);
-
 private:
-    using AddrTreeTraits =
-        Common::IntrusiveRedBlackTreeMemberTraitsDeferredAssert<&SeparateHeapMap::addr_node>;
-    using AddrTree = AddrTreeTraits::TreeType;
-
-    using TickTreeTraits =
-        Common::IntrusiveRedBlackTreeMemberTraitsDeferredAssert<&SeparateHeapMap::tick_node>;
-    using TickTree = TickTreeTraits::TreeType;
-
-    AddrTree m_mappings{};
-    TickTree m_resident_mappings{};
-
+    // TODO: You may want to "fake-map" the first 2GB of 64-bit address space
+    // and dedicate it entirely to a recursive PTE mapping :)
+    // However Ankerl is way better than using an RB tree, in all senses
+    using AddrTree = ankerl::unordered_dense::map<VAddr, SeparateHeapMap>;
+    AddrTree m_mappings;
+    using TicksTree = ankerl::unordered_dense::map<VAddr, SeparateHeapMap>;
+    TicksTree m_resident_mappings;
private:
     void SplitHeapMap(VAddr offset, size_t size);
     void SplitHeapMapLocked(VAddr offset);
-
-    AddrTree::iterator GetNearestHeapMapLocked(VAddr offset);
-
     void RebuildSeparateHeapAddressSpace();
-
+    inline HeapTracker::AddrTree::iterator GetNearestHeapMapLocked(VAddr offset) noexcept {
+        return m_mappings.find(offset);
+    }
 private:
     Common::HostMemory& m_buffer;
     const s64 m_max_resident_map_count;
-
     std::shared_mutex m_rebuild_lock{};
     std::mutex m_lock{};
     s64 m_map_count{};
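The 32-byte claim can be sanity-checked in isolation. A hedged stand-in follows — the `PAddr` width and the 4-bit permission enum's underlying type are assumptions mirroring the diff, not the emulator's real definitions:

```cpp
#include <cstddef>
#include <cstdint>

enum class MemoryPermission : std::uint8_t { Read = 1 };

struct SeparateHeapMapModel {
    std::uint64_t paddr{};  // 8 bytes
    std::size_t size{};     // 8 bytes (16)
    std::size_t tick{};     // 8 bytes (24)
    MemoryPermission perm : 4 = MemoryPermission::Read; // bitfields pack past byte 24
    bool is_resident : 1 = false;
};

// 24 bytes of wide fields plus a byte or so of bitfields, padded up to the
// 8-byte alignment of the largest member: 32 bytes total, i.e. half of a
// typical 64-byte cache line, so two entries fit in one line.
static_assert(sizeof(SeparateHeapMapModel) == 32);
static_assert(alignof(SeparateHeapMapModel) == 8);
```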
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 67e6e63c85..8aea5db583 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -47,6 +47,7 @@ constexpr std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
 constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
 constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);
+constexpr inline Xbyak::Reg ABI_JIT_REG = Xbyak::util::rbx;
 
 #ifdef _WIN32
 // Microsoft x64 ABI
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index e6e9fc45be..9d26db51f7 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -3,47 +3,9 @@
 
 #ifdef __linux__
 
-#include "common/signal_chain.h"
-
+//#include "common/signal_chain.h"
 #include "core/arm/dynarmic/arm_dynarmic.h"
-#include "core/hle/kernel/k_process.h"
-#include "core/memory.h"
-
-namespace Core {
-
-namespace {
-
-thread_local Core::Memory::Memory* g_current_memory{};
-std::once_flag g_registered{};
-struct sigaction g_old_segv {};
-
-void HandleSigSegv(int sig, siginfo_t* info, void* ctx) {
-    if (g_current_memory && g_current_memory->InvalidateSeparateHeap(info->si_addr)) {
-        return;
-    }
-
-    return g_old_segv.sa_sigaction(sig, info, ctx);
-}
-
-} // namespace
-
-ScopedJitExecution::ScopedJitExecution(Kernel::KProcess* process) {
-    g_current_memory = std::addressof(process->GetMemory());
-}
-
-ScopedJitExecution::~ScopedJitExecution() {
-    g_current_memory = nullptr;
-}
-
-void ScopedJitExecution::RegisterHandler() {
-    std::call_once(g_registered, [] {
-        struct sigaction sa {};
-        sa.sa_sigaction = &HandleSigSegv;
-        sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
-        Common::SigAction(SIGSEGV, std::addressof(sa), std::addressof(g_old_segv));
-    });
-}
-
-} // namespace Core
+//#include "core/hle/kernel/k_process.h"
+//#include "core/memory.h"
 
 #endif
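Why `rbx`? It is callee-saved in both the System V and Microsoft x64 ABIs, so generated code can park a JIT state pointer there and it survives any helper calls the emitted code makes. A minimal, hypothetical Xbyak sketch of the pattern (System V argument registers assumed; on Windows the argument would arrive in `rcx` — and dynarmic's `BlockOfCode` exposes its own constant, spelled `ABI_JIT_PTR` in the hunks below):

```cpp
#include <cstdint>
#include <xbyak/xbyak.h>

struct StatePtrDemo : Xbyak::CodeGenerator {
    StatePtrDemo() {
        push(rbx);            // rbx is callee-saved: preserve the caller's value
        mov(rbx, rdi);        // System V: first argument (state*) -> pinned register
        mov(rax, qword[rbx]); // every state access goes through the pinned register
        pop(rbx);
        ret();
    }
};

int main() {
    std::uint64_t state = 1234;
    StatePtrDemo demo;
    const auto fn = demo.getCode<std::uint64_t (*)(std::uint64_t*)>();
    return fn(&state) == 1234 ? 0 : 1; // loads the state word via rbx
}
```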
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 53dd188151..eef7c31160 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -26,24 +26,4 @@ constexpr HaltReason TranslateHaltReason(Dynarmic::HaltReason hr) {
     return static_cast<HaltReason>(hr);
 }
 
-#ifdef __linux__
-
-class ScopedJitExecution {
-public:
-    explicit ScopedJitExecution(Kernel::KProcess* process);
-    ~ScopedJitExecution();
-    static void RegisterHandler();
-};
-
-#else
-
-class ScopedJitExecution {
-public:
-    explicit ScopedJitExecution(Kernel::KProcess* process) {}
-    ~ScopedJitExecution() {}
-    static void RegisterHandler() {}
-};
-
-#endif
-
 } // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index afbf178349..1731ef1aec 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -336,15 +336,11 @@ bool ArmDynarmic32::IsInThumbMode() const {
 }
 
 HaltReason ArmDynarmic32::RunThread(Kernel::KThread* thread) {
-    ScopedJitExecution sj(thread->GetOwnerProcess());
-
     m_jit->ClearExclusiveState();
     return TranslateHaltReason(m_jit->Run());
 }
 
 HaltReason ArmDynarmic32::StepThread(Kernel::KThread* thread) {
-    ScopedJitExecution sj(thread->GetOwnerProcess());
-
     m_jit->ClearExclusiveState();
     return TranslateHaltReason(m_jit->Step());
 }
@@ -386,7 +382,6 @@ ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProc
     m_cp15(std::make_shared<DynarmicCP15>(*this)), m_core_index{core_index} {
     auto& page_table_impl = process->GetPageTable().GetBasePageTable().GetImpl();
     m_jit = MakeJit(&page_table_impl);
-    ScopedJitExecution::RegisterHandler();
 }
 
 ArmDynarmic32::~ArmDynarmic32() = default;
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 99a80644ad..9674e88d9d 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -136,6 +136,7 @@ public:
     case Dynarmic::A64::Exception::SendEvent:
     case Dynarmic::A64::Exception::SendEventLocal:
     case Dynarmic::A64::Exception::Yield:
+        LOG_TRACE(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast<std::size_t>(exception), pc, m_memory.Read32(pc));
         return;
     case Dynarmic::A64::Exception::NoExecuteFault:
         LOG_CRITICAL(Core_ARM, "Cannot execute instruction at unmapped address {:#016x}", pc);
@@ -144,12 +145,10 @@ public:
     default:
         if (m_debugger_enabled) {
             ReturnException(pc, InstructionBreakpoint);
-            return;
+        } else {
+            m_parent.LogBacktrace(m_process);
+            LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast<std::size_t>(exception), pc, m_memory.Read32(pc));
         }
-
-        m_parent.LogBacktrace(m_process);
-        LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
-                     static_cast<std::size_t>(exception), pc, m_memory.Read32(pc));
     }
 }
@@ -367,15 +366,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
 }
 
 HaltReason ArmDynarmic64::RunThread(Kernel::KThread* thread) {
-    ScopedJitExecution sj(thread->GetOwnerProcess());
-
     m_jit->ClearExclusiveState();
     return TranslateHaltReason(m_jit->Run());
 }
 
 HaltReason ArmDynarmic64::StepThread(Kernel::KThread* thread) {
-    ScopedJitExecution sj(thread->GetOwnerProcess());
-
     m_jit->ClearExclusiveState();
     return TranslateHaltReason(m_jit->Step());
 }
@@ -415,7 +410,6 @@ ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProc
     auto& page_table = process->GetPageTable().GetBasePageTable();
     auto& page_table_impl = page_table.GetImpl();
     m_jit = MakeJit(&page_table_impl, page_table.GetAddressSpaceWidth());
-    ScopedJitExecution::RegisterHandler();
 }
 
 ArmDynarmic64::~ArmDynarmic64() = default;
diff --git a/src/core/debugger/gdbstub.cpp b/src/core/debugger/gdbstub.cpp
index 80091cc7e0..fcb5787147 100644
--- a/src/core/debugger/gdbstub.cpp
+++ b/src/core/debugger/gdbstub.cpp
@@ -554,32 +554,31 @@ void GDBStub::HandleVCont(std::string_view command, std::vector<DebuggerAction>&
     }
 }
 
-constexpr std::array<std::pair<const char*, Kernel::Svc::MemoryState>, 22> MemoryStateNames{{
-    {"----- Free ------", Kernel::Svc::MemoryState::Free},
-    {"Io               ", Kernel::Svc::MemoryState::Io},
-    {"Static           ", Kernel::Svc::MemoryState::Static},
-    {"Code             ", Kernel::Svc::MemoryState::Code},
-    {"CodeData         ", Kernel::Svc::MemoryState::CodeData},
-    {"Normal           ", Kernel::Svc::MemoryState::Normal},
-    {"Shared           ", Kernel::Svc::MemoryState::Shared},
-    {"AliasCode        ", Kernel::Svc::MemoryState::AliasCode},
-    {"AliasCodeData    ", Kernel::Svc::MemoryState::AliasCodeData},
-    {"Ipc              ", Kernel::Svc::MemoryState::Ipc},
-    {"Stack            ", Kernel::Svc::MemoryState::Stack},
-    {"ThreadLocal      ", Kernel::Svc::MemoryState::ThreadLocal},
-    {"Transferred      ", Kernel::Svc::MemoryState::Transferred},
-    {"SharedTransferred", Kernel::Svc::MemoryState::SharedTransferred},
-    {"SharedCode       ", Kernel::Svc::MemoryState::SharedCode},
-    {"Inaccessible     ", Kernel::Svc::MemoryState::Inaccessible},
-    {"NonSecureIpc     ", Kernel::Svc::MemoryState::NonSecureIpc},
-    {"NonDeviceIpc     ", Kernel::Svc::MemoryState::NonDeviceIpc},
-    {"Kernel           ", Kernel::Svc::MemoryState::Kernel},
-    {"GeneratedCode    ", Kernel::Svc::MemoryState::GeneratedCode},
-    {"CodeOut          ", Kernel::Svc::MemoryState::CodeOut},
-    {"Coverage         ", Kernel::Svc::MemoryState::Coverage},
-}};
-
 static constexpr const char* GetMemoryStateName(Kernel::Svc::MemoryState state) {
+    constexpr std::array<std::pair<const char*, Kernel::Svc::MemoryState>, 22> MemoryStateNames{{
+        {"----- Free ------", Kernel::Svc::MemoryState::Free},
+        {"Io               ", Kernel::Svc::MemoryState::Io},
+        {"Static           ", Kernel::Svc::MemoryState::Static},
+        {"Code             ", Kernel::Svc::MemoryState::Code},
+        {"CodeData         ", Kernel::Svc::MemoryState::CodeData},
+        {"Normal           ", Kernel::Svc::MemoryState::Normal},
+        {"Shared           ", Kernel::Svc::MemoryState::Shared},
+        {"AliasCode        ", Kernel::Svc::MemoryState::AliasCode},
+        {"AliasCodeData    ", Kernel::Svc::MemoryState::AliasCodeData},
+        {"Ipc              ", Kernel::Svc::MemoryState::Ipc},
+        {"Stack            ", Kernel::Svc::MemoryState::Stack},
+        {"ThreadLocal      ", Kernel::Svc::MemoryState::ThreadLocal},
+        {"Transferred      ", Kernel::Svc::MemoryState::Transferred},
+        {"SharedTransferred", Kernel::Svc::MemoryState::SharedTransferred},
+        {"SharedCode       ", Kernel::Svc::MemoryState::SharedCode},
+        {"Inaccessible     ", Kernel::Svc::MemoryState::Inaccessible},
+        {"NonSecureIpc     ", Kernel::Svc::MemoryState::NonSecureIpc},
+        {"NonDeviceIpc     ", Kernel::Svc::MemoryState::NonDeviceIpc},
+        {"Kernel           ", Kernel::Svc::MemoryState::Kernel},
+        {"GeneratedCode    ", Kernel::Svc::MemoryState::GeneratedCode},
+        {"CodeOut          ", Kernel::Svc::MemoryState::CodeOut},
+        {"Coverage         ", Kernel::Svc::MemoryState::Coverage},
+    }};
     for (size_t i = 0; i < MemoryStateNames.size(); i++) {
         if (std::get<1>(MemoryStateNames[i]) == state) {
             return std::get<0>(MemoryStateNames[i]);
@@ -611,13 +610,7 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
     auto* process = GetProcess();
     auto& page_table = process->GetPageTable();
-
-    const char* commands = "Commands:\n"
-                           "  get fastmem\n"
-                           "  get info\n"
-                           "  get mappings\n";
-
-    if (command_str == "get fastmem") {
+    if (command_str == "fastmem" || command_str == "get fastmem") {
         if (Settings::IsFastmemEnabled()) {
             const auto& impl = page_table.GetImpl();
             const auto region = reinterpret_cast<uintptr_t>(impl.fastmem_arena);
@@ -630,7 +623,7 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
         } else {
             reply = "Fastmem is not enabled.\n";
         }
-    } else if (command_str == "get info") {
+    } else if (command_str == "info" || command_str == "get info") {
         auto modules = Core::FindModules(process);
         reply = fmt::format("Process: {:#x} ({})\n"
@@ -648,8 +641,7 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
             GetInteger(page_table.GetHeapRegionStart()),
             GetInteger(page_table.GetHeapRegionStart()) + page_table.GetHeapRegionSize() - 1,
             GetInteger(page_table.GetAliasCodeRegionStart()),
-            GetInteger(page_table.GetAliasCodeRegionStart()) + page_table.GetAliasCodeRegionSize() -
-                1,
+            GetInteger(page_table.GetAliasCodeRegionStart()) + page_table.GetAliasCodeRegionSize() - 1,
             GetInteger(page_table.GetStackRegionStart()),
             GetInteger(page_table.GetStackRegionStart()) + page_table.GetStackRegionSize() - 1);
 
@@ -657,7 +649,7 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
             reply += fmt::format("  {:#012x} - {:#012x} {}\n", vaddr,
                                  GetInteger(Core::GetModuleEnd(process, vaddr)), name);
         }
-    } else if (command_str == "get mappings") {
+    } else if (command_str == "mappings" || command_str == "get mappings") {
         reply = "Mappings:\n";
         VAddr cur_addr = 0;
 
@@ -675,15 +667,11 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
                 std::numeric_limits<u64>::max()) {
                 const char* state = GetMemoryStateName(svc_mem_info.state);
                 const char* perm = GetMemoryPermissionString(svc_mem_info);
-                const char l = True(svc_mem_info.attribute & MemoryAttribute::Locked) ? 'L' : '-';
-                const char i =
-                    True(svc_mem_info.attribute & MemoryAttribute::IpcLocked) ? 'I' : '-';
-                const char d =
-                    True(svc_mem_info.attribute & MemoryAttribute::DeviceShared) ? 'D' : '-';
+                const char i = True(svc_mem_info.attribute & MemoryAttribute::IpcLocked) ? 'I' : '-';
+                const char d = True(svc_mem_info.attribute & MemoryAttribute::DeviceShared) ? 'D' : '-';
                 const char u = True(svc_mem_info.attribute & MemoryAttribute::Uncached) ? 'U' : '-';
-                const char p =
-                    True(svc_mem_info.attribute & MemoryAttribute::PermissionLocked) ? 'P' : '-';
+                const char p = True(svc_mem_info.attribute & MemoryAttribute::PermissionLocked) ? 'P' : '-';
 
                 reply += fmt::format(
                     "  {:#012x} - {:#012x} {} {} {}{}{}{}{} [{}, {}]\n", svc_mem_info.base_address,
@@ -698,11 +686,8 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
 
             cur_addr = next_address;
         }
-    } else if (command_str == "help") {
-        reply = commands;
     } else {
-        reply = "Unknown command.\n";
-        reply += commands;
+        reply += "Commands: fastmem, info, mappings\n";
     }
 
     std::span<const u8> reply_span{reinterpret_cast<const u8*>(&reply.front()), reply.size()};
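Moving the name table into the function keeps it out of namespace scope with no runtime cost: a function-local `constexpr` array has no global initializer and no TU-wide symbol, and the linear scan is trivial for 22 entries. A standalone illustration of the same idiom (simplified types, not the gdbstub's):

```cpp
#include <array>
#include <cstddef>
#include <cstdio>
#include <utility>

enum class State { Free, Code, Stack };

static constexpr const char* StateName(State s) {
    // Function-local constexpr table: lives only where it is used.
    constexpr std::array<std::pair<const char*, State>, 3> names{{
        {"Free", State::Free},
        {"Code", State::Code},
        {"Stack", State::Stack},
    }};
    for (std::size_t i = 0; i < names.size(); i++) {
        if (names[i].second == s) {
            return names[i].first;
        }
    }
    return "Unknown";
}

int main() {
    std::printf("%s\n", StateName(State::Stack)); // prints "Stack"
    return 0;
}
```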
diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h
index 191c28bb46..3f870becbf 100644
--- a/src/core/hardware_properties.h
+++ b/src/core/hardware_properties.h
@@ -15,7 +15,7 @@ namespace Hardware {
 
 constexpr u64 BASE_CLOCK_RATE = 1'020'000'000; // Default CPU Frequency = 1020 MHz
 constexpr u64 CNTFREQ = 19'200'000;            // CNTPCT_EL0 Frequency = 19.2 MHz
-constexpr u32 NUM_CPU_CORES = 4;               // Number of CPU Cores
+constexpr u32 NUM_CPU_CORES = 4;               // Number of CPU Cores - sync with dynarmic exclusive_monitor.h
 
 // Virtual to Physical core map.
 constexpr std::array<s32, Common::BitSize<u64>()> VirtualToPhysicalCoreMap{
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index 80566b7e77..cf03353f84 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -1266,10 +1266,6 @@ void KProcess::InitializeInterfaces() {
 
 #ifdef HAS_NCE
     if (this->IsApplication() && Settings::IsNceEnabled()) {
-        // Register the scoped JIT handler before creating any NCE instances
-        // so that its signal handler will appear first in the signal chain.
-        Core::ScopedJitExecution::RegisterHandler();
-
         for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
             m_arm_interfaces[i] = std::make_unique<Core::ArmNce>(m_kernel.System(), true, i);
         }
diff --git a/src/core/hle/service/am/service/library_applet_accessor.cpp b/src/core/hle/service/am/service/library_applet_accessor.cpp
index 5ce96a1e3f..f7314b8f28 100644
--- a/src/core/hle/service/am/service/library_applet_accessor.cpp
+++ b/src/core/hle/service/am/service/library_applet_accessor.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -101,12 +104,12 @@ Result ILibraryAppletAccessor::PushInData(SharedPointer<IStorage> storage) {
 Result ILibraryAppletAccessor::PopOutData(Out<SharedPointer<IStorage>> out_storage) {
     LOG_DEBUG(Service_AM, "called");
 
-    if (auto caller_applet = m_applet->caller_applet.lock(); caller_applet) {
-        Event m_system_event = caller_applet->lifecycle_manager.GetSystemEvent();
-        m_system_event.Signal();
-        caller_applet->lifecycle_manager.RequestResumeNotification();
-        m_system_event.Clear();
-    }
+    if (auto caller_applet = m_applet->caller_applet.lock(); caller_applet) {
+        caller_applet->lifecycle_manager.GetSystemEvent().Signal();
+        caller_applet->lifecycle_manager.RequestResumeNotification();
+        caller_applet->lifecycle_manager.GetSystemEvent().Clear();
+        caller_applet->lifecycle_manager.UpdateRequestedFocusState();
+    }
 
     R_RETURN(m_broker->GetOutData().Pop(out_storage.Get()));
 }
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 0035c626e2..08391cd815 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -61,8 +61,7 @@
     }
 
 #ifdef __linux__
-        heap_tracker.emplace(system.DeviceMemory().buffer);
-        buffer = std::addressof(*heap_tracker);
+        buffer.emplace(system.DeviceMemory().buffer);
 #else
         buffer = std::addressof(system.DeviceMemory().buffer);
 #endif
@@ -1024,9 +1023,8 @@ struct Memory::Impl {
     std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
     std::mutex sys_core_guard;
 
-    std::optional<Common::HeapTracker> heap_tracker;
 #ifdef __linux__
-    Common::HeapTracker* buffer{};
+    std::optional<Common::HeapTracker> buffer;
 #else
     Common::HostMemory* buffer{};
 #endif
@@ -1230,22 +1228,7 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
     if (rasterizer) {
         impl->InvalidateGPUMemory(ptr, size);
     }
-
-#ifdef __linux__
-    if (!rasterizer && mapped) {
-        impl->buffer->DeferredMapSeparateHeap(GetInteger(vaddr));
-    }
-#endif
-
     return mapped && ptr != nullptr;
 }
 
-bool Memory::InvalidateSeparateHeap(void* fault_address) {
-#ifdef __linux__
-    return impl->buffer->DeferredMapSeparateHeap(static_cast<u8*>(fault_address));
-#else
-    return false;
-#endif
-}
-
 } // namespace Core::Memory
diff --git a/src/core/memory.h b/src/core/memory.h
index dcca26892b..99108ecf0d 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -487,13 +487,8 @@ public:
      * marked as debug or non-debug.
      */
     void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
-
     void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
-
     bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
-
-    bool InvalidateSeparateHeap(void* fault_address);
-
 private:
     Core::System& system;
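The platform split now hinges on a single member whose type differs per OS: Linux owns an in-place `HeapTracker`, everything else keeps a raw `HostMemory*`. A reduced sketch of the pattern with stand-in types (nothing here is the emulator's real API):

```cpp
#include <optional>

struct HostMemory { int dummy; };
struct HeapTracker {
    explicit HeapTracker(HostMemory& m) : mem{&m} {}
    HostMemory* mem;
};

struct Impl {
#ifdef __linux__
    // Linux owns a HeapTracker wrapping the host buffer; std::optional delays
    // construction until the buffer actually exists.
    std::optional<HeapTracker> buffer;
    void Init(HostMemory& host) { buffer.emplace(host); }
#else
    // Elsewhere the raw HostMemory pointer is used directly.
    HostMemory* buffer{};
    void Init(HostMemory& host) { buffer = &host; }
#endif
};

int main() {
    HostMemory host{};
    Impl impl;
    impl.Init(host); // same call site on every platform
    return 0;
}
```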
diff --git a/src/dynarmic/docs/ReturnStackBufferOptimization.md b/src/dynarmic/docs/ReturnStackBufferOptimization.md
index e5298cad92..6ffe41bcc6 100644
--- a/src/dynarmic/docs/ReturnStackBufferOptimization.md
+++ b/src/dynarmic/docs/ReturnStackBufferOptimization.md
@@ -79,7 +79,7 @@ contain a prediction with the same `UniqueHash`.
                          ? u64(unique_hash_to_code_ptr[imm64])
                          : u64(code->GetReturnFromRunCodeAddress());
 
-    code->mov(index_reg, dword[r15 + offsetof(JitState, rsb_ptr)]);
+    code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
     code->add(index_reg, 1);
     code->and_(index_reg, u32(JitState::RSBSize - 1));
 
@@ -91,13 +91,13 @@ contain a prediction with the same `UniqueHash`.
     Xbyak::Label label;
     for (size_t i = 0; i < JitState::RSBSize; ++i) {
-        code->cmp(loc_desc_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+        code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
         code->je(label, code->T_SHORT);
     }
 
-    code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg);
-    code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
-    code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
+    code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
+    code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
+    code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
     code->L(label);
 }
 
@@ -122,14 +122,14 @@ To check if a prediction is in the RSB, we linearly scan the RSB.
// This calculation has to match up with IREmitter::PushRSB code->mov(ecx, MJitStateReg(Arm::Reg::PC)); code->shl(rcx, 32); - code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]); - code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]); + code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); + code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); code->or_(rbx, rcx); code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->cmove(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); + code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); } code->jmp(rax); diff --git a/src/dynarmic/src/dynarmic/backend/arm64/exclusive_monitor.cpp b/src/dynarmic/src/dynarmic/backend/arm64/exclusive_monitor.cpp index 326ab4ad00..b47167bf6f 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/exclusive_monitor.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/exclusive_monitor.cpp @@ -14,7 +14,7 @@ namespace Dynarmic { -ExclusiveMonitor::ExclusiveMonitor(size_t processor_count) +ExclusiveMonitor::ExclusiveMonitor(std::size_t processor_count) : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {} size_t ExclusiveMonitor::GetProcessorCount() const { diff --git a/src/dynarmic/src/dynarmic/backend/arm64/verbose_debugging_output.h b/src/dynarmic/src/dynarmic/backend/arm64/verbose_debugging_output.h index 84beda4057..b5187f6375 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/verbose_debugging_output.h +++ b/src/dynarmic/src/dynarmic/backend/arm64/verbose_debugging_output.h @@ -20,7 +20,7 @@ struct Label; } // namespace oaknut namespace Dynarmic::IR { -enum class Type; +enum class Type : u16; } // namespace Dynarmic::IR namespace Dynarmic::Backend::Arm64 { diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index 43e0750d68..fb306336cf 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -44,21 +44,21 @@ namespace Dynarmic::Backend::X64 { using namespace Xbyak::util; static Xbyak::Address MJitStateReg(A32::Reg reg) { - return dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * static_cast(reg)]; + return dword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, Reg) + sizeof(u32) * static_cast(reg)]; } static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) { if (A32::IsSingleExtReg(reg)) { const size_t index = static_cast(reg) - static_cast(A32::ExtReg::S0); - return dword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u32) * index]; + return dword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + sizeof(u32) * index]; } if (A32::IsDoubleExtReg(reg)) { const size_t index = static_cast(reg) - static_cast(A32::ExtReg::D0); - return qword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u64) * index]; + return qword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + sizeof(u64) * index]; } if (A32::IsQuadExtReg(reg)) { const size_t index = static_cast(reg) - static_cast(A32::ExtReg::Q0); - return xword[r15 + offsetof(A32JitState, ExtReg) + 2 * sizeof(u64) * index]; + return xword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + 2 * 
sizeof(u64) * index]; } ASSERT_FALSE("Should never happen."); } @@ -109,12 +109,12 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { const boost::container::static_vector gpr_order = [this] { boost::container::static_vector gprs{any_gpr}; - if (conf.page_table) { - gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14)); - } if (conf.fastmem_pointer) { gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13)); } + if (conf.page_table) { + gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14)); + } return gprs; }(); @@ -220,7 +220,7 @@ void A32EmitX64::GenTerminalHandlers() { // PC ends up in ebp, location_descriptor ends up in rbx const auto calculate_location_descriptor = [this] { // This calculation has to match up with IREmitter::PushRSB - code.mov(ebx, dword[r15 + offsetof(A32JitState, upper_location_descriptor)]); + code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]); code.shl(rbx, 32); code.mov(ecx, MJitStateReg(A32::Reg::PC)); code.mov(ebp, ecx); @@ -232,17 +232,17 @@ void A32EmitX64::GenTerminalHandlers() { code.align(); terminal_handler_pop_rsb_hint = code.getCurr(); calculate_location_descriptor(); - code.mov(eax, dword[r15 + offsetof(A32JitState, rsb_ptr)]); - code.dec(eax); + code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)]); + code.sub(eax, 1); code.and_(eax, u32(A32JitState::RSBPtrMask)); - code.mov(dword[r15 + offsetof(A32JitState, rsb_ptr)], eax); - code.cmp(rbx, qword[r15 + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)], eax); + code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]); if (conf.HasOptimization(OptimizationFlag::FastDispatch)) { code.jne(rsb_cache_miss); } else { code.jne(code.GetReturnFromRunCodeAddress()); } - code.mov(rax, qword[r15 + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]); + code.mov(rax, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]); code.jmp(rax); PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a32_terminal_handler_pop_rsb_hint"); @@ -392,17 +392,17 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) { // so we load them both at the same time with one 64-bit read. This allows us to // extract all of their bits together at once with one pext. 
static_assert(offsetof(A32JitState, upper_location_descriptor) + 4 == offsetof(A32JitState, cpsr_ge)); - code.mov(result.cvt64(), qword[r15 + offsetof(A32JitState, upper_location_descriptor)]); + code.mov(result.cvt64(), qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]); code.mov(tmp.cvt64(), 0x80808080'00000003ull); code.pext(result.cvt64(), result.cvt64(), tmp.cvt64()); code.mov(tmp, 0x000f0220); code.pdep(result, result, tmp); } else { - code.mov(result, dword[r15 + offsetof(A32JitState, upper_location_descriptor)]); + code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]); code.imul(result, result, 0x120); code.and_(result, 0x00000220); - code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_ge)]); + code.mov(tmp, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]); code.and_(tmp, 0x80808080); code.imul(tmp, tmp, 0x00204081); code.shr(tmp, 12); @@ -410,11 +410,11 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) { code.or_(result, tmp); } - code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_q)]); + code.mov(tmp, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]); code.shl(tmp, 27); code.or_(result, tmp); - code.mov(tmp2, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]); + code.mov(tmp2, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]); if (code.HasHostFeature(HostFeature::FastBMI2)) { code.mov(tmp, NZCV::x64_mask); code.pext(tmp2, tmp2, tmp); @@ -426,7 +426,7 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) { } code.or_(result, tmp2); - code.or_(result, dword[r15 + offsetof(A32JitState, cpsr_jaifm)]); + code.or_(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -444,7 +444,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { // cpsr_q code.bt(cpsr, 27); - code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]); + code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]); // cpsr_nzcv code.mov(tmp, cpsr); @@ -456,12 +456,12 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { code.imul(tmp, tmp, NZCV::to_x64_multiplier); code.and_(tmp, NZCV::x64_mask); } - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], tmp); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], tmp); // cpsr_jaifm code.mov(tmp, cpsr); code.and_(tmp, 0x010001DF); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_jaifm)], tmp); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)], tmp); if (code.HasHostFeature(HostFeature::FastBMI2)) { // cpsr_et and cpsr_ge @@ -469,7 +469,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { // This mask is 0x7FFF0000, because we do not want the MSB to be sign extended to the upper dword. 
static_assert((A32::LocationDescriptor::FPSCR_MODE_MASK & ~0x7FFF0000) == 0); - code.and_(qword[r15 + offsetof(A32JitState, upper_location_descriptor)], u32(0x7FFF0000)); + code.and_(qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], u32(0x7FFF0000)); code.mov(tmp, 0x000f0220); code.pext(cpsr, cpsr, tmp); code.mov(tmp.cvt64(), 0x01010101'00000003ull); @@ -479,14 +479,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { code.mov(tmp2.cvt64(), tmp.cvt64()); code.sub(tmp.cvt64(), cpsr.cvt64()); code.xor_(tmp.cvt64(), tmp2.cvt64()); - code.or_(qword[r15 + offsetof(A32JitState, upper_location_descriptor)], tmp.cvt64()); + code.or_(qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], tmp.cvt64()); } else { - code.and_(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], u32(0xFFFF0000)); + code.and_(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], u32(0xFFFF0000)); code.mov(tmp, cpsr); code.and_(tmp, 0x00000220); code.imul(tmp, tmp, 0x00900000); code.shr(tmp, 28); - code.or_(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], tmp); + code.or_(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], tmp); code.and_(cpsr, 0x000f0000); code.shr(cpsr, 16); @@ -495,14 +495,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { code.mov(tmp, 0x80808080); code.sub(tmp, cpsr); code.xor_(tmp, 0x80808080); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], tmp); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], tmp); } } void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], to_store); } void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) { @@ -510,7 +510,7 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) { if (args[0].IsImmediate()) { const u32 imm = args[0].GetImmediateU32(); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm)); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm)); } else if (code.HasHostFeature(HostFeature::FastBMI2)) { const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -518,14 +518,14 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) { code.shr(a, 28); code.mov(b, NZCV::x64_mask); code.pdep(a, a, b); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a); } else { const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); code.shr(a, 28); code.imul(a, a, NZCV::to_x64_multiplier); code.and_(a, NZCV::x64_mask); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a); } } @@ -534,25 +534,25 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) { if (args[0].IsImmediate()) { const u32 imm = args[0].GetImmediateU32(); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm)); - code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 
1 : 0)); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0)); } else if (code.HasHostFeature(HostFeature::FastBMI2)) { const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32(); code.shr(a, 28); - code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]); + code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]); code.mov(b, NZCV::x64_mask); code.pdep(a, a, b); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a); } else { const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); code.shr(a, 28); - code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]); + code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]); code.imul(a, a, NZCV::to_x64_multiplier); code.and_(a, NZCV::x64_mask); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a); } } @@ -562,10 +562,10 @@ void A32EmitX64::EmitA32SetCpsrNZ(A32EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(args[0]).cvt32(); const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); - code.movzx(tmp, code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1]); + code.movzx(tmp, code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1]); code.and_(tmp, 1); code.or_(tmp, nz); - code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], tmp.cvt8()); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], tmp.cvt8()); } void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) { @@ -575,11 +575,11 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) { if (args[1].IsImmediate()) { const bool c = args[1].GetImmediateU1(); - code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], c); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c); } else { const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(args[1]).cvt8(); - code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], c); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c); } } else { const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); @@ -588,19 +588,19 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) { const bool c = args[1].GetImmediateU1(); code.or_(nz, c); - code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8()); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8()); } else { const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(args[1]).cvt32(); code.or_(nz, c); - code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8()); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8()); } } } static void EmitGetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) { const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]); + code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]); if (flag_bit != 0) { code.shr(result, static_cast(flag_bit)); } @@ -616,18 +616,18 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) { auto args = 
ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code.mov(dword[r15 + offsetof(A32JitState, cpsr_q)], 1); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], 1); } } else { const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8(); - code.or_(code.byte[r15 + offsetof(A32JitState, cpsr_q)], to_store); + code.or_(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], to_store); } } void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code.movd(result, dword[r15 + offsetof(A32JitState, cpsr_ge)]); + code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -637,10 +637,10 @@ void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) { if (args[0].IsInXmm()) { const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]); - code.movd(dword[r15 + offsetof(A32JitState, cpsr_ge)], to_store); + code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store); } else { const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], to_store); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store); } } @@ -654,7 +654,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst ge |= mcl::bit::get_bit<17>(imm) ? 0x0000FF00 : 0; ge |= mcl::bit::get_bit<16>(imm) ? 0x000000FF : 0; - code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], ge); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], ge); } else if (code.HasHostFeature(HostFeature::FastBMI2)) { const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -663,7 +663,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst code.shr(a, 16); code.pdep(a, a, b); code.imul(a, a, 0xFF); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], a); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a); } else { const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); @@ -672,7 +672,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst code.imul(a, a, 0x00204081); code.and_(a, 0x01010101); code.imul(a, a, 0xFF); - code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], a); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a); } } @@ -716,7 +716,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) { const u32 new_upper = upper_without_t | (mcl::bit::get_bit<0>(new_pc) ? 1 : 0); code.mov(MJitStateReg(A32::Reg::PC), new_pc & mask); - code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper); } else { const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32(); const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -728,7 +728,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) { code.lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 
0xFFFFFFFE : 0xFFFFFFFC code.and_(new_pc, mask); code.mov(MJitStateReg(A32::Reg::PC), new_pc); - code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper); } } @@ -798,9 +798,9 @@ static u32 GetFpscrImpl(A32JitState* jit_state) { void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst); - code.mov(code.ABI_PARAM1, code.r15); + code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR); - code.stmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]); + code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]); code.CallFunction(&GetFpscrImpl); } @@ -811,15 +811,15 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) { void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, args[0]); - code.mov(code.ABI_PARAM2, code.r15); + code.mov(code.ABI_PARAM2, code.ABI_JIT_PTR); code.CallFunction(&SetFpscrImpl); - code.ldmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]); + code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]); } void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code.mov(result, dword[r15 + offsetof(A32JitState, fpsr_nzcv)]); + code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -833,7 +833,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { code.mov(tmp, NZCV::x64_mask); code.pext(tmp, value, tmp); code.shl(tmp, 28); - code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], tmp); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)], tmp); return; } @@ -843,7 +843,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { code.and_(value, NZCV::x64_mask); code.imul(value, value, NZCV::from_x64_multiplier); code.and_(value, NZCV::arm_mask); - code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], value); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)], value); } static void EmitCoprocessorException() { @@ -1155,7 +1155,7 @@ void A32EmitX64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_locat }(); if (old_upper != new_upper) { - code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper); } } @@ -1165,32 +1165,28 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) { code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC()); code.ReturnFromRunCode(); - return; - } - - if (conf.enable_cycle_counting) { - code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); - - patch_information[terminal.next].jg.push_back(code.getCurr()); - if (const auto next_bb = GetBasicBlock(terminal.next)) { - EmitPatchJg(terminal.next, next_bb->entrypoint); - } else { - EmitPatchJg(terminal.next); - } } else { - code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0); - - patch_information[terminal.next].jz.push_back(code.getCurr()); - if (const auto next_bb = GetBasicBlock(terminal.next)) { - EmitPatchJz(terminal.next, next_bb->entrypoint); 
+ if (conf.enable_cycle_counting) { + code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); + patch_information[terminal.next].jg.push_back(code.getCurr()); + if (const auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJg(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJg(terminal.next); + } } else { - EmitPatchJz(terminal.next); + code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0); + patch_information[terminal.next].jz.push_back(code.getCurr()); + if (const auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJz(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJz(terminal.next); + } } + code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC()); + PushRSBHelper(rax, rbx, terminal.next); + code.ForceReturnFromRunCode(); } - - code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC()); - PushRSBHelper(rax, rbx, terminal.next); - code.ForceReturnFromRunCode(); } void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) { @@ -1199,14 +1195,13 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::Location if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) { code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC()); code.ReturnFromRunCode(); - return; - } - - patch_information[terminal.next].jmp.push_back(code.getCurr()); - if (const auto next_bb = GetBasicBlock(terminal.next)) { - EmitPatchJmp(terminal.next, next_bb->entrypoint); } else { - EmitPatchJmp(terminal.next); + patch_information[terminal.next].jmp.push_back(code.getCurr()); + if (const auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJmp(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJmp(terminal.next); + } } } @@ -1245,7 +1240,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr } void A32EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0); + code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0); code.jne(code.GetForceReturnFromRunCodeAddress()); EmitTerminal(terminal.else_, initial_location, is_single_step); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp index f2919485be..a1fca21f47 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp @@ -168,7 +168,7 @@ void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) { } void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) { - code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, exclusive_state)], u8(0)); } void A32EmitX64::EmitA32ExclusiveReadMemory8(A32EmitContext& ctx, IR::Inst* inst) { @@ -244,14 +244,14 @@ void A32EmitX64::EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak const A32::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}}; - code.test(dword[r15 + offsetof(A32JitState, halt_reason)], static_cast(HaltReason::MemoryAbort)); + code.test(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], static_cast(HaltReason::MemoryAbort)); if (end) { code.jz(*end, 
code.T_NEAR); } else { code.jz(skip, code.T_NEAR); } EmitSetUpperLocationDescriptor(current_location, ctx.Location()); - code.mov(dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC()); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC()); code.ForceReturnFromRunCode(); code.L(skip); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 47a2236a87..1e673338a8 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -80,12 +80,12 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept { const boost::container::static_vector gpr_order = [this] { boost::container::static_vector gprs{any_gpr}; - if (conf.page_table) { - gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14)); - } if (conf.fastmem_pointer) { gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13)); } + if (conf.page_table) { + gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14)); + } return gprs; }(); @@ -192,10 +192,10 @@ void A64EmitX64::GenTerminalHandlers() { const auto calculate_location_descriptor = [this] { // This calculation has to match up with A64::LocationDescriptor::UniqueHash // TODO: Optimization is available here based on known state of fpcr. - code.mov(rbp, qword[r15 + offsetof(A64JitState, pc)]); + code.mov(rbp, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]); code.mov(rcx, A64::LocationDescriptor::pc_mask); code.and_(rcx, rbp); - code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]); + code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]); code.and_(ebx, A64::LocationDescriptor::fpcr_mask); code.shl(rbx, A64::LocationDescriptor::fpcr_shift); code.or_(rbx, rcx); @@ -207,17 +207,17 @@ void A64EmitX64::GenTerminalHandlers() { code.align(); terminal_handler_pop_rsb_hint = code.getCurr(); calculate_location_descriptor(); - code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]); - code.dec(eax); + code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)]); + code.sub(eax, 1); code.and_(eax, u32(A64JitState::RSBPtrMask)); - code.mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax); - code.cmp(rbx, qword[r15 + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)], eax); + code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]); if (conf.HasOptimization(OptimizationFlag::FastDispatch)) { code.jne(rsb_cache_miss, code.T_NEAR); } else { code.jne(code.GetReturnFromRunCodeAddress()); } - code.mov(rax, qword[r15 + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]); + code.mov(rax, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]); code.jmp(rax); PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a64_terminal_handler_pop_rsb_hint"); @@ -272,7 +272,7 @@ void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code.mov(result, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]); + code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]); code.shr(result, NZCV::x64_c_flag_bit); code.and_(result, 1); ctx.reg_alloc.DefineValue(inst, result); @@ -281,7 +281,7 @@ 
void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr().cvt32(); - code.mov(nzcv_raw, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]); + code.mov(nzcv_raw, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]); if (code.HasHostFeature(HostFeature::FastBMI2)) { const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -310,20 +310,20 @@ void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) { code.imul(nzcv_raw, nzcv_raw, NZCV::to_x64_multiplier); code.and_(nzcv_raw, NZCV::x64_mask); } - code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw); } void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], to_store); + code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], to_store); } void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) { const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code.mov(result, dword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]); + code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -331,13 +331,13 @@ void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) { const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - code.mov(result, qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]); + code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]); ctx.reg_alloc.DefineValue(inst, result); } void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) { const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); - const auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; + const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); code.movd(result, addr); @@ -346,7 +346,7 @@ void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) { const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); - const auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; + const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); code.movq(result, addr); @@ -355,7 +355,7 @@ void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) { const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); - const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; + const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); code.movaps(result, addr); @@ -364,13 +364,13 @@ void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) { void 
A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - code.mov(result, qword[r15 + offsetof(A64JitState, sp)]); + code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]); ctx.reg_alloc.DefineValue(inst, result); } void A64EmitX64::EmitA64GetFPCR(A64EmitContext& ctx, IR::Inst* inst) { const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code.mov(result, dword[r15 + offsetof(A64JitState, fpcr)]); + code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -380,15 +380,15 @@ static u32 GetFPSRImpl(A64JitState* jit_state) { void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst); - code.mov(code.ABI_PARAM1, code.r15); - code.stmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]); + code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR); + code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]); code.CallFunction(GetFPSRImpl); } void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); - const auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]; + const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]; if (args[1].FitsInImmediateS32()) { code.mov(addr, args[1].GetImmediateS32()); } else { @@ -402,7 +402,7 @@ void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); - const auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]; + const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]; if (args[1].FitsInImmediateS32()) { code.mov(addr, args[1].GetImmediateS32()); } else if (args[1].IsInXmm()) { @@ -417,7 +417,7 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); - const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; + const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); @@ -430,7 +430,7 @@ void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); - const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; + const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]); code.movq(to_store, to_store); // TODO: Remove when able @@ -440,7 +440,7 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); - const auto addr = xword[r15 + 
offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; + const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); code.movaps(addr, to_store); @@ -448,7 +448,7 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto addr = qword[r15 + offsetof(A64JitState, sp)]; + const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]; if (args[0].FitsInImmediateS32()) { code.mov(addr, args[0].GetImmediateS32()); } else if (args[0].IsInXmm()) { @@ -467,9 +467,9 @@ static void SetFPCRImpl(A64JitState* jit_state, u32 value) { void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0]); - code.mov(code.ABI_PARAM1, code.r15); + code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR); code.CallFunction(SetFPCRImpl); - code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]); + code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]); } static void SetFPSRImpl(A64JitState* jit_state, u32 value) { @@ -479,14 +479,14 @@ static void SetFPSRImpl(A64JitState* jit_state, u32 value) { void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0]); - code.mov(code.ABI_PARAM1, code.r15); + code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR); code.CallFunction(SetFPSRImpl); - code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]); + code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]); } void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto addr = qword[r15 + offsetof(A64JitState, pc)]; + const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]; if (args[0].FitsInImmediateS32()) { code.mov(addr, args[0].GetImmediateS32()); } else if (args[0].IsInXmm()) { @@ -507,7 +507,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) { code.mov(param[0], imm); }); // The kernel would have to execute ERET to get here, which would clear exclusive state. 
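// Clearing exclusive_state below keeps the JIT's exclusive monitor consistent with that architectural behaviour.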
- code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A64JitState, exclusive_state)], u8(0)); } void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) { @@ -621,7 +621,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDesc code.SwitchMxcsrOnExit(); Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, [&](RegList param) { code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], param[0]); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], param[0]); code.mov(param[1].cvt32(), terminal.num_instructions); }); code.ReturnFromRunCode(true); // TODO: Check cycles @@ -632,61 +632,56 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescri } void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) { - if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) { + // Used for patches and linking + if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) { + if (conf.enable_cycle_counting) { + code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); + patch_information[terminal.next].jg.push_back(code.getCurr()); + if (const auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJg(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJg(terminal.next); + } + } else { + code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0); + patch_information[terminal.next].jz.push_back(code.getCurr()); + if (const auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJz(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJz(terminal.next); + } + } code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); - code.ReturnFromRunCode(); - return; - } - - if (conf.enable_cycle_counting) { - code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); - - patch_information[terminal.next].jg.push_back(code.getCurr()); - if (const auto next_bb = GetBasicBlock(terminal.next)) { - EmitPatchJg(terminal.next, next_bb->entrypoint); - } else { - EmitPatchJg(terminal.next); - } + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); + code.ForceReturnFromRunCode(); } else { - code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0); - - patch_information[terminal.next].jz.push_back(code.getCurr()); - if (const auto next_bb = GetBasicBlock(terminal.next)) { - EmitPatchJz(terminal.next, next_bb->entrypoint); - } else { - EmitPatchJz(terminal.next); - } + code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); + code.ReturnFromRunCode(); } - - code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); - code.ForceReturnFromRunCode(); } void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) { - if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) { - code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); - code.ReturnFromRunCode(); - return; - } - - patch_information[terminal.next].jmp.push_back(code.getCurr()); - if (auto next_bb = 
GetBasicBlock(terminal.next)) { - EmitPatchJmp(terminal.next, next_bb->entrypoint); + if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) { + patch_information[terminal.next].jmp.push_back(code.getCurr()); + if (auto next_bb = GetBasicBlock(terminal.next)) { + EmitPatchJmp(terminal.next, next_bb->entrypoint); + } else { + EmitPatchJmp(terminal.next); + } } else { - EmitPatchJmp(terminal.next); + code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); + code.ReturnFromRunCode(); } } void A64EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) { - if (!conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) || is_single_step) { + if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step) { + code.jmp(terminal_handler_pop_rsb_hint); + } else { code.ReturnFromRunCode(); - return; } - - code.jmp(terminal_handler_pop_rsb_hint); } void A64EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor, bool is_single_step) { @@ -723,7 +718,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr } void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0); + code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0); code.jne(code.GetForceReturnFromRunCodeAddress()); EmitTerminal(terminal.else_, initial_location, is_single_step); } @@ -734,7 +729,7 @@ void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr code.jg(target_code_ptr); } else { code.mov(rax, A64::LocationDescriptor{target_desc}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.jg(code.GetReturnFromRunCodeAddress()); } code.EnsurePatchLocationSize(patch_location, 23); @@ -746,7 +741,7 @@ void A64EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr code.jz(target_code_ptr); } else { code.mov(rax, A64::LocationDescriptor{target_desc}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.jz(code.GetReturnFromRunCodeAddress()); } code.EnsurePatchLocationSize(patch_location, 23); @@ -758,7 +753,7 @@ void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr code.jmp(target_code_ptr); } else { code.mov(rax, A64::LocationDescriptor{target_desc}.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.jmp(code.GetReturnFromRunCodeAddress()); } code.EnsurePatchLocationSize(patch_location, 22); diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h index f26723092f..a1917a3594 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h @@ -127,10 +127,10 @@ protected: BlockRangeInformation block_ranges; std::array fast_dispatch_table; ankerl::unordered_dense::map fastmem_patch_info; - std::map, void (*)()> read_fallbacks; - std::map, void (*)()> write_fallbacks; - std::map, void (*)()> exclusive_write_fallbacks; - std::set do_not_fastmem; + ankerl::unordered_dense::map, void (*)()> read_fallbacks; + ankerl::unordered_dense::map, void (*)()> 
write_fallbacks; + ankerl::unordered_dense::map, void (*)()> exclusive_write_fallbacks; + ankerl::unordered_dense::set do_not_fastmem; const void* terminal_handler_pop_rsb_hint = nullptr; const void* terminal_handler_fast_dispatch_hint = nullptr; FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr; diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp index fe7dfa011f..8fd6777542 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp @@ -324,7 +324,7 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { } void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) { - code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A64JitState, exclusive_state)], u8(0)); } void A64EmitX64::EmitA64ExclusiveReadMemory8(A64EmitContext& ctx, IR::Inst* inst) { @@ -416,14 +416,14 @@ void A64EmitX64::EmitCheckMemoryAbort(A64EmitContext&, IR::Inst* inst, Xbyak::La const A64::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}}; - code.test(dword[r15 + offsetof(A64JitState, halt_reason)], static_cast(HaltReason::MemoryAbort)); + code.test(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], static_cast(HaltReason::MemoryAbort)); if (end) { code.jz(*end, code.T_NEAR); } else { code.jz(skip, code.T_NEAR); } code.mov(rax, current_location.PC()); - code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.ForceReturnFromRunCode(); code.L(skip); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp index e8eaddcbac..a9bbab3d10 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp @@ -49,16 +49,11 @@ void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM); const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size); - for (auto const gpr : regs) { - if (HostLocIsGPR(gpr)) { + for (auto const gpr : regs) + if (HostLocIsGPR(gpr)) code.push(HostLocToReg64(gpr)); - } - } - - if (frame_info.stack_subtraction != 0) { + if (frame_info.stack_subtraction != 0) code.sub(rsp, u32(frame_info.stack_subtraction)); - } - size_t xmm_offset = frame_info.xmm_offset; for (auto const xmm : regs) { if (HostLocIsXMM(xmm)) { @@ -80,27 +75,22 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM); const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size); - size_t xmm_offset = frame_info.xmm_offset; - for (auto const xmm : regs) { + size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE); + for (auto const xmm : mcl::iterator::reverse(regs)) { if (HostLocIsXMM(xmm)) { + xmm_offset -= XMM_SIZE; if (code.HasHostFeature(HostFeature::AVX)) { code.vmovaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]); } else { code.movaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]); } - xmm_offset += XMM_SIZE; } } - - if (frame_info.stack_subtraction != 0) { + if (frame_info.stack_subtraction != 0) code.add(rsp, u32(frame_info.stack_subtraction)); - } - - for (auto const gpr : 
mcl::iterator::reverse(regs)) { - if (HostLocIsGPR(gpr)) { + for (auto const gpr : mcl::iterator::reverse(regs)) + if (HostLocIsGPR(gpr)) code.pop(HostLocToReg64(gpr)); - } - } } void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) { @@ -119,6 +109,20 @@ void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE); } +// Windows ABI registers are not covered by the same allocation algorithm as Unix's, so build the register list at runtime instead of using the precomputed table +#ifdef _MSC_VER +void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) { + std::vector<HostLoc> regs; + std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception); + ABI_PushRegistersAndAdjustStack(code, 0, regs); +} + +void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) { + std::vector<HostLoc> regs; + std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception); + ABI_PopRegistersAndAdjustStack(code, 0, regs); +} +#else static consteval size_t ABI_AllCallerSaveSize() noexcept { return ABI_ALL_CALLER_SAVE.max_size(); } @@ -166,24 +170,14 @@ alignas(64) static constinit std::array AB }; void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) { -#ifdef _MSC_VER - std::vector<HostLoc> regs; - std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception); - ABI_PushRegistersAndAdjustStack(code, 0, regs); -#else ASSUME(size_t(exception) < 32); ABI_PushRegistersAndAdjustStack(code, 0, ABI_CALLER_SAVED_EXCEPT_TABLE[size_t(exception)]); -#endif } void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) { -#ifdef _MSC_VER - std::vector<HostLoc> regs; - std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception); - ABI_PopRegistersAndAdjustStack(code, 0, regs); -#else ASSUME(size_t(exception) < 32); ABI_PopRegistersAndAdjustStack(code, 0, ABI_CALLER_SAVED_EXCEPT_TABLE[size_t(exception)]); -#endif } +#endif + } // namespace Dynarmic::Backend::X64 diff --git a/src/dynarmic/src/dynarmic/backend/x64/abi.h b/src/dynarmic/src/dynarmic/backend/x64/abi.h index 32f2bdac67..307817a864 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/abi.h +++ b/src/dynarmic/src/dynarmic/backend/x64/abi.h @@ -17,6 +17,7 @@ namespace Dynarmic::Backend::X64 { class BlockOfCode; +constexpr HostLoc ABI_JIT_PTR = HostLoc::R15; #ifdef _WIN32 constexpr HostLoc ABI_RETURN = HostLoc::RAX; diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index 41603abf86..5a33ac7727 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -36,6 +36,7 @@ namespace Dynarmic::Backend::X64 { +const Xbyak::Reg64 BlockOfCode::ABI_JIT_PTR = HostLocToReg64(Dynarmic::Backend::X64::ABI_JIT_PTR); #ifdef _WIN32 const Xbyak::Reg64 BlockOfCode::ABI_RETURN = HostLocToReg64(Dynarmic::Backend::X64::ABI_RETURN); const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM1); @@ -322,8 +323,8 @@ void BlockOfCode::GenRunCode(std::function rcp) { // that the stack is appropriately aligned for CALLs.
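// Note: ABI_JIT_PTR (r15, declared in abi.h above) is loaded with the jit state pointer below and stays pinned for the entire run, so all emitted code addresses guest state relative to it.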
ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); - mov(r15, ABI_PARAM1); - mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register + mov(ABI_JIT_PTR, ABI_PARAM1); + mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register if (cb.enable_cycle_counting) { cb.GetTicksRemaining->EmitCall(*this); @@ -331,9 +332,11 @@ void BlockOfCode::GenRunCode(std::function rcp) { mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN); } + // r14 = page table + // r13 = fastmem pointer rcp(*this); - cmp(dword[r15 + jsi.offsetof_halt_reason], 0); + cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited, T_NEAR); SwitchMxcsrOnEntry(); @@ -344,7 +347,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); - mov(r15, ABI_PARAM1); + mov(ABI_JIT_PTR, ABI_PARAM1); if (cb.enable_cycle_counting) { mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1); @@ -353,10 +356,10 @@ void BlockOfCode::GenRunCode(std::function rcp) { rcp(*this); - cmp(dword[r15 + jsi.offsetof_halt_reason], 0); + cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited, T_NEAR); lock(); - or_(dword[r15 + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); + or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); SwitchMxcsrOnEntry(); jmp(ABI_PARAM2); @@ -366,7 +369,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { align(); return_from_run_code[0] = getCurr(); - cmp(dword[r15 + jsi.offsetof_halt_reason], 0); + cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller); if (cb.enable_cycle_counting) { cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); @@ -378,7 +381,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { align(); return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr(); - cmp(dword[r15 + jsi.offsetof_halt_reason], 0); + cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited); if (cb.enable_cycle_counting) { cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); @@ -407,7 +410,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { xor_(eax, eax); lock(); - xchg(dword[r15 + jsi.offsetof_halt_reason], eax); + xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax); ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); ret(); @@ -417,22 +420,22 @@ void BlockOfCode::GenRunCode(std::function rcp) { void BlockOfCode::SwitchMxcsrOnEntry() { stmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]); - ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]); + ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]); } void BlockOfCode::SwitchMxcsrOnExit() { - stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]); + stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]); ldmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]); } void BlockOfCode::EnterStandardASIMD() { - stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]); - ldmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]); + stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]); + ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_asimd_MXCSR]); } void BlockOfCode::LeaveStandardASIMD() { - stmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]); - ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]); + stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_asimd_MXCSR]); + 
ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]); } void BlockOfCode::UpdateTicks() { diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h index 4cc8663e11..095e75336b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.h @@ -155,6 +155,7 @@ public: void SetCodePtr(CodePtr code_ptr); void EnsurePatchLocationSize(CodePtr begin, size_t size); + static const Xbyak::Reg64 ABI_JIT_PTR; // ABI registers #ifdef _WIN32 static const Xbyak::Reg64 ABI_RETURN; diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index d428199585..a13baa6a97 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -91,19 +91,18 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, I ? iter->second.entrypoint : code.GetReturnFromRunCodeAddress(); - code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]); - + code.mov(index_reg.cvt32(), dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_rsb_ptr]); code.mov(loc_desc_reg, target.Value()); - patch_information[target].mov_rcx.push_back(code.getCurr()); EmitPatchMovRcx(target_code_ptr); - - code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg); - code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx); - - code.add(index_reg.cvt32(), 1); - code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask)); - code.mov(dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32()); + code.mov(qword[code.ABI_JIT_PTR + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg); + code.mov(qword[code.ABI_JIT_PTR + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx); + // Byte-size trick: the RSB pointer mask must fit in a single byte + DEBUG_ASSERT(code.GetJitStateInfo().rsb_ptr_mask <= 0xFF); + code.add(index_reg.cvt32(), 1); // flags trashed; single-byte encoding, Haswell doesn't care + code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask)); // trashes flags + // Results are ready; the least-needed store goes last to give the out-of-order engine some slack + code.mov(dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32()); } void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) { @@ -119,7 +118,7 @@ void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) { code.movaps(xword[rsp + offsetof(RegisterData, xmms) + 2 * sizeof(u64) * i], Xbyak::Xmm{i}); } code.lea(rax, ptr[rsp + sizeof(RegisterData) + offsetof(StackLayout, spill)]); - code.mov(xword[rsp + offsetof(RegisterData, spill)], rax); + code.mov(qword[rsp + offsetof(RegisterData, spill)], rax); reg_alloc.EmitVerboseDebuggingOutput(); @@ -285,7 +284,7 @@ void EmitX64::EmitAddCycles(size_t cycles) { Xbyak::Label EmitX64::EmitCond(IR::Cond cond) { Xbyak::Label pass; - code.mov(eax, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]); + code.mov(eax, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]); code.LoadRequiredFlagsForCondFromRax(cond); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp index 842a8612ee..9d7c57cb57 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp @@ -18,24 +18,20 @@ namespace
CRC32 = Common::Crypto::CRC32; static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if (code.HasHostFeature(HostFeature::SSE42)) { const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size); - if (data_size != 64) { code.crc32(crc, value); } else { code.crc32(crc.cvt64(), value); } - ctx.reg_alloc.DefineValue(inst, crc); - return; + } else { + ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); + code.mov(code.ABI_PARAM3.cvt32(), data_size / CHAR_BIT); //zext + code.CallFunction(&CRC32::ComputeCRC32Castagnoli); } - - ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); - code.mov(code.ABI_PARAM3, data_size / CHAR_BIT); - code.CallFunction(&CRC32::ComputeCRC32Castagnoli); } static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) { @@ -69,10 +65,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co code.pextrd(crc, xmm_value, 2); ctx.reg_alloc.DefineValue(inst, crc); - return; - } - - if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) { + } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) { const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32(); const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(); @@ -90,10 +83,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co code.pextrd(crc, xmm_value, 2); ctx.reg_alloc.DefineValue(inst, crc); - return; - } - - if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) { + } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) { const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(); @@ -111,12 +101,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co code.pextrd(crc, xmm_value, 2); ctx.reg_alloc.DefineValue(inst, crc); - return; + } else { + ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); + code.mov(code.ABI_PARAM3, data_size / CHAR_BIT); + code.CallFunction(&CRC32::ComputeCRC32ISO); } - - ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); - code.mov(code.ABI_PARAM3, data_size / CHAR_BIT); - code.CallFunction(&CRC32::ComputeCRC32ISO); } void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) { diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp index 4128ef1721..7e03e3dcd1 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp @@ -143,7 +143,7 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst* const Xbyak::Reg then_ = ctx.reg_alloc.UseGpr(args[1]).changeBit(bitsize); const Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(args[2]).changeBit(bitsize); - code.mov(nzcv, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]); + code.mov(nzcv, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]); code.LoadRequiredFlagsForCondFromRax(args[0].GetImmediateCond()); @@ -909,11 +909,11 @@ static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* ca } } +// AL contains flags (after 
LAHF + SETO sequence) static Xbyak::Reg64 DoNZCV(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* nzcv_out) { if (!nzcv_out) { return Xbyak::Reg64{-1}; } - const Xbyak::Reg64 nzcv = reg_alloc.ScratchGpr(HostLoc::RAX); code.xor_(nzcv.cvt32(), nzcv.cvt32()); return nzcv; @@ -1168,7 +1168,7 @@ void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) { code.xor_(eax, eax); code.test(divisor, divisor); - code.jz(end); + code.jz(end, code.T_NEAR); code.mov(eax, dividend); code.xor_(edx, edx); code.div(divisor); @@ -1189,7 +1189,7 @@ void EmitX64::EmitUnsignedDiv64(EmitContext& ctx, IR::Inst* inst) { code.xor_(eax, eax); code.test(divisor, divisor); - code.jz(end); + code.jz(end, code.T_NEAR); code.mov(rax, dividend); code.xor_(edx, edx); code.div(divisor); @@ -1568,14 +1568,14 @@ void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { } else { const Xbyak::Reg32 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 temp = ctx.reg_alloc.ScratchGpr().cvt32(); // The result of a bsr of zero is undefined, but zf is set after it. code.bsr(result, source); - code.mov(source, 0xFFFFFFFF); - code.cmovz(result, source); - code.neg(result); - code.add(result, 31); - + code.mov(temp, 32); + code.xor_(result, 31); + code.test(source, source); + code.cmove(result, temp); ctx.reg_alloc.DefineValue(inst, result); } } @@ -1592,14 +1592,14 @@ void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) { } else { const Xbyak::Reg64 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt64(); const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); + const Xbyak::Reg64 temp = ctx.reg_alloc.ScratchGpr().cvt64(); // The result of a bsr of zero is undefined, but zf is set after it. 
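// For non-zero input, bsr yields the index of the highest set bit (0-63), so clz == 63 - bsr(x) == bsr(x) ^ 63; the cmove below substitutes 64 for a zero input.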
code.bsr(result, source); - code.mov(source.cvt32(), 0xFFFFFFFF); - code.cmovz(result.cvt32(), source.cvt32()); - code.neg(result.cvt32()); - code.add(result.cvt32(), 63); - + code.mov(temp.cvt32(), 64); + code.xor_(result.cvt32(), 63); + code.test(source, source); + code.cmove(result.cvt32(), temp.cvt32()); ctx.reg_alloc.DefineValue(inst, result); } } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 63b9659618..47e51acb03 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -712,12 +712,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); #ifdef _WIN32 code.lea(rsp, ptr[rsp - (16 + ABI_SHADOW_SPACE)]); - code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE], rax); code.CallFunction(fallback_fn); code.add(rsp, 16 + ABI_SHADOW_SPACE); #else - code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(fallback_fn); #endif code.movq(result, code.ABI_RETURN); @@ -821,12 +821,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); #ifdef _WIN32 ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); - code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE], rax); code.CallFunction(fallback_fn); ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); #else - code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(fallback_fn); #endif } @@ -945,7 +945,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipEstimate); } @@ -968,7 +968,7 @@ static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* i auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipExponent); } @@ -1026,7 +1026,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.movq(code.ABI_PARAM1, operand1); code.movq(code.ABI_PARAM2, operand2); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, 
code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipStepFused); code.movq(result, code.ABI_RETURN); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); @@ -1055,7 +1055,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* ctx.reg_alloc.HostCall(inst, args[0], args[1]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipStepFused); } @@ -1119,7 +1119,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); - code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.CallFunction(lut.at(std::make_tuple(fsize, rounding_mode, exact))); } @@ -1206,7 +1206,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i } // a > 0 && a < 0x00800000; - code.dec(tmp); + code.sub(tmp, 1); code.cmp(tmp, 0x007FFFFF); code.jb(fallback, code.T_NEAR); //within -127,128 needs_fallback = true; @@ -1284,7 +1284,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); code.movq(code.ABI_PARAM1, operand); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtEstimate); code.movq(result, rax); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); @@ -1298,7 +1298,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtEstimate); } } @@ -1368,7 +1368,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.movq(code.ABI_PARAM1, operand1); code.movq(code.ABI_PARAM2, operand2); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtStepFused); code.movq(result, code.ABI_RETURN); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); @@ -1398,7 +1398,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* ctx.reg_alloc.HostCall(inst, args[0], args[1]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); 
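+ // ABI_PARAM4 points at the guest's cumulative FPSR exception flags, which the soft-float fallback updates in place.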
code.CallFunction(&FP::FPRSqrtStepFused); } @@ -1511,7 +1511,7 @@ void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1535,7 +1535,7 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1556,7 +1556,7 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } } @@ -1581,7 +1581,7 @@ void EmitX64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1595,7 +1595,7 @@ void EmitX64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1616,7 +1616,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } } @@ -1757,7 +1757,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { mp::cartesian_product{}); ctx.reg_alloc.HostCall(inst, args[0]); - code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.CallFunction(lut.at(std::make_tuple(fbits, rounding_mode))); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index 
272b896ae3..34f77b0446 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -28,27 +28,24 @@ std::optional AxxEmitX64::ShouldFastmem(AxxEmitC FakeCall AxxEmitX64::FastmemCallback(u64 rip_) { const auto iter = fastmem_patch_info.find(rip_); - - if (iter == fastmem_patch_info.end()) { + if (iter != fastmem_patch_info.end()) { + FakeCall result{ + .call_rip = iter->second.callback, + .ret_rip = iter->second.resume_rip, + }; + if (iter->second.recompile) { + const auto marker = iter->second.marker; + do_not_fastmem.insert(marker); + InvalidateBasicBlocks({std::get<0>(marker)}); + } + return result; + } else { fmt::print("dynarmic: Segfault happened within JITted code at rip = {:016x}\n", rip_); fmt::print("Segfault wasn't at a fastmem patch location!\n"); fmt::print("Now dumping code.......\n\n"); Common::DumpDisassembledX64((void*)(rip_ & ~u64(0xFFF)), 0x1000); ASSERT_FALSE("iter != fastmem_patch_info.end()"); } - - FakeCall result{ - .call_rip = iter->second.callback, - .ret_rip = iter->second.resume_rip, - }; - - if (iter->second.recompile) { - const auto marker = iter->second.marker; - do_not_fastmem.insert(marker); - InvalidateBasicBlocks({std::get<0>(marker)}); - } - - return result; } template @@ -95,7 +92,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) { if (fastmem_marker) { // Use fastmem - bool require_abort_handling; + bool require_abort_handling = false; const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling); const auto location = EmitReadMemoryMov(code, value_idx, src_ptr, ordered); @@ -182,7 +179,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) { if (fastmem_marker) { // Use fastmem - bool require_abort_handling; + bool require_abort_handling = false; const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling); const auto location = EmitWriteMemoryMov(code, dest_ptr, value_idx, ordered); @@ -230,7 +227,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, {}, args[1]); - code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1)); code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); if (ordered) { code.mfence(); @@ -248,7 +245,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); - code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1)); code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); @@ -288,9 +285,9 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) { Xbyak::Label end; code.mov(code.ABI_RETURN, u32(1)); - code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); code.je(end); - code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); if constexpr (bitsize != 128) { using T = mcl::unsigned_integer_of_size; @@ -358,7 
+355,7 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in EmitExclusiveLock(code, conf, tmp, tmp2.cvt32()); - code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1)); code.mov(tmp, mcl::bit_cast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); code.mov(qword[tmp], vaddr); @@ -442,14 +439,14 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i code.mov(tmp, mcl::bit_cast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); code.mov(status, u32(1)); - code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); code.je(*end, code.T_NEAR); code.cmp(qword[tmp], vaddr); code.jne(*end, code.T_NEAR); EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax); - code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); code.mov(tmp, mcl::bit_cast(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id))); if constexpr (bitsize == 128) { @@ -504,7 +501,6 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i } code.setnz(status.cvt8()); - ctx.deferred_emits.emplace_back([=, this] { code.L(*abort); code.call(wrapped_fn); @@ -518,24 +514,21 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i conf.recompile_on_exclusive_fastmem_failure, }); - code.cmp(al, 0); + code.xor_(status.cvt32(), status.cvt32()); //dep-break + code.test(code.al, code.al); code.setz(status.cvt8()); - code.movzx(status.cvt32(), status.cvt8()); code.jmp(*end, code.T_NEAR); }); } else { code.call(wrapped_fn); - code.cmp(al, 0); + code.xor_(status.cvt32(), status.cvt32()); //dep-break + code.test(code.al, code.al); code.setz(status.cvt8()); - code.movzx(status.cvt32(), status.cvt8()); } code.L(*end); - EmitExclusiveUnlock(code, conf, tmp, eax); - ctx.reg_alloc.DefineValue(inst, status); - EmitCheckMemoryAbort(ctx, inst); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h index b25b33101c..75a47c6a80 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h @@ -46,26 +46,25 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi code.test(vaddr, align_mask); - if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { + if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { + const u32 page_align_mask = static_cast(page_size - 1) & ~align_mask; + + SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel(); + + code.jnz(*detect_boundary, code.T_NEAR); + code.L(*resume); + + ctx.deferred_emits.emplace_back([=, &code] { + code.L(*detect_boundary); + code.mov(tmp, vaddr); + code.and_(tmp, page_align_mask); + code.cmp(tmp, page_align_mask); + code.jne(*resume, code.T_NEAR); + // NOTE: We expect to fallthrough into abort code here. 
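+ // The deferred block either jumps back to *resume or falls through into the abort handler emitted directly after it.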
+ }); + } else { code.jnz(abort, code.T_NEAR); - return; } - - const u32 page_align_mask = static_cast(page_size - 1) & ~align_mask; - - SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel(); - - code.jnz(*detect_boundary, code.T_NEAR); - code.L(*resume); - - ctx.deferred_emits.emplace_back([=, &code] { - code.L(*detect_boundary); - code.mov(tmp, vaddr); - code.and_(tmp, page_align_mask); - code.cmp(tmp, page_align_mask); - code.jne(*resume, code.T_NEAR); - // NOTE: We expect to fallthrough into abort code here. - }); } template @@ -202,7 +201,7 @@ template const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::RegExp& addr, bool ordered) { if (ordered) { if constexpr (bitsize != 128) { - code.xor_(Xbyak::Reg32{value_idx}, Xbyak::Reg32{value_idx}); + code.xor_(Xbyak::Reg32(value_idx), Xbyak::Reg32(value_idx)); } else { code.xor_(eax, eax); code.xor_(ebx, ebx); @@ -214,59 +213,59 @@ const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::Reg switch (bitsize) { case 8: code.lock(); - code.xadd(code.byte[addr], Xbyak::Reg32{value_idx}.cvt8()); + code.xadd(code.byte[addr], Xbyak::Reg32(value_idx).cvt8()); break; case 16: code.lock(); - code.xadd(word[addr], Xbyak::Reg16{value_idx}); + code.xadd(word[addr], Xbyak::Reg64(value_idx).cvt16()); break; case 32: code.lock(); - code.xadd(dword[addr], Xbyak::Reg32{value_idx}); + code.xadd(dword[addr], Xbyak::Reg64(value_idx).cvt32()); break; case 64: code.lock(); - code.xadd(qword[addr], Xbyak::Reg64{value_idx}); + code.xadd(qword[addr], Xbyak::Reg64(value_idx)); break; case 128: code.lock(); code.cmpxchg16b(xword[addr]); if (code.HasHostFeature(HostFeature::SSE41)) { - code.movq(Xbyak::Xmm{value_idx}, rax); - code.pinsrq(Xbyak::Xmm{value_idx}, rdx, 1); + code.movq(Xbyak::Xmm(value_idx), rax); + code.pinsrq(Xbyak::Xmm(value_idx), rdx, 1); } else { - code.movq(Xbyak::Xmm{value_idx}, rax); + code.movq(Xbyak::Xmm(value_idx), rax); code.movq(xmm0, rdx); - code.punpcklqdq(Xbyak::Xmm{value_idx}, xmm0); + code.punpcklqdq(Xbyak::Xmm(value_idx), xmm0); } break; default: ASSERT_FALSE("Invalid bitsize"); } return fastmem_location; + } else { + const void* fastmem_location = code.getCurr(); + switch (bitsize) { + case 8: + code.movzx(Xbyak::Reg64(value_idx).cvt32(), code.byte[addr]); + break; + case 16: + code.movzx(Xbyak::Reg64(value_idx).cvt32(), word[addr]); + break; + case 32: + code.mov(Xbyak::Reg64(value_idx).cvt32(), dword[addr]); + break; + case 64: + code.mov(Xbyak::Reg64(value_idx), qword[addr]); + break; + case 128: + code.movups(Xbyak::Xmm(value_idx), xword[addr]); + break; + default: + ASSERT_FALSE("Invalid bitsize"); + } + return fastmem_location; } - - const void* fastmem_location = code.getCurr(); - switch (bitsize) { - case 8: - code.movzx(Xbyak::Reg32{value_idx}, code.byte[addr]); - break; - case 16: - code.movzx(Xbyak::Reg32{value_idx}, word[addr]); - break; - case 32: - code.mov(Xbyak::Reg32{value_idx}, dword[addr]); - break; - case 64: - code.mov(Xbyak::Reg64{value_idx}, qword[addr]); - break; - case 128: - code.movups(Xbyak::Xmm{value_idx}, xword[addr]); - break; - default: - ASSERT_FALSE("Invalid bitsize"); - } - return fastmem_location; } template @@ -276,10 +275,10 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int code.xor_(eax, eax); code.xor_(edx, edx); if (code.HasHostFeature(HostFeature::SSE41)) { - code.movq(rbx, Xbyak::Xmm{value_idx}); - code.pextrq(rcx, Xbyak::Xmm{value_idx}, 1); + code.movq(rbx, Xbyak::Xmm(value_idx)); + 
code.pextrq(rcx, Xbyak::Xmm(value_idx), 1); } else { - code.movaps(xmm0, Xbyak::Xmm{value_idx}); + code.movaps(xmm0, Xbyak::Xmm(value_idx)); code.movq(rbx, xmm0); code.punpckhqdq(xmm0, xmm0); code.movq(rcx, xmm0); @@ -289,16 +288,16 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int const void* fastmem_location = code.getCurr(); switch (bitsize) { case 8: - code.xchg(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8()); + code.xchg(code.byte[addr], Xbyak::Reg64(value_idx).cvt8()); break; case 16: - code.xchg(word[addr], Xbyak::Reg16{value_idx}); + code.xchg(word[addr], Xbyak::Reg64(value_idx).cvt16()); break; case 32: - code.xchg(dword[addr], Xbyak::Reg32{value_idx}); + code.xchg(dword[addr], Xbyak::Reg64(value_idx).cvt32()); break; case 64: - code.xchg(qword[addr], Xbyak::Reg64{value_idx}); + code.xchg(qword[addr], Xbyak::Reg64(value_idx)); break; case 128: { Xbyak::Label loop; @@ -312,29 +311,29 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int ASSERT_FALSE("Invalid bitsize"); } return fastmem_location; + } else { + const void* fastmem_location = code.getCurr(); + switch (bitsize) { + case 8: + code.mov(code.byte[addr], Xbyak::Reg64(value_idx).cvt8()); + break; + case 16: + code.mov(word[addr], Xbyak::Reg64(value_idx).cvt16()); + break; + case 32: + code.mov(dword[addr], Xbyak::Reg64(value_idx).cvt32()); + break; + case 64: + code.mov(qword[addr], Xbyak::Reg64(value_idx)); + break; + case 128: + code.movups(xword[addr], Xbyak::Xmm(value_idx)); + break; + default: + ASSERT_FALSE("Invalid bitsize"); + } + return fastmem_location; } - - const void* fastmem_location = code.getCurr(); - switch (bitsize) { - case 8: - code.mov(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8()); - break; - case 16: - code.mov(word[addr], Xbyak::Reg16{value_idx}); - break; - case 32: - code.mov(dword[addr], Xbyak::Reg32{value_idx}); - break; - case 64: - code.mov(qword[addr], Xbyak::Reg64{value_idx}); - break; - case 128: - code.movups(xword[addr], Xbyak::Xmm{value_idx}); - break; - default: - ASSERT_FALSE("Invalid bitsize"); - } - return fastmem_location; } template diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp index d36a75426a..e795181872 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp @@ -69,7 +69,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) ctx.reg_alloc.DefineValue(overflow_inst, overflow); } } else { - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); } ctx.reg_alloc.DefineValue(inst, result); @@ -98,7 +98,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(); code.setb(overflow.cvt8()); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); ctx.reg_alloc.DefineValue(inst, addend); } @@ -226,7 +226,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, code.cmovns(y, tmp); code.sets(tmp.cvt8()); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + 
code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); ctx.reg_alloc.DefineValue(inst, y); } @@ -250,7 +250,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, code.cmovns(y.cvt32(), tmp.cvt32()); code.sets(tmp.cvt8()); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); ctx.reg_alloc.DefineValue(inst, y); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index e9b8866b52..e1b9e54df8 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -25,6 +25,7 @@ #include "dynarmic/backend/x64/constants.h" #include "dynarmic/backend/x64/emit_x64.h" #include "dynarmic/common/math_util.h" +#include "dynarmic/interface/optimization_flags.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -109,7 +110,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } @@ -137,7 +138,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } @@ -164,7 +165,7 @@ static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code, ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } @@ -1009,10 +1010,7 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) { code.gf2p8affineqb(result, code.BConst<64>(xword, 0xaaccf0ff'00000000), 8); ctx.reg_alloc.DefineValue(inst, result); - return; - } - - if (code.HasHostFeature(HostFeature::SSSE3)) { + } else if (code.HasHostFeature(HostFeature::SSSE3)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -1034,10 +1032,9 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) { code.paddb(data, tmp1); ctx.reg_alloc.DefineValue(inst, data); - return; + } else { + EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); } - - EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); } void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { @@ -1070,10 +1067,7 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { code.vpshufb(result, result, data); ctx.reg_alloc.DefineValue(inst, result); - return; - } - - if (code.HasHostFeature(HostFeature::SSSE3)) { + } else if (code.HasHostFeature(HostFeature::SSSE3)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const 
Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -1106,24 +1100,33 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { code.pshufb(result, data); ctx.reg_alloc.DefineValue(inst, result); - return; + } else { + EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); } - - EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); } void EmitX64::EmitVectorCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512CD)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); code.vplzcntd(data, data); - ctx.reg_alloc.DefineValue(inst, data); - return; + // See https://stackoverflow.com/questions/58823140/count-leading-zero-bits-for-each-element-in-avx2-vector-emulate-mm256-lzcnt-ep/58827596#58827596 + } else if (code.HasHostFeature(HostFeature::AVX2)) { + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(); + code.vmovdqa(temp, data); + code.vpsrld(data, data, 8); + code.vpandn(data, data, temp); + code.vmovdqa(temp, code.Const(xword, 0x0000009E0000009E, 0x0000009E0000009E)); + code.vcvtdq2ps(data, data); + code.vpsrld(data, data, 23); + code.vpsubusw(data, temp, data); + code.vpminsw(data, data, code.Const(xword, 0x0000002000000020, 0x0000002000000020)); + ctx.reg_alloc.DefineValue(inst, data); + } else { + EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); } - - EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); } void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) { @@ -3323,7 +3326,7 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) { code.paddb(mask, mask); code.paddb(xmm_a, xmm_a); code.pblendvb(result, alternate); - code.dec(counter); + code.sub(counter, 1); code.jnz(loop); ctx.reg_alloc.DefineValue(inst, result); @@ -3367,7 +3370,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst code.paddw(mask, mask); code.paddw(xmm_a, xmm_a); code.pblendvb(result, alternate); - code.dec(counter); + code.sub(counter, 1); code.jnz(loop); ctx.reg_alloc.DefineValue(inst, result); @@ -4258,7 +4261,7 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo UNREACHABLE(); } - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, data); } @@ -4393,7 +4396,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(mask, xmm0); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], mask); if (code.HasHostFeature(HostFeature::SSE41)) { code.pblendvb(result, tmp); @@ -4479,7 +4482,7 @@ static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(bit, upper_tmp); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, result); } @@ -4530,7 +4533,7 @@ void 
EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.vpcmpeqd(mask, result, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); code.vpxor(result, result, mask); code.pmovmskb(bit, mask); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.Release(mask); ctx.reg_alloc.Release(bit); @@ -4586,7 +4589,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.pcmpeqd(tmp, result); code.pxor(result, tmp); code.pmovmskb(bit, tmp); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, result); } @@ -4620,7 +4623,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(bit, y); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, x); } @@ -4673,7 +4676,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, code.pxor(x, y); code.pmovmskb(bit, y); } - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, x); } @@ -4712,7 +4715,7 @@ static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, Block code.pcmpeqd(reconstructed, src); code.movmskps(bit, reconstructed); code.xor_(bit, 0b1111); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, dest); } @@ -4767,7 +4770,7 @@ static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, Blo code.pcmpeqd(reconstructed, src); code.movmskps(bit, reconstructed); code.xor_(bit, 0b1111); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, dest); } @@ -4870,7 +4873,7 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo // Check if any elements matched the mask prior to performing saturation. If so, set the Q bit. 
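// (A note on the recurring fpsr_qc pattern here and below: ARM's FPSR.QC saturation flag
//  is sticky, so these emitters pack the per-element saturation mask into a GPR with
//  pmovmskb and OR it into the fpsr_qc field of the JIT state; once any lane saturates,
//  the bit stays set until the guest rewrites FPSR.)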
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(bit, tmp); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, zero); } @@ -5641,6 +5644,7 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, break; } case 32: + // See https://stackoverflow.com/questions/3380785/compute-the-absolute-difference-between-unsigned-integers-using-sse/3527267#3527267 if (code.HasHostFeature(HostFeature::SSE41)) { const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); @@ -5652,16 +5656,33 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, } else { const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); - - code.movdqa(temp, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); - code.pxor(x, temp); - code.pxor(y, temp); - code.movdqa(temp, x); - code.psubd(temp, y); - code.pcmpgtd(y, x); - code.psrld(y, 1); - code.pxor(temp, y); - code.psubd(temp, y); + if (ctx.HasOptimization(OptimizationFlag::CodeSpeed)) { + // About 45 bytes + const Xbyak::Xmm temp_x = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm temp_y = ctx.reg_alloc.ScratchXmm(); + code.pcmpeqd(temp, temp); + code.pslld(temp, 31); + code.movdqa(temp_x, x); + code.movdqa(temp_y, y); + code.paddd(temp_x, x); + code.paddd(temp_y, y); + code.pcmpgtd(temp_y, temp_x); + code.psubd(x, y); + code.pandn(temp, temp_y); + code.pxor(x, y); + code.psubd(x, y); + } else { + // Smaller code size - about 36 bytes + code.movdqa(temp, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); + code.pxor(x, temp); + code.pxor(y, temp); + code.movdqa(temp, x); + code.psubd(temp, y); + code.pcmpgtd(y, x); + code.psrld(y, 1); + code.pxor(temp, y); + code.psubd(temp, y); + } } break; } @@ -5727,10 +5748,7 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { code.vpmulld(result, x, y); ctx.reg_alloc.DefineValue(lower_inst, result); - return; - } - - if (code.HasHostFeature(HostFeature::AVX)) { + } else if (code.HasHostFeature(HostFeature::AVX)) { const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); @@ -5749,39 +5767,33 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { code.shufps(result, x, 0b11011101); ctx.reg_alloc.DefineValue(upper_inst, result); - return; - } + } else { + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm upper_result = upper_inst ? ctx.reg_alloc.ScratchXmm() : Xbyak::Xmm{-1}; + const Xbyak::Xmm lower_result = lower_inst ? 
ctx.reg_alloc.ScratchXmm() : Xbyak::Xmm{-1}; - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(); + // calculate unsigned multiply + code.movdqa(tmp, x); + code.pmuludq(tmp, y); + code.psrlq(x, 32); + code.psrlq(y, 32); + code.pmuludq(x, y); - // calculate unsigned multiply - code.movdqa(tmp, x); - code.pmuludq(tmp, y); - code.psrlq(x, 32); - code.psrlq(y, 32); - code.pmuludq(x, y); - - // put everything into place - code.pcmpeqw(upper_result, upper_result); - code.pcmpeqw(lower_result, lower_result); - code.psllq(upper_result, 32); - code.psrlq(lower_result, 32); - code.pand(upper_result, x); - code.pand(lower_result, tmp); - code.psrlq(tmp, 32); - code.psllq(x, 32); - code.por(upper_result, tmp); - code.por(lower_result, x); - - if (upper_inst) { - ctx.reg_alloc.DefineValue(upper_inst, upper_result); - } - if (lower_inst) { - ctx.reg_alloc.DefineValue(lower_inst, lower_result); + // put everything into place - only if needed + if (upper_inst) code.pcmpeqw(upper_result, upper_result); + if (lower_inst) code.pcmpeqw(lower_result, lower_result); + if (upper_inst) code.psllq(upper_result, 32); + if (lower_inst) code.psrlq(lower_result, 32); + if (upper_inst) code.pand(upper_result, x); + if (lower_inst) code.pand(lower_result, tmp); + if (upper_inst) code.psrlq(tmp, 32); + if (lower_inst) code.psllq(x, 32); + if (upper_inst) code.por(upper_result, tmp); + if (lower_inst) code.por(lower_result, x); + if (upper_inst) ctx.reg_alloc.DefineValue(upper_inst, upper_result); + if (lower_inst) ctx.reg_alloc.DefineValue(lower_inst, lower_result); } } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index b24120c346..c8f0d9575c 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -450,7 +450,7 @@ void EmitTwoOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.mov(code.ABI_PARAM3.cvt32(), fpcr); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.movaps(xword[code.ABI_PARAM2], arg1); code.CallFunction(fn); @@ -487,7 +487,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]); code.mov(code.ABI_PARAM4.cvt32(), fpcr); - code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax); #else constexpr u32 stack_space = 3 * 16; @@ -496,7 +496,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.mov(code.ABI_PARAM4.cvt32(), fpcr); - code.lea(code.ABI_PARAM5, code.ptr[code.r15 + 
code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); #endif code.movaps(xword[code.ABI_PARAM2], arg1); @@ -545,7 +545,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]); code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 4 * 16]); code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], ctx.FPCR(fpcr_controlled).Value()); - code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE + 8], rax); #else constexpr u32 stack_space = 4 * 16; @@ -555,7 +555,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]); code.mov(code.ABI_PARAM5.cvt32(), ctx.FPCR(fpcr_controlled).Value()); - code.lea(code.ABI_PARAM6, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM6, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); #endif if constexpr (load_previous_result == LoadPreviousResult::Yes) { diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp index 88bd41a47e..580a32dec8 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp @@ -62,7 +62,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in code.test(overflow.cvt32(), overflow.cvt32()); } code.setnz(overflow); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); ctx.reg_alloc.DefineValue(inst, result); } @@ -104,7 +104,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in code.ktestb(k1, k1); code.setnz(overflow); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); ctx.reg_alloc.DefineValue(inst, result); return; @@ -160,7 +160,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in code.test(overflow.cvt32(), overflow.cvt32()); } code.setnz(overflow); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); if (code.HasHostFeature(HostFeature::SSE41)) { FCODE(blendvp)(result, tmp); @@ -204,7 +204,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.ktestb(k1, k1); code.setnz(overflow); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); ctx.reg_alloc.DefineValue(inst, result); return; @@ -263,7 +263,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* } code.setnz(overflow); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); if constexpr (op == 
Op::Add) {
code.por(result, tmp);
diff --git a/src/dynarmic/src/dynarmic/backend/x64/exclusive_monitor.cpp b/src/dynarmic/src/dynarmic/backend/x64/exclusive_monitor.cpp
index 09ef60205f..f8237c99e8 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/exclusive_monitor.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/exclusive_monitor.cpp
@@ -14,7 +14,7 @@ namespace Dynarmic {
-ExclusiveMonitor::ExclusiveMonitor(size_t processor_count)
+ExclusiveMonitor::ExclusiveMonitor(std::size_t processor_count)
: exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {}
size_t ExclusiveMonitor::GetProcessorCount() const {
@@ -29,20 +29,16 @@ void ExclusiveMonitor::Unlock() {
lock.Unlock();
}
-bool ExclusiveMonitor::CheckAndClear(size_t processor_id, VAddr address) {
+bool ExclusiveMonitor::CheckAndClear(std::size_t processor_id, VAddr address) {
const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
- Lock();
if (exclusive_addresses[processor_id] != masked_address) {
Unlock();
return false;
}
-
- for (VAddr& other_address : exclusive_addresses) {
- if (other_address == masked_address) {
+ for (VAddr& other_address : exclusive_addresses)
+ if (other_address == masked_address)
other_address = INVALID_EXCLUSIVE_ADDRESS;
- }
- }
return true;
}
diff --git a/src/dynarmic/src/dynarmic/backend/x64/hostloc.h b/src/dynarmic/src/dynarmic/backend/x64/hostloc.h
index 1b27edbdee..d6fb88554e 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/hostloc.h
+++ b/src/dynarmic/src/dynarmic/backend/x64/hostloc.h
@@ -13,9 +13,9 @@ namespace Dynarmic::Backend::X64 {
-// Our static vector will contain 32 elements, stt. an uint16_t will fill up 64 bytes
+// Our static vector will contain 32 elements, s.t. with a uint8_t they fit within 64 bytes
// (an entire cache line). Thanks.
-enum class HostLoc : uint16_t {
+enum class HostLoc : std::uint8_t {
// Ordering of the registers is intentional. See also: HostLocToX64.
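// (Sizing note: with std::uint8_t as the underlying type, the 32 enumerators below take
//  one byte each, so the register allocator's static_vector of HostLoc, payload plus size
//  bookkeeping, now fits within a single 64-byte cache line; see the adjusted
//  static_assert in reg_alloc.h further down.)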
RAX,
RCX,
@@ -60,48 +60,48 @@ enum class HostLoc : uint16_t {
constexpr size_t NonSpillHostLocCount = static_cast(HostLoc::FirstSpill);
-inline bool HostLocIsGPR(HostLoc reg) {
+constexpr bool HostLocIsGPR(HostLoc reg) {
return reg >= HostLoc::RAX && reg <= HostLoc::R15;
}
-inline bool HostLocIsXMM(HostLoc reg) {
+constexpr bool HostLocIsXMM(HostLoc reg) {
return reg >= HostLoc::XMM0 && reg <= HostLoc::XMM15;
}
-inline bool HostLocIsRegister(HostLoc reg) {
+constexpr bool HostLocIsRegister(HostLoc reg) {
return HostLocIsGPR(reg) || HostLocIsXMM(reg);
}
-inline bool HostLocIsFlag(HostLoc reg) {
+constexpr bool HostLocIsFlag(HostLoc reg) {
return reg >= HostLoc::CF && reg <= HostLoc::OF;
}
-inline HostLoc HostLocRegIdx(int idx) {
+constexpr HostLoc HostLocRegIdx(int idx) {
ASSERT(idx >= 0 && idx <= 15);
- return static_cast(idx);
+ return HostLoc(idx);
}
-inline HostLoc HostLocXmmIdx(int idx) {
+constexpr HostLoc HostLocXmmIdx(int idx) {
ASSERT(idx >= 0 && idx <= 15);
- return static_cast(static_cast(HostLoc::XMM0) + idx);
+ return HostLoc(size_t(HostLoc::XMM0) + idx);
}
-inline HostLoc HostLocSpill(size_t i) {
- return static_cast(static_cast(HostLoc::FirstSpill) + i);
+constexpr HostLoc HostLocSpill(size_t i) {
+ return HostLoc(size_t(HostLoc::FirstSpill) + i);
}
-inline bool HostLocIsSpill(HostLoc reg) {
+constexpr bool HostLocIsSpill(HostLoc reg) {
return reg >= HostLoc::FirstSpill;
}
-inline size_t HostLocBitWidth(HostLoc loc) {
+constexpr size_t HostLocBitWidth(HostLoc loc) {
if (HostLocIsGPR(loc))
return 64;
- if (HostLocIsXMM(loc))
+ else if (HostLocIsXMM(loc))
return 128;
- if (HostLocIsSpill(loc))
+ else if (HostLocIsSpill(loc))
return 128;
- if (HostLocIsFlag(loc))
+ else if (HostLocIsFlag(loc))
return 1;
UNREACHABLE();
}
@@ -109,6 +109,8 @@ inline size_t HostLocBitWidth(HostLoc loc) {
using HostLocList = std::initializer_list;
// RSP is preserved for function calls
+// R13 contains the fastmem pointer, if any
+// R14 contains the page table pointer
// R15 contains the JitState pointer
const HostLocList any_gpr = {
HostLoc::RAX,
@@ -125,12 +127,16 @@ const HostLocList any_gpr = {
HostLoc::R12,
HostLoc::R13,
HostLoc::R14,
+ //HostLoc::R15,
};
// XMM0 is reserved for use by instructions that implicitly use it as an argument
+// XMM1 is used by the 128-bit memory accessors
+// XMM2 is also used by those (and other things)
+// Basically, don't ever use XMM0, XMM1 or XMM2; they're left for the register selector
const HostLocList any_xmm = {
- HostLoc::XMM1,
- HostLoc::XMM2,
+ //HostLoc::XMM1,
+ //HostLoc::XMM2,
HostLoc::XMM3,
HostLoc::XMM4,
HostLoc::XMM5,
diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
index 916c74193c..fa6006ed2a 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
@@ -357,9 +357,8 @@ void RegAlloc::HostCall(IR::Inst* result_def,
static const boost::container::static_vector other_caller_save = [args_hostloc]() noexcept {
boost::container::static_vector ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
ret.erase(std::find(ret.begin(), ret.end(), ABI_RETURN));
- for (auto const hostloc : args_hostloc) {
+ for (auto const hostloc : args_hostloc)
ret.erase(std::find(ret.begin(), ret.end(), hostloc));
- }
return ret;
}();
@@ -368,7 +367,7 @@ void RegAlloc::HostCall(IR::Inst* result_def,
DefineValueImpl(result_def, ABI_RETURN);
}
- for (size_t i = 0; i < args_count; i++) {
+ for (size_t i = 0; i < args.size(); i++) {
if (args[i] &&
!args[i]->get().IsVoid()) { UseScratch(*args[i], args_hostloc[i]); // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee @@ -383,36 +382,35 @@ void RegAlloc::HostCall(IR::Inst* result_def, case IR::Type::U32: code->mov(reg.cvt32(), reg.cvt32()); break; + case IR::Type::U64: + break; //no op default: - break; // Nothing needs to be done + UNREACHABLE(); } } } - for (size_t i = 0; i < args_count; i++) { + for (size_t i = 0; i < args.size(); i++) if (!args[i]) { // TODO: Force spill ScratchGpr(args_hostloc[i]); } - } - - for (HostLoc caller_saved : other_caller_save) { + for (auto const caller_saved : other_caller_save) ScratchImpl({caller_saved}); - } } void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept { - ASSERT(stack_space < static_cast(std::numeric_limits::max())); + ASSERT(stack_space < size_t(std::numeric_limits::max())); ASSERT(reserved_stack_space == 0); reserved_stack_space = stack_space; - code->sub(code->rsp, static_cast(stack_space)); + code->sub(code->rsp, u32(stack_space)); } void RegAlloc::ReleaseStackSpace(const size_t stack_space) noexcept { - ASSERT(stack_space < static_cast(std::numeric_limits::max())); + ASSERT(stack_space < size_t(std::numeric_limits::max())); ASSERT(reserved_stack_space == stack_space); reserved_stack_space = 0; - code->add(code->rsp, static_cast(stack_space)); + code->add(code->rsp, u32(stack_space)); } HostLoc RegAlloc::SelectARegister(const boost::container::static_vector& desired_locations) const noexcept { @@ -429,13 +427,22 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector= HostLoc::R13 && *it <= HostLoc::R15) { + // skip, do not touch + // Intel recommends to reuse registers as soon as they're overwritable (DO NOT SPILL) + } else if (loc_info.IsEmpty()) { + it_empty_candidate = it; + break; + // No empty registers for some reason (very evil) - just do normal LRU } else { if (loc_info.lru_counter < min_lru_counter) { - if (loc_info.IsEmpty()) - it_empty_candidate = it; // Otherwise a "quasi"-LRU min_lru_counter = loc_info.lru_counter; if (*it >= HostLoc::R8 && *it <= HostLoc::R15) { @@ -446,9 +453,6 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vectormov(reg, imm_value); } - return host_loc; - } - - if (HostLocIsXMM(host_loc)) { + } else if (HostLocIsXMM(host_loc)) { const Xbyak::Xmm reg = HostLocToXmm(host_loc); const u64 imm_value = imm.GetImmediateAsU64(); if (imm_value == 0) { @@ -508,22 +508,19 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept { } else { MAYBE_AVX(movaps, reg, code->Const(code->xword, imm_value)); } - return host_loc; + } else { + UNREACHABLE(); } - - UNREACHABLE(); + return host_loc; } void RegAlloc::Move(HostLoc to, HostLoc from) noexcept { const size_t bit_width = LocInfo(from).GetMaxBitWidth(); - ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked()); ASSERT(bit_width <= HostLocBitWidth(to)); - - if (!LocInfo(from).IsEmpty()) { - EmitMove(bit_width, to, from); - LocInfo(to) = std::exchange(LocInfo(from), {}); - } + ASSERT_MSG(!LocInfo(from).IsEmpty(), "Mov eliminated"); + EmitMove(bit_width, to, from); + LocInfo(to) = std::exchange(LocInfo(from), {}); } void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept { @@ -557,30 +554,44 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept { ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled"); ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers"); 
ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt"); - - const HostLoc new_loc = FindFreeSpill(); + auto const new_loc = FindFreeSpill(HostLocIsXMM(loc)); Move(new_loc, loc); } -HostLoc RegAlloc::FindFreeSpill() const noexcept { - for (size_t i = static_cast(HostLoc::FirstSpill); i < hostloc_info.size(); i++) { - const auto loc = static_cast(i); - if (LocInfo(loc).IsEmpty()) { - return loc; - } +HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { +#if 0 + // TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows + // but it's fine anyways. We can find other ways to cheat it later - but which?!?! + // we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end? + // TODO(lizzie): This needs to be investigated further later. + // Do not spill XMM into other XMM silly + if (!is_xmm) { + // TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY + // Intel recommends to spill GPR onto XMM registers IF POSSIBLE + // TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call? + // Must sync with ABI registers (except XMM0, XMM1 and XMM2) + for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i) + if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) + return loc; } - +#endif + // Otherwise go to stack spilling + for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i) + if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) + return loc; ASSERT_FALSE("All spill locations are full"); -} - -inline static Xbyak::RegExp SpillToOpArg_Helper1(HostLoc loc, size_t reserved_stack_space) noexcept { - ASSERT(HostLocIsSpill(loc)); - size_t i = static_cast(loc) - static_cast(HostLoc::FirstSpill); - ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations"); - return Xbyak::util::rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]); -} +}; void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept { + auto const spill_to_op_arg_helper = [&](HostLoc loc, size_t reserved_stack_space) { + ASSERT(HostLocIsSpill(loc)); + size_t i = size_t(loc) - size_t(HostLoc::FirstSpill); + ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations"); + return Xbyak::util::rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]); + }; + auto const spill_xmm_to_op = [&](const HostLoc loc) { + return Xbyak::util::xword[spill_to_op_arg_helper(loc, reserved_stack_space)]; + }; if (HostLocIsXMM(to) && HostLocIsXMM(from)) { MAYBE_AVX(movaps, HostLocToXmm(to), HostLocToXmm(from)); } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) { @@ -605,7 +616,7 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc MAYBE_AVX(movd, HostLocToReg64(to).cvt32(), HostLocToXmm(from)); } } else if (HostLocIsXMM(to) && HostLocIsSpill(from)) { - const Xbyak::Address spill_addr = SpillToOpArg(from); + const Xbyak::Address spill_addr = spill_xmm_to_op(from); ASSERT(spill_addr.getBit() >= bit_width); switch (bit_width) { case 128: @@ -623,7 +634,7 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc UNREACHABLE(); } } else if (HostLocIsSpill(to) && HostLocIsXMM(from)) { - const Xbyak::Address spill_addr = SpillToOpArg(to); + const Xbyak::Address spill_addr = spill_xmm_to_op(to); 
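// (The spill_to_op_arg_helper lambda above mirrors the removed SpillToOpArg_Helper1
//  free function: a spill slot lives in the current frame at rsp + reserved_stack_space +
//  ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]),
//  and spill_xmm_to_op wraps that address as an xword operand for the 128-bit XMM moves
//  used here.)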
ASSERT(spill_addr.getBit() >= bit_width); switch (bit_width) { case 128: @@ -643,16 +654,16 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) { ASSERT(bit_width != 128); if (bit_width == 64) { - code->mov(HostLocToReg64(to), Xbyak::util::qword[SpillToOpArg_Helper1(from, reserved_stack_space)]); + code->mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]); } else { - code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[SpillToOpArg_Helper1(from, reserved_stack_space)]); + code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]); } } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) { ASSERT(bit_width != 128); if (bit_width == 64) { - code->mov(Xbyak::util::qword[SpillToOpArg_Helper1(to, reserved_stack_space)], HostLocToReg64(from)); + code->mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from)); } else { - code->mov(Xbyak::util::dword[SpillToOpArg_Helper1(to, reserved_stack_space)], HostLocToReg64(from).cvt32()); + code->mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32()); } } else { ASSERT_FALSE("Invalid RegAlloc::EmitMove"); @@ -669,8 +680,4 @@ void RegAlloc::EmitExchange(const HostLoc a, const HostLoc b) noexcept { } } -Xbyak::Address RegAlloc::SpillToOpArg(const HostLoc loc) noexcept { - return Xbyak::util::xword[SpillToOpArg_Helper1(loc, reserved_stack_space)]; -} - } // namespace Dynarmic::Backend::X64 diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index 12b6010aa8..f70329f471 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -22,6 +22,7 @@ #include "dynarmic/backend/x64/hostloc.h" #include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/oparg.h" +#include "dynarmic/backend/x64/abi.h" #include "dynarmic/ir/cond.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/value.h" @@ -242,20 +243,19 @@ private: void MoveOutOfTheWay(HostLoc reg) noexcept; void SpillRegister(HostLoc loc) noexcept; - HostLoc FindFreeSpill() const noexcept; + HostLoc FindFreeSpill(bool is_xmm) const noexcept; inline HostLocInfo& LocInfo(const HostLoc loc) noexcept { - ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15); + ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR); return hostloc_info[static_cast(loc)]; } inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept { - ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15); + ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR); return hostloc_info[static_cast(loc)]; } void EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept; void EmitExchange(const HostLoc a, const HostLoc b) noexcept; - Xbyak::Address SpillToOpArg(const HostLoc loc) noexcept; //data alignas(64) boost::container::static_vector gpr_order; @@ -264,7 +264,7 @@ private: BlockOfCode* code = nullptr; size_t reserved_stack_space = 0; }; -// Ensure a cache line is used, this is primordial -static_assert(sizeof(boost::container::static_vector) == 64); +// Ensure a cache line (or less) is used, this is primordial +static_assert(sizeof(boost::container::static_vector) == 40); } // namespace Dynarmic::Backend::X64 diff --git a/src/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.cpp 
b/src/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.cpp index 3378786c46..b3a02005eb 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.cpp @@ -22,7 +22,7 @@ void PrintVerboseDebuggingOutputLine(RegisterData& reg_data, HostLoc hostloc, si } else if (HostLocIsXMM(hostloc)) { return reg_data.xmms[HostLocToXmm(hostloc).getIdx()]; } else if (HostLocIsSpill(hostloc)) { - return (*reg_data.spill)[static_cast(hostloc) - static_cast(HostLoc::FirstSpill)]; + return (*reg_data.spill)[size_t(hostloc) - size_t(HostLoc::FirstSpill)]; } else { fmt::print("invalid hostloc! "); return {0, 0}; diff --git a/src/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.h b/src/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.h index 68d0ccff24..3f4823010b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.h +++ b/src/dynarmic/src/dynarmic/backend/x64/verbose_debugging_output.h @@ -16,7 +16,7 @@ namespace Dynarmic::Backend::X64 { -enum class HostLoc : uint16_t; +enum class HostLoc : std::uint8_t; using Vector = std::array; #ifdef _MSC_VER diff --git a/src/dynarmic/src/dynarmic/common/assert.cpp b/src/dynarmic/src/dynarmic/common/assert.cpp index b0d3450c36..84a398f23e 100644 --- a/src/dynarmic/src/dynarmic/common/assert.cpp +++ b/src/dynarmic/src/dynarmic/common/assert.cpp @@ -4,6 +4,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include +#include [[noreturn]] void assert_terminate_impl(const char* expr_str, fmt::string_view msg, fmt::format_args args) { fmt::print(stderr, "assertion failed: {}\n", expr_str); diff --git a/src/dynarmic/src/dynarmic/common/crypto/crc32.cpp b/src/dynarmic/src/dynarmic/common/crypto/crc32.cpp index c2821fa2c3..6b9c129a44 100644 --- a/src/dynarmic/src/dynarmic/common/crypto/crc32.cpp +++ b/src/dynarmic/src/dynarmic/common/crypto/crc32.cpp @@ -152,11 +152,9 @@ constexpr CRC32Table iso_table{ static u32 ComputeCRC32(const CRC32Table& table, u32 crc, const u64 value, int length) { const auto* data = reinterpret_cast(&value); - while (length-- > 0) { crc = (crc >> 8) ^ table[(crc ^ (*data++)) & 0xFF]; } - return crc; } diff --git a/src/dynarmic/src/dynarmic/common/spin_lock.h b/src/dynarmic/src/dynarmic/common/spin_lock.h index f653704db6..e97ba2897f 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock.h +++ b/src/dynarmic/src/dynarmic/common/spin_lock.h @@ -8,9 +8,8 @@ namespace Dynarmic { struct SpinLock { - void Lock(); - void Unlock(); - + void Lock() noexcept; + void Unlock() noexcept; volatile int storage = 0; }; diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_arm64.cpp b/src/dynarmic/src/dynarmic/common/spin_lock_arm64.cpp index ccf807e2d2..7833b65403 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_arm64.cpp +++ b/src/dynarmic/src/dynarmic/common/spin_lock_arm64.cpp @@ -73,12 +73,12 @@ void SpinLockImpl::Initialize() { } // namespace -void SpinLock::Lock() { +void SpinLock::Lock() noexcept { std::call_once(flag, &SpinLockImpl::Initialize, impl); impl.lock(&storage); } -void SpinLock::Unlock() { +void SpinLock::Unlock() noexcept { std::call_once(flag, &SpinLockImpl::Initialize, impl); impl.unlock(&storage); } diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp index fdea94f4be..474c2f8404 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp @@ -16,15 +16,14 @@ namespace 
Dynarmic { void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { Xbyak::Label start, loop; - code.jmp(start); + code.jmp(start, code.T_NEAR); code.L(loop); code.pause(); code.L(start); code.mov(tmp, 1); - code.lock(); - code.xchg(code.dword[ptr], tmp); + /*code.lock();*/ code.xchg(code.dword[ptr], tmp); code.test(tmp, tmp); - code.jnz(loop); + code.jnz(loop, code.T_NEAR); } void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { @@ -63,12 +62,12 @@ void SpinLockImpl::Initialize() { } // namespace -void SpinLock::Lock() { +void SpinLock::Lock() noexcept { std::call_once(flag, &SpinLockImpl::Initialize, impl); impl.lock(&storage); } -void SpinLock::Unlock() { +void SpinLock::Unlock() noexcept { std::call_once(flag, &SpinLockImpl::Initialize, impl); impl.unlock(&storage); } diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp index 7ef8b7e890..6a25eb97c6 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/impl/load_store.cpp @@ -109,13 +109,11 @@ bool TranslatorVisitor::arm_LDR_imm(Cond cond, bool P, bool U, bool W, Reg n, Re if (t == Reg::PC) { ir.LoadWritePC(data); - if (!P && W && n == Reg::R13) { ir.SetTerm(IR::Term::PopRSBHint{}); } else { ir.SetTerm(IR::Term::FastDispatchHint{}); } - return false; } @@ -145,7 +143,11 @@ bool TranslatorVisitor::arm_LDR_reg(Cond cond, bool P, bool U, bool W, Reg n, Re if (t == Reg::PC) { ir.LoadWritePC(data); - ir.SetTerm(IR::Term::FastDispatchHint{}); + if (!P && W && n == Reg::R13) { + ir.SetTerm(IR::Term::PopRSBHint{}); + } else { + ir.SetTerm(IR::Term::FastDispatchHint{}); + } return false; } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp index 01cc1390c7..faf0686231 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp @@ -21,6 +21,7 @@ bool TranslatorVisitor::B_uncond(Imm<26> imm26) { const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend(); const u64 target = ir.PC() + offset; + //ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); return false; } diff --git a/src/dynarmic/src/dynarmic/interface/exclusive_monitor.h b/src/dynarmic/src/dynarmic/interface/exclusive_monitor.h index 4813675873..566743c767 100644 --- a/src/dynarmic/src/dynarmic/interface/exclusive_monitor.h +++ b/src/dynarmic/src/dynarmic/interface/exclusive_monitor.h @@ -6,11 +6,10 @@ #pragma once #include -#include #include #include #include -#include +#include #include @@ -80,9 +79,10 @@ private: static constexpr VAddr RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFFFull; static constexpr VAddr INVALID_EXCLUSIVE_ADDRESS = 0xDEAD'DEAD'DEAD'DEADull; + static constexpr size_t MAX_NUM_CPU_CORES = 4; // Sync with src/core/hardware_properties + boost::container::static_vector exclusive_addresses; + boost::container::static_vector exclusive_values; SpinLock lock; - std::vector exclusive_addresses; - std::vector exclusive_values; }; } // namespace Dynarmic diff --git a/src/dynarmic/src/dynarmic/interface/optimization_flags.h b/src/dynarmic/src/dynarmic/interface/optimization_flags.h index 2f65f0bfa4..743d902767 100644 --- 
a/src/dynarmic/src/dynarmic/interface/optimization_flags.h
+++ b/src/dynarmic/src/dynarmic/interface/optimization_flags.h
@@ -32,6 +32,8 @@ enum class OptimizationFlag : std::uint32_t {
ConstProp = 0x00000010,
/// This enables miscellaneous safe IR optimizations.
MiscIROpt = 0x00000020,
+ /// Optimize for code speed rather than for code size (this serves well for tight loops)
+ CodeSpeed = 0x00000040,
/// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations.
/// This unfuses fused instructions to improve performance on host CPUs without FMA support.
diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp
index 12765e26a8..b00ab3cb20 100644
--- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp
+++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp
@@ -86,11 +86,9 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept {
}
std::string DumpBlock(const IR::Block& block) noexcept {
- std::string ret;
-
- ret += fmt::format("Block: location={}\n", block.Location());
- ret += fmt::format("cycles={}", block.CycleCount());
- ret += fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
+ std::string ret = fmt::format("Block: location={}-{}\n", block.Location(), block.EndLocation())
+ + fmt::format("cycles={}", block.CycleCount())
+ + fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
if (block.GetCondition() != Cond::AL) {
ret += fmt::format(", cond_fail={}", block.ConditionFailedLocation());
}
@@ -116,6 +114,8 @@ std::string DumpBlock(const IR::Block& block) noexcept {
return fmt::format("#{:#x}", arg.GetU32());
case Type::U64:
return fmt::format("#{:#x}", arg.GetU64());
+ case Type::U128:
+ return fmt::format("#");
case Type::A32Reg:
return A32::RegToString(arg.GetA32RegRef());
case Type::A32ExtReg:
return A32::ExtRegToString(arg.GetA32ExtRegRef());
case Type::A64Reg:
return A64::RegToString(arg.GetA64RegRef());
case Type::A64Vec:
return A64::VecToString(arg.GetA64VecRef());
+ case Type::CoprocInfo:
+ return fmt::format("#");
+ case Type::NZCVFlags:
+ return fmt::format("#");
+ case Type::Cond:
+ return fmt::format("#", A32::CondToString(arg.GetCond()));
+ case Type::Table:
+ return fmt::format("#");
+ case Type::AccType:
+ return fmt::format("#", u32(arg.GetAccType()));
default:
- return "";
+ return fmt::format("", arg.GetType());
}
};
diff --git a/src/dynarmic/src/dynarmic/ir/microinstruction.h b/src/dynarmic/src/dynarmic/ir/microinstruction.h
index bc5a355793..6651aab7c5 100644
--- a/src/dynarmic/src/dynarmic/ir/microinstruction.h
+++ b/src/dynarmic/src/dynarmic/ir/microinstruction.h
@@ -19,7 +19,7 @@ namespace Dynarmic::IR {
enum class Opcode;
-enum class Type;
+enum class Type : u16;
constexpr size_t max_arg_count = 4;
diff --git a/src/dynarmic/src/dynarmic/ir/opcodes.cpp b/src/dynarmic/src/dynarmic/ir/opcodes.cpp
index e7e73b7032..828cdb5109 100644
--- a/src/dynarmic/src/dynarmic/ir/opcodes.cpp
+++ b/src/dynarmic/src/dynarmic/ir/opcodes.cpp
@@ -16,12 +16,6 @@ namespace Dynarmic::IR {
namespace OpcodeInfo {
-struct Meta {
- std::vector arg_types;
- const char* name;
- Type type;
-};
-
constexpr Type Void = Type::Void;
constexpr Type A32Reg = Type::A32Reg;
constexpr Type A32ExtReg = Type::A32ExtReg;
@@ -40,36 +34,62 @@ constexpr Type Cond = Type::Cond;
constexpr Type Table = Type::Table;
constexpr Type AccType = Type::AccType;
-alignas(64) static const std::array opcode_info{
-#define OPCODE(name, type, ...)
Meta{{__VA_ARGS__}, #name, type},
-#define A32OPC(name, type, ...) Meta{{__VA_ARGS__}, #name, type},
-#define A64OPC(name, type, ...) Meta{{__VA_ARGS__}, #name, type},
+struct Meta {
+ std::vector arg_types;
+ Type type;
+};
+
+// Evil macro magic for Intel C++ compiler
+// Helper macro to force expanding __VA_ARGS__ to satisfy MSVC compiler.
+#define PP_EXPAND(x) x
+#define PP_NARGS(...) PP_EXPAND(PP_ARG_N(__VA_ARGS__, 5, 4, 3, 2, 1, 0))
+#define PP_ARG_N(_1, _2, _3, _4, _5, N, ...) N
+
+alignas(64) static const Meta opcode_info[] = {
+#define OPCODE(name, type, ...) Meta{{__VA_ARGS__}, type},
+#define A32OPC(name, type, ...) Meta{{__VA_ARGS__}, type},
+#define A64OPC(name, type, ...) Meta{{__VA_ARGS__}, type},
#include "./opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
};
+// Be aware of trailing commas, they can cause PP_NARGS to return 2!
+static_assert(PP_EXPAND(PP_NARGS(u8,)) == 2);
+static_assert(PP_EXPAND(PP_NARGS(u8)) == 1);
+static_assert(PP_EXPAND(PP_NARGS(u8, u16)) == 2);
+static_assert(PP_EXPAND(PP_NARGS(u8, u16, u32)) == 3);
+
} // namespace OpcodeInfo
/// @brief Get return type of an opcode
Type GetTypeOf(Opcode op) noexcept {
- return OpcodeInfo::opcode_info.at(size_t(op)).type;
+ return OpcodeInfo::opcode_info[size_t(op)].type;
}
/// @brief Get the number of arguments an opcode accepts
size_t GetNumArgsOf(Opcode op) noexcept {
- return OpcodeInfo::opcode_info.at(size_t(op)).arg_types.size();
+ return OpcodeInfo::opcode_info[size_t(op)].arg_types.size();
}
/// @brief Get the required type of an argument of an opcode
Type GetArgTypeOf(Opcode op, size_t arg_index) noexcept {
- return OpcodeInfo::opcode_info.at(size_t(op)).arg_types.at(arg_index);
+ return OpcodeInfo::opcode_info[size_t(op)].arg_types[arg_index];
}
/// @brief Get the name of an opcode.
-std::string GetNameOf(Opcode op) noexcept {
- return OpcodeInfo::opcode_info.at(size_t(op)).name;
+std::string_view GetNameOf(Opcode op) noexcept {
+ static const std::string_view opcode_names[] = {
+#define OPCODE(name, type, ...) #name,
+#define A32OPC(name, type, ...) #name,
+#define A64OPC(name, type, ...) #name,
+#include "./opcodes.inc"
+#undef OPCODE
+#undef A32OPC
+#undef A64OPC
+ };
+ return opcode_names[size_t(op)];
}
} // namespace Dynarmic::IR
diff --git a/src/dynarmic/src/dynarmic/ir/opcodes.h b/src/dynarmic/src/dynarmic/ir/opcodes.h
index c11ad549da..a231365fa7 100644
--- a/src/dynarmic/src/dynarmic/ir/opcodes.h
+++ b/src/dynarmic/src/dynarmic/ir/opcodes.h
@@ -15,7 +15,7 @@ namespace Dynarmic::IR {
-enum class Type;
+enum class Type : u16;
/// @brief The Opcodes of our intermediate representation.
/// Type signatures for each opcode can be found in opcodes.inc
@@ -35,7 +35,7 @@ constexpr size_t OpcodeCount = static_cast(Opcode::NUM_OPCODE);
Type GetTypeOf(Opcode op) noexcept;
size_t GetNumArgsOf(Opcode op) noexcept;
Type GetArgTypeOf(Opcode op, size_t arg_index) noexcept;
-std::string GetNameOf(Opcode op) noexcept;
+std::string_view GetNameOf(Opcode op) noexcept;
/// @brief Determines whether or not this instruction performs an arithmetic shift.
constexpr bool IsArithmeticShift(const Opcode op) noexcept {
diff --git a/src/dynarmic/src/dynarmic/ir/type.h b/src/dynarmic/src/dynarmic/ir/type.h
index 0aaf9d9414..e223513367 100644
--- a/src/dynarmic/src/dynarmic/ir/type.h
+++ b/src/dynarmic/src/dynarmic/ir/type.h
@@ -18,7 +18,7 @@ namespace Dynarmic::IR {
/**
* The intermediate representation is typed. These are the types used by our IR.
 */
-enum class Type {
+enum class Type : u16 {
     Void = 0,
     A32Reg = 1 << 0,
     A32ExtReg = 1 << 1,
diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp
index 9498f86d9b..087ce54813 100644
--- a/src/dynarmic/tests/A32/fuzz_arm.cpp
+++ b/src/dynarmic/tests/A32/fuzz_arm.cpp
@@ -357,7 +357,7 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
     uni.ClearPageCache();
 
     jit_env.ticks_left = ticks_left;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
 
     uni_env.ticks_left = instructions.size(); // Unicorn counts thumb instructions weirdly.
     uni.Run();
@@ -445,6 +445,9 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
         }
     }
 
+    // TODO: Why the difference? QEMU, what are you doing???
+    jit.Regs()[15] = uni.GetRegisters()[15];
+
     REQUIRE(uni.GetRegisters() == jit.Regs());
     REQUIRE(uni.GetExtRegs() == jit.ExtRegs());
     REQUIRE((uni.GetCpsr() & 0xFFFFFDDF) == (jit.Cpsr() & 0xFFFFFDDF));
diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp
index dfd5672772..4d14141bbf 100644
--- a/src/dynarmic/tests/A32/fuzz_thumb.cpp
+++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp
@@ -130,7 +130,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn<ThumbTestEnv>
expected = {0x954d53b0, 0x4caaad40, 0xb0afaead, 0x0da0cdb6, 0x0f43507e, 0xb4b3b2b1, 0x00000066, 0x892a6888, 0x3b9ffb23, 0x0a92ef93, 0x38dee619, 0xc0e95e81, 0x6a448690, 0xc2d4d6b9, 0xe93600b9, 0x0000000a}; diff --git a/src/dynarmic/tests/A32/testenv.h b/src/dynarmic/tests/A32/testenv.h index b196c5e568..a6df2017ce 100644 --- a/src/dynarmic/tests/A32/testenv.h +++ b/src/dynarmic/tests/A32/testenv.h @@ -16,8 +16,8 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" - #include "dynarmic/interface/A32/a32.h" +#include "../native/testenv.h" template class A32TestEnv : public Dynarmic::A32::UserCallbacks { diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index 801b01d555..40eff1f071 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -28,7 +28,7 @@ TEST_CASE("A64: ADD", "[a64]") { jit.SetPC(0); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(0) == 3); REQUIRE(jit.GetRegister(1) == 1); @@ -54,7 +54,7 @@ TEST_CASE("A64: ADD{V,P}", "[a64]") { jit.SetPC(0); env.ticks_left = 7; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(1) == Vector{0x0000000000000008, 0x0000000000000000}); REQUIRE(jit.GetVector(2) == Vector{0x0000000000000010, 0x0000000000000000}); @@ -79,9 +79,8 @@ TEST_CASE("A64: CLZ", "[a64]") { jit.SetVector(0, {0xeff0fafbfcfdfeff, 0xff7f3f1f0f070301}); jit.SetVector(1, {0xfffcfffdfffeffff, 0x000F000700030001}); jit.SetVector(2, {0xfffffffdfffffffe, 0x0000000300000001}); - env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(3) == Vector{0x0, 0x0001020304050607}); REQUIRE(jit.GetVector(4) == Vector{0x0, 0x000c000d000e000f}); @@ -106,7 +105,7 @@ TEST_CASE("A64: UADDL{V,P}", "[a64]") { jit.SetPC(0); env.ticks_left = 7; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(1) == Vector{0x00000000000007f8, 0x0000000000000000}); REQUIRE(jit.GetVector(2) == Vector{0x0000000000000ff0, 0x0000000000000000}); @@ -134,7 +133,7 @@ TEST_CASE("A64: SADDL{V,P}", "[a64]") { jit.SetPC(0); env.ticks_left = 7; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(1) == Vector{0x000000000000fff8, 0x0000000000000000}); REQUIRE(jit.GetVector(2) == Vector{0x000000000000fff0, 0x0000000000000000}); @@ -165,7 +164,7 @@ TEST_CASE("A64: VQADD", "[a64]") { jit.SetPC(0); env.ticks_left = 9; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(2) == Vector{0xff8fff7ffffe7f7f, 0xffffffffffffffff}); REQUIRE(jit.GetVector(3) == Vector{0xff7f7e7fff7f7f7f, 0xffffffffffffffff}); @@ -198,7 +197,7 @@ TEST_CASE("A64: VQSUB", "[a64]") { jit.SetPC(0); env.ticks_left = 9; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(2) == Vector{0x0100800001000000, 0x0100000001000100}); REQUIRE(jit.GetVector(3) == Vector{0x8091808180008181, 0x8001010180018001}); @@ -225,7 +224,7 @@ TEST_CASE("A64: REV", "[a64]") { jit.SetPC(0); env.ticks_left = 3; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(0) == 0x11ffeeddccbbaa); REQUIRE(jit.GetRegister(1) == 0xddccbbaa); @@ -245,7 +244,7 @@ TEST_CASE("A64: REV32", "[a64]") { jit.SetPC(0); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(0) == 0xddccbbaa0011ffee); REQUIRE(jit.GetPC() == 4); } @@ -266,7 +265,7 @@ TEST_CASE("A64: REV16", "[a64]") { jit.SetPC(0); env.ticks_left = 3; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(0) == 
0xbbaaddccffee0011); REQUIRE(jit.GetRegister(1) == 0xbbaaddcc); REQUIRE(jit.GetPC() == 8); @@ -299,7 +298,7 @@ TEST_CASE("A64: SSHL", "[a64]") { jit.SetVector(17, {0x8000000000000000, 0xFFFFFFFFFFFFFFFF}); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(4) == Vector{0xfffffefcf8f0e0c0, 0x0080e0f0f8fcfeff}); CHECK(jit.GetVector(5) == Vector{0xf800f000e000c000, 0xfff0fff8fffcfffe}); @@ -344,7 +343,7 @@ TEST_CASE("A64: USHL", "[a64]") { jit.SetVector(17, {0x8000000000000000, 0x0000000000000001}); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(4) == Vector{0x003f000000000000, 0x0080e0f0f8fcfeff}); CHECK(jit.GetVector(14) == Vector{0x0000000102040810}); @@ -380,7 +379,7 @@ TEST_CASE("A64: URSHL", "[a64]") { jit.SetVector(11, Vector{0xffffffffffffffc1, 0x00555555555555f5}); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(0) == Vector{0x00000001'53500000, 0x00000001'00000000}); CHECK(jit.GetVector(3) == Vector{0x00000001'00000002, 0x80000000'fffffffe}); @@ -406,7 +405,7 @@ TEST_CASE("A64: XTN", "[a64]") { jit.SetVector(2, {0x0000000000000000, 0x1111111111111111}); env.ticks_left = 4; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(3) == Vector{0x7766554433221100, 0x0000000000000000}); REQUIRE(jit.GetVector(4) == Vector{0x3333222211110000, 0x0000000000000000}); @@ -449,7 +448,7 @@ TEST_CASE("A64: TBL", "[a64]") { jit.SetPC(0); env.ticks_left = 9; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(0) == Vector{0x001122334455'00'77, 0x0000000000000000}); REQUIRE(jit.GetVector(1) == Vector{0x001122334455'00'77, 0x8899aabbccddeeff}); @@ -497,7 +496,7 @@ TEST_CASE("A64: TBX", "[a64]") { jit.SetPC(0); env.ticks_left = 9; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(0) == Vector{0x001122334455'FF'77, 0x0000000000000000}); REQUIRE(jit.GetVector(1) == Vector{0x001122334455'FF'77, 0x8899aabbccddeeff}); @@ -524,7 +523,7 @@ TEST_CASE("A64: AND", "[a64]") { jit.SetPC(0); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(0) == 1); REQUIRE(jit.GetRegister(1) == 1); @@ -546,7 +545,7 @@ TEST_CASE("A64: Bitmasks", "[a64]") { jit.SetPC(0); env.ticks_left = 4; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(0) == 0x01010101); REQUIRE(jit.GetRegister(1) == 0x00F000F0); @@ -570,7 +569,7 @@ TEST_CASE("A64: ANDS NZCV", "[a64]") { jit.SetPC(0); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(0) == 0xFFFFFFFF); REQUIRE(jit.GetRegister(1) == 0xFFFFFFFF); @@ -586,7 +585,7 @@ TEST_CASE("A64: ANDS NZCV", "[a64]") { jit.SetPC(0); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(0) == 0x00000000); REQUIRE(jit.GetRegister(1) == 0xFFFFFFFF); @@ -601,7 +600,7 @@ TEST_CASE("A64: ANDS NZCV", "[a64]") { jit.SetPC(0); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(0) == 0x12240010); REQUIRE(jit.GetRegister(1) == 0x12345678); @@ -628,7 +627,7 @@ TEST_CASE("A64: CBZ", "[a64]") { jit.SetRegister(0, 1); env.ticks_left = 4; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(2) == 1); REQUIRE(jit.GetPC() == 8); @@ -639,7 +638,7 @@ TEST_CASE("A64: CBZ", "[a64]") { jit.SetRegister(0, 0); env.ticks_left = 4; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); 
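// Note on the pervasive jit.Run() -> CheckedRun(...) change in this patch:
// CheckedRun comes from tests/native/testenv.h, which this diff does not
// show. A minimal sketch of what such a wrapper could look like, assuming
// its only job is to surface escaped exceptions as test failures (the name
// and behaviour of the real helper may differ):
#include <exception>
template<typename Fn>
void CheckedRun(Fn&& fn) {
    try {
        fn();                   // typically: jit.Run()
    } catch (const std::exception& e) {
        FAIL(e.what());         // Catch2 macro: fail the test with the message
    }
}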
REQUIRE(jit.GetRegister(2) == 2); REQUIRE(jit.GetPC() == 16); @@ -663,7 +662,7 @@ TEST_CASE("A64: TBZ", "[a64]") { jit.SetRegister(0, 0xFF); env.ticks_left = 4; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(2) == 1); REQUIRE(jit.GetPC() == 8); @@ -674,7 +673,7 @@ TEST_CASE("A64: TBZ", "[a64]") { jit.SetRegister(0, 0); env.ticks_left = 4; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(2) == 2); REQUIRE(jit.GetPC() == 16); @@ -685,7 +684,7 @@ TEST_CASE("A64: TBZ", "[a64]") { jit.SetRegister(0, 1); env.ticks_left = 4; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(2) == 2); REQUIRE(jit.GetPC() == 16); @@ -706,7 +705,7 @@ TEST_CASE("A64: FABD", "[a64]") { jit.SetVector(21, {0x56d3f085ff890e2b, 0x6e4b0a41801a2d00}); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(22) == Vector{0x56d3f0857fc90e2b, 0x6e4b0a4144873176}); } @@ -728,7 +727,7 @@ TEST_CASE("A64: FABS", "[a64]") { jit.SetVector(2, {0xffffffffffffffff, 0x8000000000000000}); env.ticks_left = 4; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(4) == Vector{0x7fff7fff7fff7fff, 0x7fff7fff7fff0000}); REQUIRE(jit.GetVector(5) == Vector{0x7fbfffff7fc00000, 0x7f80000000000000}); @@ -753,7 +752,7 @@ TEST_CASE("A64: FMIN (example)", "[a64]") { jit.SetVector(3, {0xbff0000000000000, 0x6e4b0a41ffffffff}); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(0) == Vector{0x7fc00000'00000001, 0x00000000'7fd84a37}); REQUIRE(jit.GetVector(2) == Vector{0xbff0000000000000, 0x3ff0000000000000}); @@ -777,7 +776,7 @@ TEST_CASE("A64: FMAX (example)", "[a64]") { jit.SetVector(3, {0xbff0000000000000, 0x6e4b0a41ffffffff}); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(0) == Vector{0x7fc00000'09503366, 0x6e4b0a41'7fd84a37}); REQUIRE(jit.GetVector(2) == Vector{0x7fc0000009503366, 0x6e4b0a41ffffffff}); @@ -801,7 +800,7 @@ TEST_CASE("A64: FMINNM (example)", "[a64]") { jit.SetVector(3, {0xfff0000000000000, 0xffffffffffffffff}); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(0) == Vector{0xc1200000'00000001, 0x00000000'7fd84a37}); REQUIRE(jit.GetVector(2) == Vector{0xfff0000000000000, 0x3ff0000000000000}); @@ -825,7 +824,7 @@ TEST_CASE("A64: FMAXNM (example)", "[a64]") { jit.SetVector(3, {0xfff0000000000000, 0xffffffffffffffff}); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(0) == Vector{0xc1200000'09503366, 0x6e4b0a41'7fd84a37}); REQUIRE(jit.GetVector(2) == Vector{0x7fc0000009503366, 0x3ff0000000000000}); @@ -846,7 +845,7 @@ TEST_CASE("A64: FMAXNM (example 2)", "[a64]") { jit.SetVector(27, {0xbc48d091'c79b271e, 0xff800001'3304c3ef}); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(29) == Vector{0xb485877c'42280000, 0xffc00001'3304c3ef}); } @@ -876,7 +875,7 @@ TEST_CASE("A64: 128-bit exclusive read/write", "[a64]") { jit.SetRegister(6, 0xd0d0cacad0d0caca); env.ticks_left = 3; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(1) == 0x7f7e7d7c7b7a7978); REQUIRE(jit.GetRegister(2) == 0x8786858483828180); @@ -903,7 +902,7 @@ TEST_CASE("A64: CNTPCT_EL0", "[a64]") { env.code_mem.emplace_back(0x14000000); // B . 
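// Aside: the 0x14000000 terminator used by these tests is the AArch64
// encoding of "B ." (branch-to-self), which spins until the environment
// runs out of ticks. The encoding arithmetic, with EncodeB as a purely
// illustrative helper that is not part of the patch:
constexpr u32 EncodeB(s32 offset_in_words) {
    // B <label>: opcode 0b000101 in bits [31:26], signed imm26 word offset below.
    return 0x14000000u | (static_cast<u32>(offset_in_words) & 0x03ffffffu);
}
static_assert(EncodeB(0) == 0x14000000u);   // "B ." : branch to itself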
env.ticks_left = 10; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(3) == 7); } @@ -923,7 +922,7 @@ TEST_CASE("A64: FNMSUB 1", "[a64]") { jit.SetVector(2, {0x0000000000000000, 0xc79b271e3f000000}); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(28) == Vector{0x66ca513533ee6076, 0x0000000000000000}); } @@ -944,7 +943,7 @@ TEST_CASE("A64: FNMSUB 2", "[a64]") { jit.SetFpcr(0x00400000); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(14) == Vector{0x0000000080045284, 0x0000000000000000}); } @@ -965,7 +964,7 @@ TEST_CASE("A64: FMADD", "[a64]") { jit.SetFpcr(0x00400000); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(10) == Vector{0x3f059921bf0dbfff, 0x0000000000000000}); } @@ -992,7 +991,7 @@ TEST_CASE("A64: FMLA.4S(lane)", "[a64]") { jit.SetVector(15, {0x3ff00000'40000000, 0x40400000'40800000}); env.ticks_left = 5; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(0) == Vector{0x40b4000040b40000, 0x4070000040700000}); REQUIRE(jit.GetVector(1) == Vector{0x40ac800040ac8000, 0x4061000040610000}); @@ -1017,7 +1016,7 @@ TEST_CASE("A64: FMUL.4S(lane)", "[a64]") { jit.SetVector(15, {0x3ff00000'40000000, 0x40400000'40800000}); env.ticks_left = 5; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(0) == Vector{0x4070000040700000, 0x4070000040700000}); REQUIRE(jit.GetVector(1) == Vector{0x4061000040610000, 0x4061000040610000}); @@ -1041,7 +1040,7 @@ TEST_CASE("A64: FMLA.4S (denormal)", "[a64]") { jit.SetFpcr(0x01000000); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(12) == Vector{0x7ff800007fc00000, 0xbff0000068e8e581}); } @@ -1062,7 +1061,7 @@ TEST_CASE("A64: FMLA.4S (0x80800000)", "[a64]") { jit.SetFpcr(0x03000000); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(11) == Vector{0xc79b271e7fc00000, 0x7fc0000080000000}); } @@ -1086,7 +1085,7 @@ TEST_CASE("A64: FMADD (0x80800000)", "[a64]") { jit.SetFpcr(0x01000000); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(25) == Vector{0x80000000, 0}); } @@ -1106,7 +1105,7 @@ TEST_CASE("A64: FNEG failed to zero upper", "[a64]") { jit.SetFpcr(0x01000000); env.ticks_left = 6; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(28) == Vector{0x79ee7a03980db670, 0}); REQUIRE(FP::FPSR{jit.GetFpsr()}.QC() == false); @@ -1131,7 +1130,7 @@ TEST_CASE("A64: FRSQRTS", "[a64]") { jit.SetFpcr(0x00400000); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(13) == Vector{0xff7fffff, 0}); } @@ -1153,7 +1152,7 @@ TEST_CASE("A64: SQDMULH.8H (saturate)", "[a64]") { jit.SetFpsr(0); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(0) == Vector{0x7ffe7fff7ffc7ffe, 0x8001800180028002}); REQUIRE(FP::FPSR{jit.GetFpsr()}.QC() == true); @@ -1176,7 +1175,7 @@ TEST_CASE("A64: SQDMULH.4S (saturate)", "[a64]") { jit.SetFpsr(0); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(0) == Vector{0x7ffffffe7fffffff, 0x8000000180000001}); REQUIRE(FP::FPSR{jit.GetFpsr()}.QC() == true); @@ -1197,7 +1196,7 @@ TEST_CASE("A64: This is an infinite loop if fast dispatch is enabled", "[a64]") env.code_mem.emplace_back(0x14000000); // B . 
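// Why a "B ." block cannot hang a test: the env implements dynarmic's tick
// callbacks, so the JIT yields once the budget set via ticks_left is spent.
// A sketch of the usual implementation (assumed to mirror the shared test
// env, which this hunk does not show):
std::uint64_t GetTicksRemaining() /* override */ {
    return ticks_left;
}
void AddTicks(std::uint64_t ticks) /* override */ {
    ticks_left = (ticks > ticks_left) ? 0 : ticks_left - ticks;
}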
env.ticks_left = 6; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); } TEST_CASE("A64: EXTR", "[a64]") { @@ -1214,7 +1213,7 @@ TEST_CASE("A64: EXTR", "[a64]") { jit.SetRegister(24, 1); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(23) == 0); } @@ -1249,7 +1248,7 @@ TEST_CASE("A64: Isolated GetNZCVFromOp", "[a64]") { jit.SetPC(0); env.ticks_left = 20; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); } TEST_CASE("A64: Optimization failure when folding ADD", "[a64]") { @@ -1302,7 +1301,7 @@ TEST_CASE("A64: Optimization failure when folding ADD", "[a64]") { jit.SetPstate(0x30000000); env.ticks_left = 6; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(0) == 0x46e15845dba57924); REQUIRE(jit.GetRegister(1) == 0x6f60d04350581fea); @@ -1365,7 +1364,7 @@ TEST_CASE("A64: Cache Maintenance Instructions", "[a64]") { env.code_mem.emplace_back(0x14000000); // B . env.ticks_left = 3; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); } TEST_CASE("A64: Memory access (fastmem)", "[a64]") { @@ -1408,7 +1407,7 @@ TEST_CASE("A64: Memory access (fastmem)", "[a64]") { jit.SetPstate(0x30000000); env.ticks_left = 5; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(strncmp(backing_memory + 0x100, backing_memory + 0x1F0, 23) == 0); } @@ -1428,7 +1427,7 @@ TEST_CASE("A64: SQRDMULH QC flag when output invalidated", "[a64]") { jit.SetFpcr(0x05400000); env.ticks_left = 3; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetFpsr() == 0x08000000); REQUIRE(jit.GetVector(11) == Vector{0xb4cb'4fec'8563'1032, 0x0000'0000'0000'0000}); @@ -1449,7 +1448,7 @@ TEST_CASE("A64: SDIV maximally", "[a64]") { jit.SetPC(0); env.ticks_left = 2; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(0) == 0xffffffffffffffff); REQUIRE(jit.GetRegister(1) == 0x8000000000000000); @@ -1540,7 +1539,7 @@ TEST_CASE("A64: rand1", "[a64]") { jit.SetFpcr(0x01080000); env.ticks_left = 16; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetRegister(0) == 0x67e1d59cc30a788c); REQUIRE(jit.GetRegister(1) == 0x0e771a2a79dfb060); @@ -1575,15 +1574,67 @@ TEST_CASE("A64: rand1", "[a64]") { REQUIRE(jit.GetRegister(30) == 0x9a5d96aa066e5c39); } -TEST_CASE("A64: rand2", "[a64][.]") { - A64TestEnv env; - A64::UserConfig jit_user_config{}; - jit_user_config.callbacks = &env; - jit_user_config.fastmem_pointer = 0xffffffff00000000; - A64::Jit jit{jit_user_config}; +TEST_CASE("A64: rand3", "[a64]") { + constexpr size_t address_width = 12; + constexpr size_t memory_size = 1ull << address_width; // 4K + constexpr size_t page_size = 4 * 1024; + constexpr size_t buffer_size = 2 * page_size; + char buffer[buffer_size]; - env.code_mem = {0xea80f352, 0x6e65e59d, 0x1e20c343, 0x2e3a7192, 0x2e267249, 0xd500405f, 0x6f01f461, 0x6eb684fc, 0x58028edd, 0x0ea5f5b6, 0x0ea069fb, 0x2e769517, 0x5e066063, 0x1e65c3f5, 0x4f00ff52, 0x93401cf6, 0x1e274248, 0x6f67aaf5, 0x5e0c0782, 0x5ef43f3c, 0x2e6595b7, 0x4e20590f, 0xb35aa451, 0x6ee2c5ed, 0x4e32bf46, 0x2ea1ba8f, 0x2f68a85e, 0x9237d90a, 0x5e23dd10, 0x0e762e32, 0x4e31a8cf, 0xce1f3360, 0x781a4ac0, 0x13834066, 0x5fa8101c, 0x6f7c5594, 0x0e71bb68, 0xbc0b3e8f, 0x785dbbda, 0x6f51e794, 0xce50af75, 0x1ad728ec, 0x6ee0da4c, 0xb84efa14, 0x2eb3f613, 0x4e287ade, 0x4eb8c734, 0x2e83f4e8, 0x0e397c80, 0xd08f93f8, 0xce718e48, 0x0f672a0d, 0x2e9edd40, 0x0e14128b, 0x6f5942e6, 0x8b3a0f03, 0x3c5d16b9, 0x7f7e3743, 0x4f4c54e4, 0x0ea0a9e9, 0x9e59dbe6, 0x6e7ddcd3, 0xcec08377, 0x9ba759f8, 0x2ea5046e, 0x0e24c569, 0xb8979780, 0x4e31b98c, 0x4efe4f46, 
0x4ea7c762, 0x7e61c9c6, 0x6e30c880, 0x1ada0c25, 0x4e603a2f, 0xda9d7218, 0x0d40c5d9, 0x5e214b05, 0x9ba9efc5, 0x5e61b81e, 0x6e7bc31c, 0x0e61a163, 0x9e5832d2, 0x4e772248, 0x4e3d17c8, 0x92624f60, 0x7a1a02dc, 0x79891f65, 0x6eb45036, 0x0e321ee8, 0x4e2566f0, 0x4ea02b9b, 0x0f9dcb3d, 0x2e21b9f9, 0x0e21a8c3, 0xda1700bd, 0x6ea0fb38, 0x7e607a0b, 0x72845817, 0x7f61068e, 0x0d60e529, 0x4ea0ca5c, 0x1a94b20f, 0x8b87419d, 0x7ea9ed71, 0x2ea1a86e, 0x4d40c4da, 0x5ea0eada, 0x784ba96e, 0x7eb6ee02, 0x3db1c710, 0x0e217836, 0x7ee0bb96, 0x4e786c08, 0x4e976a08, 0x489ffe86, 0x4e79fc9b, 0x0e21cbce, 0x5ef7fc65, 0x4ea1286d, 0xd29c771e, 0x6f5c2839, 0x0ea00a9d, 0x6ee44c06, 0x5ee1d858, 0x5ef2fda6, 0x7eb0c9fe, 0x7f762791, 0x2e212ae6, 0x4e61c9db, 0x13003c57, 0x5ee1b8f8, 0x0f2396d2, 0x6ea0db1e, 0x0e71ba82, 0xab29c807, 0x6ef8f8b3, 0x1f18d4a1, 0x0e261d15, 0x1e290081, 0x1b0c7d12, 0x4e7771c3, 0xf845f1e4, 0x4d40c9e8, 0xce778452, 0x6eb9879d, 0x6e21c93d, 0xcec0829f, 0x52a0969f, 0x1e772b4f, 0x7ee1da88, 0x5f52fe0a, 0x7f3387b1, 0x5e214850, 0x1e65c025, 0x0e2ca294, 0x2e614829, 0x1e640077, 0x9e240048, 0x4ebe9537, 0x9bb7925e, 0x38b669c5, 0x2840d089, 0x6f43e648, 0x2e662d28, 0x4eabaff3, 0x6e734cc7, 0x0e31baee, 0x7ee0d93c, 0x5e282bde, 0x7e21bba4, 0x4e6c75fa, 0x5ac01217, 0x7f4304af, 0x1e7878ed, 0x1ada2196, 0x7ee1aba3, 0x93407f3c, 0x4f6c34eb, 0x6e3447a9, 0x7e7ae545, 0x5e0802bb, 0x6eeae63a, 0x7ee1da62, 0x5e280bb3, 0xf81d4009, 0x1e603b21, 0x5e281a14, 0x6eb0a99b, 0x1e266a25, 0x0d60cafe, 0x0e0b6194, 0x7a4ed2c5, 0x92b762ec, 0x4e6b5749, 0x3c16a6e5, 0x4ea0a92b, 0x0fa58b6a, 0x5f76148c, 0x6e30c95f, 0x1e6540fd, 0x5e28e40f, 0x0d403fd4, 0x7e30da36, 0x7fda9b51, 0x2ea04bde, 0x1e25c3d2, 0x1ee0434c, 0x5e21d8e7, 0x5ee1ba51, 0x5e61aba9, 0x4e2849fb, 0x5ee098ea, 0x4e60f63d, 0x0f280443, 0x5ee0da27, 0x2e78a6ce, 0x78054afc, 0x4e14286b, 0x4e218bd8, 0x2a3d2551, 0x3a04017a, 0x5f4317cd, 0x0e604a37, 0x9a834614, 0x0e2edf4d, 0x7a51a0a0, 0x5f8e9043, 0x6ea06bb2, 0xaa2857dd, 0x7a1903fc, 0x301ba9ba, 0x9ac929cd, 0x4e061ff0, 0x2e38fcfc, 0x0e2f614a, 0x7ee0d8e4, 0x6e73afda, 0x7f4156f7, 0x0e6078bf, 0x4ee1d9ed, 0x93403fbe, 0xce6f8640, 0x4e3855e3, 0x6f76fe23, 0x112466e8, 0x1e358a90, 0x7f45272c, 0x6ea19a9d, 0x8a696350, 0x1e3900f6, 0x5e61c866, 0x0e3fbfd0, 0x5ee09ad0, 0x0e651d27, 0x4dffc35e, 0x2e20c6ce, 0x0fbe118d, 0x1e656a15, 0xd1357365, 0x0e20a847, 0xce4a835c, 0x4e203905, 0x2e60090d, 0x7f4a27bb, 0x1e64c316, 0xce7d86a4, 0x7ebded2d, 0x6e70a97e, 0x4eb9a42b, 0x0e209bef, 0x6f151730, 0x0e7e30f7, 0x4e724509, 0xd503375f, 0xce58b6ae, 0x5e21a9b8, 0xcb2ca538, 0x5ac01131, 0x6ea19a24, 0xeb40c8b3, 0xc8df7d65, 0x78108341, 0x3218ab9b, 0x0f3da7dd, 0x2e003089, 0x4e21cab5, 0x8aa5c924, 0x1a94950c, 0x123e506f, 0x13117e37, 0x1ee6005b, 0x5ac00647, 0x5eec8cd5, 0x7ef0fb3d, 0x9223272a, 0x5ee0cb02, 0x6e66071d, 0x6ea1dbbf, 0x5e61c903, 0x5ac015ea, 0x93db6206, 0x7e62b5e3, 0x6ea0c87b, 0xdac0090e, 0x48df7d90, 0x6e206ba5, 0x9e2503c2, 0x6e25fc89, 0x4d60e2db, 0x1e3e22a0, 0x2eb81c19, 0x7856ea00, 0x5fbfb22d, 0x1e630244, 0x4e202a83, 0x1f50a722, 0x7f7b55d2, 0x0fae89b9, 0x4e781d73, 0xce738c3a, 0x4f15a591, 0x6e21c7e1, 0x586ff77e, 0x8a5d3592, 0x93401c67, 0x5e61cb86, 0xce6bc2c1, 0x6e393f10, 0x9bb70ec3, 0xdac0098c, 0x4da84b95, 0x7f494476, 0x9ace5c11, 0x7e61ca14, 0x4f7a60ef, 0x1ad32b39, 0x0ea3777f, 0x5e61da7f, 0x4f1404e2, 0x4e3244e2, 0x6e1b1ceb, 0x0dee5aac, 0x4e2f9dc4, 0x5ea1b8c3, 0x1e59f863, 0xd500403f, 0x4e3ae7d0, 0x4ef5c6ea, 0x08dffe3b, 0x6e36f4f6, 0x2e764f29, 0x0e726f23, 0x5f42375b, 0x7f71fc40, 0x6e618aad, 0x93403e5b, 0x0e205976, 0x0e7250c4, 0x6eb0abc9, 0x2e2049f0, 0x5f14754d, 0x7f6ce468, 0x6f950bbe, 0x6e31aa47, 0x4eb83396, 
0x0dccc952, 0x2ea1ca90, 0xce69c701, 0xb0bed69e, 0x7c5dec39, 0x4e2868a2, 0x0e591b08, 0x5f34e6dd, 0x3a449184, 0x5e3ce6de, 0x4ea149b7, 0x4e7ad29b, 0xba198503, 0x1f683e8f, 0xfa52f2a7, 0x6e30dffc, 0x4e6c3d17, 0x2eae3248, 0xd503349f, 0x1e60002c, 0x0f180680, 0x9e240049, 0x6f75774e, 0xa90d8678, 0x9ad924c4, 0x7eb0f85b, 0x0e205aaf, 0x7ee08899, 0x5f4bffd8, 0x1b0ff5f3, 0x4ee11dcd, 0x2e218948, 0x0dcb2733, 0x4eac107c, 0x4ea04a53, 0x4e287b44, 0x0e60b82a, 0x5ee0ebbc, 0xce454ff1, 0x5e1761e7, 0x5e09202f, 0x0e0c0754, 0x1e72e6b9, 0x7e21da70, 0x0fbdb20c, 0x5efb8c84, 0xd500401f, 0x3a47526e, 0x1e680acf, 0x7f7375fc, 0xf80522da, 0x4ee60c02, 0x4d40c2e7, 0x6f89096b, 0x7ee1bb6e, 0x5e280b4a, 0x1e3120c8, 0x7eb2ef96, 0x4fd012dd, 0x0f3027ef, 0x4e2078a8, 0xd503201f, 0x2e2312d9, 0x6ebf1c6e, 0x5ee1f8df, 0x4e607a46, 0x6e30c877, 0x6c09d2d1, 0x4e61abd8, 0x0e35267e, 0x6ac17728, 0x0e861aa0, 0x6f63fe26, 0x6f157628, 0x6f30a5f9, 0x4d60cc0c, 0x4e21cb59, 0x2e68a3fb, 0x7efae601, 0x6ea0f82c, 0x9b25ec12, 0x1a1a0305, 0x0e043fe1, 0x6e73c0ed, 0x6ea1b8c0, 0x7e20380b, 0x0f0534e8, 0x1f56bc7d, 0xba0c0128, 0x1e672160, 0x6e7b259b, 0x7ee07b5d, 0x9a820443, 0x4e040581, 0x2f1d87e8, 0x1acd2f5b, 0x6e20794f, 0x2e6a3c93, 0xc8dffe13, 0xce5ab1c6, 0x6eea55f6, 0x4ea039b3, 0x0d602fec, 0x2e246e2f, 0x7857be39, 0xb80608fb, 0x1e67c017, 0x9bcf7f63, 0x0f92d857, 0x5e0812f7, 0x1e210172, 0x7e6128e9, 0x7ea94d41, 0x981179e1, 0x1effb018, 0x2e600828, 0x0eb9c6b2, 0x6ee1baae, 0x4ea0db28, 0x2ea1487b, 0x4ea6c7f0, 0x2e2374c7, 0x7e30d8dd, 0xb9991fa7, 0x4e791e3e, 0x889f7c4b, 0x0e6c753c, 0x1e740ad1, 0x1e244324, 0x1ef33010, 0x5ac01102, 0x9bd97fba, 0x6e290143, 0x1e2220d8, 0x4d8d5aee, 0x6f28570b, 0xfa4ab0c1, 0xdac00b14, 0x7ea1a90e, 0x2e3027d8, 0x6f25a733, 0x4e61a96e, 0x4e1a2fcb, 0x0e22fe0a, 0xc8df7cd0, 0x5e280a55, 0x4e012b20, 0x7e70dbf4, 0x520c5a4e, 0x6ea6c57f, 0x0e861af8, 0xd503233f, 0x889ffe3c, 0x5e274ea9, 0x4e21a89a, 0x0e170c02, 0x6efd4c0b, 0xd5033ebf, 0x6e61a92c, 0x2e205b72, 0x789fb828, 0x0e626e94, 0x2ea6724c, 0x9a10028b, 0x2c6c51fc, 0x5a9de6b9, 0x6e6881f3, 0x5ee0ea6b, 0x0faec36e, 0x0e955bca, 0x1acf206d, 0x7f6f571b, 0x4e286930, 0x12b41ceb, 0x1e770b7a, 0x0ea18ac2, 0x5e282aaf, 0xf2b7fa1e, 0x1ac34311, 0x13167d11, 0x4ea63412, 0x6e758038, 0x2f1d85d6, 0x0f275480, 0x0ead6c71, 0x6e204b69, 0x1e6303f4, 0x5e0031ef, 0x13001e40, 0x7a16006f, 0x6e6ae4c0, 0x0f0f242f, 0x6e674f50, 0x4e606b7a, 0x7e6ee684, 0x1e6b5957, 0x7ea1bbab, 0x7ea0b6cb, 0xce4da241, 0x0ea1b953, 0x0eb2af4b, 0x9ac309d0, 0x6e61d8bd, 0x5ea0d890, 0x5f47d1e7, 0xfa5981ca, 0x1e7f7959, 0x6ef24dd8, 0x0e0a41d1, 0x5ee0e898, 0x4e6038e2, 0x13097d65, 0x6f839088, 0x9e290265, 0x0e208824, 0x2e65af79, 0x6f36a561, 0x9ad3204b, 0x0e21482e, 0x1e24431d, 0xd50330bf, 0x0df641aa, 0x6e602a83, 0xce30505f, 0x5e025238, 0xd503201f, 0x4e608880, 0x4de9c38d, 0x5e0f5348, 0x6eb48ca9, 0x50fda31b, 0x2e251eec, 0x7842ba50, 0xd8a1cd86, 0x2ea09862, 0x0ea09983, 0x2ea333b0, 0x0ea6032c, 0x4f94801b, 0x7e3ee57d, 0x38135e4f, 0xd8fdd9dd, 0x5ee0fcde, 0x9e64033d, 0x6e37f547, 0x6e3dd7ef, 0x13003f3d, 0x0e602f9f, 0x4e7ad014, 0x9b3b6857, 0x5ea0cb67, 0x0eb31c9f, 0x4e7c5372, 0x5e61b8c0, 0x0ea19b23, 0x0ee6e1df, 0x6e63a626, 0x2f139405, 0x7eb0f96d, 0x9e588c63, 0x2e714c3a, 0x6e8c941e, 0x0f61b331, 0x6f01f625, 0x4e78d4ea, 0x6f403709, 0x1a0300da, 0xda0102c8, 0x7e61d9fd, 0xb89469bb, 0x0c838780, 0x2e60a590, 0x4dfd29e1, 0x4e150f2e, 0xce2810bc, 0x5f541591, 0x9ee60259, 0x2eb40e56, 0x5e014027, 0x2ef71faf, 0x4e2d452f, 0x5ee0a813, 0x4eb03301, 0x38443acf, 0x6eabd502, 0x0e2ee71e, 0x5a960364, 0xce7ec596, 0x7efbed09, 0x4ef42ea2, 0x0eb30ea5, 0x5ee0d9f8, 0x6f513552, 0xf89eb3fa, 0x7ea2eca6, 0x9b00cc19, 
0xf897409e, 0x1e73485f, 0x381afa77, 0x0f169f3b, 0x5ee1aa70, 0x5e1803ee, 0x0dbf5a4c, 0xce78c7a6, 0x9b0b260c, 0x2ef8fa19, 0x6e70aa4b, 0xce45b805, 0x2ea08e86, 0x4ee0bafd, 0x2ea09a1f, 0x4e218900, 0x6e744f13, 0xce518653, 0xf81b7a68, 0xce45ac5e, 0x7e62e416, 0x1a1b02b6, 0x7e21db48, 0x381daaaf, 0x6b2c0987, 0x0e2ec651, 0x4eae8502, 0x9bde7ca0, 0x6f47201f, 0x7e61a8a3, 0x6e60d5db, 0x4e2879de, 0xf81d194e, 0x4f1b8d05, 0x4d0048b2, 0x6e203be9, 0x4e3e7eb1, 0x0e260ef8, 0x2e688518, 0x7e3fec46, 0xdac00843, 0xf85c8917, 0x2e212a0f, 0x0e8196da, 0xd503359f, 0xce4c81f2, 0x6ee19992, 0x6e21ca79, 0x4d40c1d2, 0x4f5816ef, 0x4e34c3ea, 0x4df7c283, 0x7ef7eeb6, 0x18e276ce, 0xab0d21c0, 0xd5032f7f, 0x4ea00dbf, 0x5ac01251, 0xd0121955, 0x7f1495e4, 0x7ef0fa11, 0x5e24dd9c, 0x9add25b5, 0x0eb2bdef, 0x9e1977c7, 0x6f4b26bd, 0x0e200a9c, 0x9b4f7c00, 0x0ea0392e, 0x7e212a2c, 0x0b248b90, 0x1acc27a1, 0x2e701c90, 0x5ee1b870, 0x5e280aba, 0x5ea0780e, 0x1e264246, 0x4e052d04, 0x0e731dc4, 0xce461997, 0x9a9e9413, 0x3d462048, 0x5ea1fac5, 0x2ea0c8c4, 0x9a030280, 0x2ebda4b8, 0x5eef8614, 0x6eadc4e0, 0xbd035a8f, 0x4e606b84, 0x4eb1aba1, 0x4e286928, 0x4e2858cc, 0x9add0ce9, 0x4e070d65, 0x5fd399d5, 0x0f03fde7, 0x6ee90c74, 0x4ef8e31e, 0x381d986a, 0x5ea0ebf4, 0x5ea0d87e, 0x2e76ac9e, 0x6eb36cd4, 0x2e6e1c4c, 0x2e2feebc, 0x1ace4b03, 0x5ee0db12, 0x5ea0e9b1, 0x2e1c32d5, 0x5fa49a09, 0x0e258737, 0x7e21ca8e, 0xce4f9988, 0x5f7f56a6, 0x0e739766, 0x4e28586c, 0x6e619908, 0xd500401f, 0xf88b9252, 0x6e251c8e, 0x9e20015b, 0x7f1486b9, 0x717c339b, 0x1f31ff70, 0x4ea0eb62, 0x9acb0926, 0x489f7d85, 0x4e209b54, 0x2e84cf03, 0x2e65946c, 0x0e7d80cd, 0xc8dffecc, 0xce668bd8, 0x6e2188af, 0xeb4ada34, 0x2b25ec33, 0x0d40e6e7, 0x4eb2c757, 0x4ec82ad0, 0x7e21cb0a, 0x0e21a847, 0x4e0b1ec0, 0x381e6ac0, 0x6e61c8f5, 0x0f10071c, 0x2ee21daa, 0x5e61ab31, 0x6e218892, 0x2e7e7cb5, 0x6f2826aa, 0x7f6b54df, 0x4eaa2620, 0xdac00034, 0x4f6477be, 0x7e6148ea, 0x4eef1f57, 0x78459aeb, 0x2ebc3f10, 0x2e35f4eb, 0x4fbf19ce, 0xd8d0e58e, 0x2e21bbc7, 0x6ee0cab6, 0x9bc57e3f, 0x2f854037, 0x4e92181c, 0x6e6d1f89, 0x0f305545, 0x4ee19a57, 0x0e887bdf, 0x5e1a4185, 0x7ef0c821, 0x2eb6607c, 0x2ea0d9b8, 0x9e0380f4, 0x2ebf1c83, 0x1e62597d, 0x7f6e2548, 0x5ac00205, 0x4e616adb, 0xce638b8c, 0x5e1653cf, 0x2e6069be, 0x0e2ac641, 0x1e33c76f, 0xce44956d, 0x9bb90d31, 0x1e24c20a, 0x7ee038c1, 0x93407e5e, 0x4e280127, 0xc8df7f7d, 0xba42f263, 0x1e6f199c, 0x6e212889, 0x6e92f60e, 0x6ebdc499, 0x8b9acbf8, 0x4d40c581, 0x3a020250, 0x6e6a6716, 0x9248403b, 0x9081ffea, 0x4e603856, 0x9ad1242b, 0x6f270579, 0x1a070349, 0xcec08133, 0xd503305f, 0x5a1a00ca, 0x2e60b8a2, 0x0e5f28fd, 0x0e31a3da, 0x7e61cbc1, 0xd503399f, 0x5f5e54aa, 0x0eb8bdea, 0x4eba8f10, 0x4e2a2e60, 0x2f3da7d6, 0x1e58e297, 0x6e71aa3e, 0x6b86701a, 0xce4fa5e6, 0x4ee7c463, 0x8a79307f, 0x0ebea541, 0x2e218af4, 0x4e774f8a, 0xb9b95dc5, 0x6e61abd5, 0x4dd1e814, 0x4da72098, 0x98307582, 0x3a512101, 0x7ef95497, 0x1ace5535, 0x5a0c0349, 0x4e28581b, 0x6ebf1c02, 0x5ea1da23, 0x1e274314, 0x5e25dd29, 0x6e75f594, 0x6eaf6ed5, 0x4e214abe, 0x4e064172, 0x2e21c8f4, 0xf84c5b08, 0x1e244312, 0x14000000}; - env.code_mem.emplace_back(0x14000000); // B . 
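// The replacement tests below carve a page-aligned fastmem arena out of a
// plain stack buffer via std::align. A self-contained illustration of that
// idiom (sizes match the tests; names are illustrative only):
#include <cstddef>
#include <memory>   // std::align
inline char* AlignedPage() {
    static char raw[2 * 4096];          // reserve two pages...
    void* p = raw;
    std::size_t space = sizeof(raw);
    // ...so that one fully aligned 4 KiB page is guaranteed to fit:
    // std::align bumps p up to the next 4096-byte boundary, shrinks space
    // accordingly, and returns nullptr only if the block no longer fits.
    return static_cast<char*>(std::align(4096, 4096, p, space));
}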
+ void* buffer_ptr = reinterpret_cast(buffer); + size_t buffer_size_nconst = buffer_size; + char* backing_memory = reinterpret_cast(std::align(page_size, memory_size, buffer_ptr, buffer_size_nconst)); + + A64FastmemTestEnv env{backing_memory}; + Dynarmic::A64::UserConfig config{}; + config.callbacks = &env; + config.fastmem_pointer = reinterpret_cast(backing_memory); + config.fastmem_address_space_bits = address_width; + config.recompile_on_fastmem_failure = false; + config.silently_mirror_fastmem = true; + config.processor_id = 0; + A64::Jit jit{config}; + memset(backing_memory, 0, memory_size); + + // cat rand2.txt | awk '{print "env.code_mem.emplace_back(0x"$2"); // "$0}' > rand2-out.txt + env.MemoryWrite32(100, 0x58028edd); // 0000000000000084 58028edd ldr x29, #20952 + env.MemoryWrite32(104, 0x14000000); // 0000000000000ea4 14000000 b #0 + + jit.SetPC(100); + jit.SetPstate(0xb0000000); + jit.SetFpcr(0x01000000); + env.ticks_left = 110; + //jit.DumpDisassembly(); + CheckedRun([&]() { jit.Run(); }); +} + +TEST_CASE("A64: rand2", "[a64][.]") { + constexpr size_t address_width = 12; + constexpr size_t memory_size = 1ull << address_width; // 4K + constexpr size_t page_size = 4 * 1024; + constexpr size_t buffer_size = 2 * page_size; + char buffer[buffer_size]; + + void* buffer_ptr = reinterpret_cast(buffer); + size_t buffer_size_nconst = buffer_size; + char* backing_memory = reinterpret_cast(std::align(page_size, memory_size, buffer_ptr, buffer_size_nconst)); + + A64FastmemTestEnv env{backing_memory}; + Dynarmic::A64::UserConfig config{}; + config.callbacks = &env; + config.fastmem_pointer = reinterpret_cast(backing_memory); + config.fastmem_address_space_bits = address_width; + config.recompile_on_fastmem_failure = false; + config.silently_mirror_fastmem = true; + config.processor_id = 0; + A64::Jit jit{config}; + memset(backing_memory, 0, memory_size); + + // cat rand2.txt | awk '{print "env.code_mem.emplace_back(0x"$2"); // "$0}' > rand2-out.txt + const std::array code32 = {0xea80f352, 0x6e65e59d, 0x1e20c343, 0x2e3a7192, 0x2e267249, 0xd500405f, 0x6f01f461, 0x6eb684fc, 0x58028edd, 0x0ea5f5b6, 0x0ea069fb, 0x2e769517, 0x5e066063, 0x1e65c3f5, 0x4f00ff52, 0x93401cf6, 0x1e274248, 0x6f67aaf5, 0x5e0c0782, 0x5ef43f3c, 0x2e6595b7, 0x4e20590f, 0xb35aa451, 0x6ee2c5ed, 0x4e32bf46, 0x2ea1ba8f, 0x2f68a85e, 0x9237d90a, 0x5e23dd10, 0x0e762e32, 0x4e31a8cf, 0xce1f3360, 0x781a4ac0, 0x13834066, 0x5fa8101c, 0x6f7c5594, 0x0e71bb68, 0xbc0b3e8f, 0x785dbbda, 0x6f51e794, 0xce50af75, 0x1ad728ec, 0x6ee0da4c, 0xb84efa14, 0x2eb3f613, 0x4e287ade, 0x4eb8c734, 0x2e83f4e8, 0x0e397c80, 0xd08f93f8, 0xce718e48, 0x0f672a0d, 0x2e9edd40, 0x0e14128b, 0x6f5942e6, 0x8b3a0f03, 0x3c5d16b9, 0x7f7e3743, 0x4f4c54e4, 0x0ea0a9e9, 0x9e59dbe6, 0x6e7ddcd3, 0xcec08377, 0x9ba759f8, 0x2ea5046e, 0x0e24c569, 0xb8979780, 0x4e31b98c, 0x4efe4f46, 0x4ea7c762, 0x7e61c9c6, 0x6e30c880, 0x1ada0c25, 0x4e603a2f, 0xda9d7218, 0x0d40c5d9, 0x5e214b05, 0x9ba9efc5, 0x5e61b81e, 0x6e7bc31c, 0x0e61a163, 0x9e5832d2, 0x4e772248, 0x4e3d17c8, 0x92624f60, 0x7a1a02dc, 0x79891f65, 0x6eb45036, 0x0e321ee8, 0x4e2566f0, 0x4ea02b9b, 0x0f9dcb3d, 0x2e21b9f9, 0x0e21a8c3, 0xda1700bd, 0x6ea0fb38, 0x7e607a0b, 0x72845817, 0x7f61068e, 0x0d60e529, 0x4ea0ca5c, 0x1a94b20f, 0x8b87419d, 0x7ea9ed71, 0x2ea1a86e, 0x4d40c4da, 0x5ea0eada, 0x784ba96e, 0x7eb6ee02, 0x3db1c710, 0x0e217836, 0x7ee0bb96, 0x4e786c08, 0x4e976a08, 0x489ffe86, 0x4e79fc9b, 0x0e21cbce, 0x5ef7fc65, 0x4ea1286d, 0xd29c771e, 0x6f5c2839, 0x0ea00a9d, 0x6ee44c06, 0x5ee1d858, 0x5ef2fda6, 0x7eb0c9fe, 0x7f762791, 0x2e212ae6, 
0x4e61c9db, 0x13003c57, 0x5ee1b8f8, 0x0f2396d2, 0x6ea0db1e, 0x0e71ba82, 0xab29c807, 0x6ef8f8b3, 0x1f18d4a1, 0x0e261d15, 0x1e290081, 0x1b0c7d12, 0x4e7771c3, 0xf845f1e4, 0x4d40c9e8, 0xce778452, 0x6eb9879d, 0x6e21c93d, 0xcec0829f, 0x52a0969f, 0x1e772b4f, 0x7ee1da88, 0x5f52fe0a, 0x7f3387b1, 0x5e214850, 0x1e65c025, 0x0e2ca294, 0x2e614829, 0x1e640077, 0x9e240048, 0x4ebe9537, 0x9bb7925e, 0x38b669c5, 0x2840d089, 0x6f43e648, 0x2e662d28, 0x4eabaff3, 0x6e734cc7, 0x0e31baee, 0x7ee0d93c, 0x5e282bde, 0x7e21bba4, 0x4e6c75fa, 0x5ac01217, 0x7f4304af, 0x1e7878ed, 0x1ada2196, 0x7ee1aba3, 0x93407f3c, 0x4f6c34eb, 0x6e3447a9, 0x7e7ae545, 0x5e0802bb, 0x6eeae63a, 0x7ee1da62, 0x5e280bb3, 0xf81d4009, 0x1e603b21, 0x5e281a14, 0x6eb0a99b, 0x1e266a25, 0x0d60cafe, 0x0e0b6194, 0x7a4ed2c5, 0x92b762ec, 0x4e6b5749, 0x3c16a6e5, 0x4ea0a92b, 0x0fa58b6a, 0x5f76148c, 0x6e30c95f, 0x1e6540fd, 0x5e28e40f, 0x0d403fd4, 0x7e30da36, 0x7fda9b51, 0x2ea04bde, 0x1e25c3d2, 0x1ee0434c, 0x5e21d8e7, 0x5ee1ba51, 0x5e61aba9, 0x4e2849fb, 0x5ee098ea, 0x4e60f63d, 0x0f280443, 0x5ee0da27, 0x2e78a6ce, 0x78054afc, 0x4e14286b, 0x4e218bd8, 0x2a3d2551, 0x3a04017a, 0x5f4317cd, 0x0e604a37, 0x9a834614, 0x0e2edf4d, 0x7a51a0a0, 0x5f8e9043, 0x6ea06bb2, 0xaa2857dd, 0x7a1903fc, 0x301ba9ba, 0x9ac929cd, 0x4e061ff0, 0x2e38fcfc, 0x0e2f614a, 0x7ee0d8e4, 0x6e73afda, 0x7f4156f7, 0x0e6078bf, 0x4ee1d9ed, 0x93403fbe, 0xce6f8640, 0x4e3855e3, 0x6f76fe23, 0x112466e8, 0x1e358a90, 0x7f45272c, 0x6ea19a9d, 0x8a696350, 0x1e3900f6, 0x5e61c866, 0x0e3fbfd0, 0x5ee09ad0, 0x0e651d27, 0x4dffc35e, 0x2e20c6ce, 0x0fbe118d, 0x1e656a15, 0xd1357365, 0x0e20a847, 0xce4a835c, 0x4e203905, 0x2e60090d, 0x7f4a27bb, 0x1e64c316, 0xce7d86a4, 0x7ebded2d, 0x6e70a97e, 0x4eb9a42b, 0x0e209bef, 0x6f151730, 0x0e7e30f7, 0x4e724509, 0xd503375f, 0xce58b6ae, 0x5e21a9b8, 0xcb2ca538, 0x5ac01131, 0x6ea19a24, 0xeb40c8b3, 0xc8df7d65, 0x78108341, 0x3218ab9b, 0x0f3da7dd, 0x2e003089, 0x4e21cab5, 0x8aa5c924, 0x1a94950c, 0x123e506f, 0x13117e37, 0x1ee6005b, 0x5ac00647, 0x5eec8cd5, 0x7ef0fb3d, 0x9223272a, 0x5ee0cb02, 0x6e66071d, 0x6ea1dbbf, 0x5e61c903, 0x5ac015ea, 0x93db6206, 0x7e62b5e3, 0x6ea0c87b, 0xdac0090e, 0x48df7d90, 0x6e206ba5, 0x9e2503c2, 0x6e25fc89, 0x4d60e2db, 0x1e3e22a0, 0x2eb81c19, 0x7856ea00, 0x5fbfb22d, 0x1e630244, 0x4e202a83, 0x1f50a722, 0x7f7b55d2, 0x0fae89b9, 0x4e781d73, 0xce738c3a, 0x4f15a591, 0x6e21c7e1, 0x586ff77e, 0x8a5d3592, 0x93401c67, 0x5e61cb86, 0xce6bc2c1, 0x6e393f10, 0x9bb70ec3, 0xdac0098c, 0x4da84b95, 0x7f494476, 0x9ace5c11, 0x7e61ca14, 0x4f7a60ef, 0x1ad32b39, 0x0ea3777f, 0x5e61da7f, 0x4f1404e2, 0x4e3244e2, 0x6e1b1ceb, 0x0dee5aac, 0x4e2f9dc4, 0x5ea1b8c3, 0x1e59f863, 0xd500403f, 0x4e3ae7d0, 0x4ef5c6ea, 0x08dffe3b, 0x6e36f4f6, 0x2e764f29, 0x0e726f23, 0x5f42375b, 0x7f71fc40, 0x6e618aad, 0x93403e5b, 0x0e205976, 0x0e7250c4, 0x6eb0abc9, 0x2e2049f0, 0x5f14754d, 0x7f6ce468, 0x6f950bbe, 0x6e31aa47, 0x4eb83396, 0x0dccc952, 0x2ea1ca90, 0xce69c701, 0xb0bed69e, 0x7c5dec39, 0x4e2868a2, 0x0e591b08, 0x5f34e6dd, 0x3a449184, 0x5e3ce6de, 0x4ea149b7, 0x4e7ad29b, 0xba198503, 0x1f683e8f, 0xfa52f2a7, 0x6e30dffc, 0x4e6c3d17, 0x2eae3248, 0xd503349f, 0x1e60002c, 0x0f180680, 0x9e240049, 0x6f75774e, 0xa90d8678, 0x9ad924c4, 0x7eb0f85b, 0x0e205aaf, 0x7ee08899, 0x5f4bffd8, 0x1b0ff5f3, 0x4ee11dcd, 0x2e218948, 0x0dcb2733, 0x4eac107c, 0x4ea04a53, 0x4e287b44, 0x0e60b82a, 0x5ee0ebbc, 0xce454ff1, 0x5e1761e7, 0x5e09202f, 0x0e0c0754, 0x1e72e6b9, 0x7e21da70, 0x0fbdb20c, 0x5efb8c84, 0xd500401f, 0x3a47526e, 0x1e680acf, 0x7f7375fc, 0xf80522da, 0x4ee60c02, 0x4d40c2e7, 0x6f89096b, 0x7ee1bb6e, 0x5e280b4a, 0x1e3120c8, 0x7eb2ef96, 0x4fd012dd, 
0x0f3027ef, 0x4e2078a8, 0xd503201f, 0x2e2312d9, 0x6ebf1c6e, 0x5ee1f8df, 0x4e607a46, 0x6e30c877, 0x6c09d2d1, 0x4e61abd8, 0x0e35267e, 0x6ac17728, 0x0e861aa0, 0x6f63fe26, 0x6f157628, 0x6f30a5f9, 0x4d60cc0c, 0x4e21cb59, 0x2e68a3fb, 0x7efae601, 0x6ea0f82c, 0x9b25ec12, 0x1a1a0305, 0x0e043fe1, 0x6e73c0ed, 0x6ea1b8c0, 0x7e20380b, 0x0f0534e8, 0x1f56bc7d, 0xba0c0128, 0x1e672160, 0x6e7b259b, 0x7ee07b5d, 0x9a820443, 0x4e040581, 0x2f1d87e8, 0x1acd2f5b, 0x6e20794f, 0x2e6a3c93, 0xc8dffe13, 0xce5ab1c6, 0x6eea55f6, 0x4ea039b3, 0x0d602fec, 0x2e246e2f, 0x7857be39, 0xb80608fb, 0x1e67c017, 0x9bcf7f63, 0x0f92d857, 0x5e0812f7, 0x1e210172, 0x7e6128e9, 0x7ea94d41, 0x981179e1, 0x1effb018, 0x2e600828, 0x0eb9c6b2, 0x6ee1baae, 0x4ea0db28, 0x2ea1487b, 0x4ea6c7f0, 0x2e2374c7, 0x7e30d8dd, 0xb9991fa7, 0x4e791e3e, 0x889f7c4b, 0x0e6c753c, 0x1e740ad1, 0x1e244324, 0x1ef33010, 0x5ac01102, 0x9bd97fba, 0x6e290143, 0x1e2220d8, 0x4d8d5aee, 0x6f28570b, 0xfa4ab0c1, 0xdac00b14, 0x7ea1a90e, 0x2e3027d8, 0x6f25a733, 0x4e61a96e, 0x4e1a2fcb, 0x0e22fe0a, 0xc8df7cd0, 0x5e280a55, 0x4e012b20, 0x7e70dbf4, 0x520c5a4e, 0x6ea6c57f, 0x0e861af8, 0xd503233f, 0x889ffe3c, 0x5e274ea9, 0x4e21a89a, 0x0e170c02, 0x6efd4c0b, 0xd5033ebf, 0x6e61a92c, 0x2e205b72, 0x789fb828, 0x0e626e94, 0x2ea6724c, 0x9a10028b, 0x2c6c51fc, 0x5a9de6b9, 0x6e6881f3, 0x5ee0ea6b, 0x0faec36e, 0x0e955bca, 0x1acf206d, 0x7f6f571b, 0x4e286930, 0x12b41ceb, 0x1e770b7a, 0x0ea18ac2, 0x5e282aaf, 0xf2b7fa1e, 0x1ac34311, 0x13167d11, 0x4ea63412, 0x6e758038, 0x2f1d85d6, 0x0f275480, 0x0ead6c71, 0x6e204b69, 0x1e6303f4, 0x5e0031ef, 0x13001e40, 0x7a16006f, 0x6e6ae4c0, 0x0f0f242f, 0x6e674f50, 0x4e606b7a, 0x7e6ee684, 0x1e6b5957, 0x7ea1bbab, 0x7ea0b6cb, 0xce4da241, 0x0ea1b953, 0x0eb2af4b, 0x9ac309d0, 0x6e61d8bd, 0x5ea0d890, 0x5f47d1e7, 0xfa5981ca, 0x1e7f7959, 0x6ef24dd8, 0x0e0a41d1, 0x5ee0e898, 0x4e6038e2, 0x13097d65, 0x6f839088, 0x9e290265, 0x0e208824, 0x2e65af79, 0x6f36a561, 0x9ad3204b, 0x0e21482e, 0x1e24431d, 0xd50330bf, 0x0df641aa, 0x6e602a83, 0xce30505f, 0x5e025238, 0xd503201f, 0x4e608880, 0x4de9c38d, 0x5e0f5348, 0x6eb48ca9, 0x50fda31b, 0x2e251eec, 0x7842ba50, 0xd8a1cd86, 0x2ea09862, 0x0ea09983, 0x2ea333b0, 0x0ea6032c, 0x4f94801b, 0x7e3ee57d, 0x38135e4f, 0xd8fdd9dd, 0x5ee0fcde, 0x9e64033d, 0x6e37f547, 0x6e3dd7ef, 0x13003f3d, 0x0e602f9f, 0x4e7ad014, 0x9b3b6857, 0x5ea0cb67, 0x0eb31c9f, 0x4e7c5372, 0x5e61b8c0, 0x0ea19b23, 0x0ee6e1df, 0x6e63a626, 0x2f139405, 0x7eb0f96d, 0x9e588c63, 0x2e714c3a, 0x6e8c941e, 0x0f61b331, 0x6f01f625, 0x4e78d4ea, 0x6f403709, 0x1a0300da, 0xda0102c8, 0x7e61d9fd, 0xb89469bb, 0x0c838780, 0x2e60a590, 0x4dfd29e1, 0x4e150f2e, 0xce2810bc, 0x5f541591, 0x9ee60259, 0x2eb40e56, 0x5e014027, 0x2ef71faf, 0x4e2d452f, 0x5ee0a813, 0x4eb03301, 0x38443acf, 0x6eabd502, 0x0e2ee71e, 0x5a960364, 0xce7ec596, 0x7efbed09, 0x4ef42ea2, 0x0eb30ea5, 0x5ee0d9f8, 0x6f513552, 0xf89eb3fa, 0x7ea2eca6, 0x9b00cc19, 0xf897409e, 0x1e73485f, 0x381afa77, 0x0f169f3b, 0x5ee1aa70, 0x5e1803ee, 0x0dbf5a4c, 0xce78c7a6, 0x9b0b260c, 0x2ef8fa19, 0x6e70aa4b, 0xce45b805, 0x2ea08e86, 0x4ee0bafd, 0x2ea09a1f, 0x4e218900, 0x6e744f13, 0xce518653, 0xf81b7a68, 0xce45ac5e, 0x7e62e416, 0x1a1b02b6, 0x7e21db48, 0x381daaaf, 0x6b2c0987, 0x0e2ec651, 0x4eae8502, 0x9bde7ca0, 0x6f47201f, 0x7e61a8a3, 0x6e60d5db, 0x4e2879de, 0xf81d194e, 0x4f1b8d05, 0x4d0048b2, 0x6e203be9, 0x4e3e7eb1, 0x0e260ef8, 0x2e688518, 0x7e3fec46, 0xdac00843, 0xf85c8917, 0x2e212a0f, 0x0e8196da, 0xd503359f, 0xce4c81f2, 0x6ee19992, 0x6e21ca79, 0x4d40c1d2, 0x4f5816ef, 0x4e34c3ea, 0x4df7c283, 0x7ef7eeb6, 0x18e276ce, 0xab0d21c0, 0xd5032f7f, 0x4ea00dbf, 0x5ac01251, 0xd0121955, 
0x7f1495e4, 0x7ef0fa11, 0x5e24dd9c, 0x9add25b5, 0x0eb2bdef, 0x9e1977c7, 0x6f4b26bd, 0x0e200a9c, 0x9b4f7c00, 0x0ea0392e, 0x7e212a2c, 0x0b248b90, 0x1acc27a1, 0x2e701c90, 0x5ee1b870, 0x5e280aba, 0x5ea0780e, 0x1e264246, 0x4e052d04, 0x0e731dc4, 0xce461997, 0x9a9e9413, 0x3d462048, 0x5ea1fac5, 0x2ea0c8c4, 0x9a030280, 0x2ebda4b8, 0x5eef8614, 0x6eadc4e0, 0xbd035a8f, 0x4e606b84, 0x4eb1aba1, 0x4e286928, 0x4e2858cc, 0x9add0ce9, 0x4e070d65, 0x5fd399d5, 0x0f03fde7, 0x6ee90c74, 0x4ef8e31e, 0x381d986a, 0x5ea0ebf4, 0x5ea0d87e, 0x2e76ac9e, 0x6eb36cd4, 0x2e6e1c4c, 0x2e2feebc, 0x1ace4b03, 0x5ee0db12, 0x5ea0e9b1, 0x2e1c32d5, 0x5fa49a09, 0x0e258737, 0x7e21ca8e, 0xce4f9988, 0x5f7f56a6, 0x0e739766, 0x4e28586c, 0x6e619908, 0xd500401f, 0xf88b9252, 0x6e251c8e, 0x9e20015b, 0x7f1486b9, 0x717c339b, 0x1f31ff70, 0x4ea0eb62, 0x9acb0926, 0x489f7d85, 0x4e209b54, 0x2e84cf03, 0x2e65946c, 0x0e7d80cd, 0xc8dffecc, 0xce668bd8, 0x6e2188af, 0xeb4ada34, 0x2b25ec33, 0x0d40e6e7, 0x4eb2c757, 0x4ec82ad0, 0x7e21cb0a, 0x0e21a847, 0x4e0b1ec0, 0x381e6ac0, 0x6e61c8f5, 0x0f10071c, 0x2ee21daa, 0x5e61ab31, 0x6e218892, 0x2e7e7cb5, 0x6f2826aa, 0x7f6b54df, 0x4eaa2620, 0xdac00034, 0x4f6477be, 0x7e6148ea, 0x4eef1f57, 0x78459aeb, 0x2ebc3f10, 0x2e35f4eb, 0x4fbf19ce, 0xd8d0e58e, 0x2e21bbc7, 0x6ee0cab6, 0x9bc57e3f, 0x2f854037, 0x4e92181c, 0x6e6d1f89, 0x0f305545, 0x4ee19a57, 0x0e887bdf, 0x5e1a4185, 0x7ef0c821, 0x2eb6607c, 0x2ea0d9b8, 0x9e0380f4, 0x2ebf1c83, 0x1e62597d, 0x7f6e2548, 0x5ac00205, 0x4e616adb, 0xce638b8c, 0x5e1653cf, 0x2e6069be, 0x0e2ac641, 0x1e33c76f, 0xce44956d, 0x9bb90d31, 0x1e24c20a, 0x7ee038c1, 0x93407e5e, 0x4e280127, 0xc8df7f7d, 0xba42f263, 0x1e6f199c, 0x6e212889, 0x6e92f60e, 0x6ebdc499, 0x8b9acbf8, 0x4d40c581, 0x3a020250, 0x6e6a6716, 0x9248403b, 0x9081ffea, 0x4e603856, 0x9ad1242b, 0x6f270579, 0x1a070349, 0xcec08133, 0xd503305f, 0x5a1a00ca, 0x2e60b8a2, 0x0e5f28fd, 0x0e31a3da, 0x7e61cbc1, 0xd503399f, 0x5f5e54aa, 0x0eb8bdea, 0x4eba8f10, 0x4e2a2e60, 0x2f3da7d6, 0x1e58e297, 0x6e71aa3e, 0x6b86701a, 0xce4fa5e6, 0x4ee7c463, 0x8a79307f, 0x0ebea541, 0x2e218af4, 0x4e774f8a, 0xb9b95dc5, 0x6e61abd5, 0x4dd1e814, 0x4da72098, 0x98307582, 0x3a512101, 0x7ef95497, 0x1ace5535, 0x5a0c0349, 0x4e28581b, 0x6ebf1c02, 0x5ea1da23, 0x1e274314, 0x5e25dd29, 0x6e75f594, 0x6eaf6ed5, 0x4e214abe, 0x4e064172, 0x2e21c8f4, 0xf84c5b08, 0x1e244312, 0x14000000}; + for (size_t i = 0; i < code32.size(); ++i) + env.MemoryWrite32(100 + i, code32[i]); + env.ignore_invalid_insn = true; jit.SetRegister(0, 0x866524401a1d4e47); jit.SetRegister(1, 0x02ca8cec51301b60); @@ -1619,8 +1670,6 @@ TEST_CASE("A64: rand2", "[a64][.]") { jit.SetPC(100); jit.SetSP(0x000000cdfadeaff0); - env.code_mem_start_address = 100; - jit.SetVector(0, {0x4d5a180ac0ffdac8, 0xfc6eb113cd5ff2a8}); jit.SetVector(1, {0x39f8cecc9de9cefd, 0x3a6b35d333d89a6b}); jit.SetVector(2, {0x791fd8290bbdd2f4, 0xdc0e5e7aee311411}); @@ -1658,7 +1707,7 @@ TEST_CASE("A64: rand2", "[a64][.]") { jit.SetFpcr(0x01000000); env.ticks_left = 110; - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(0) == Vector{0x0101010211914707, 0x090000007fd9991a}); REQUIRE(jit.GetVector(1) == Vector{0x00000000fffffffe, 0x0000000000000000}); @@ -1730,7 +1779,7 @@ TEST_CASE("A64: SABD", "[a64]") { jit.SetVector(8, vectors[8]); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(0) == vectors[0]); CHECK(jit.GetVector(1) == vectors[1]); @@ -1747,7 +1796,7 @@ TEST_CASE("A64: SABD", "[a64]") { jit.SetVector(8, vectors[7]); env.ticks_left = 4; - jit.Run(); + CheckedRun([&]() { jit.Run(); 
}); CHECK(jit.GetVector(0) == vectors[0]); CHECK(jit.GetVector(1) == vectors[1]); @@ -1769,7 +1818,7 @@ TEST_CASE("A64: UZP{1,2}.2D", "[a64]") { jit.SetVector(1, {0xA0A1A2A3A4A5A6A7, 0xB0B1B2B3B4B5B6B7}); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(2) == Vector{0xF0F1F2F3F4F5F6F7, 0xA0A1A2A3A4A5A6A7}); REQUIRE(jit.GetVector(3) == Vector{0xE0E1E2E3E4E5E6E7, 0xB0B1B2B3B4B5B6B7}); @@ -1792,7 +1841,7 @@ TEST_CASE("A64: UZP{1,2}.S", "[a64]") { jit.SetVector(1, {0xA4A5A6A7'A0A1A2A3, 0xB4B5B6B7'B0B1B2B3}); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(2) == Vector{0xA0A1A2A3'F0F1F2F3, 0}); REQUIRE(jit.GetVector(3) == Vector{0xA4A5A6A7'F4F5F6F7, 0}); @@ -1817,7 +1866,7 @@ TEST_CASE("A64: UZP{1,2}.H", "[a64]") { jit.SetVector(1, {0xA6A7'A4A5'A2A3'A0A1, 0xB6B7'B4B5'B2B3'B0B1}); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(2) == Vector{0xA4A5'A0A1'F4F5'F0F1, 0}); REQUIRE(jit.GetVector(3) == Vector{0xA6A7'A2A3'F6F7'F2F3, 0}); @@ -1842,7 +1891,7 @@ TEST_CASE("A64: UZP{1,2}.B", "[a64]") { jit.SetVector(1, {0xA7'A6'A5'A4'A3'A2'A1'A0, 0xB7'B6'B5'B4'B3'B2'B1'B0}); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); REQUIRE(jit.GetVector(2) == Vector{0xA6'A4'A2'A0'F6'F4'F2'F0, 0}); REQUIRE(jit.GetVector(3) == Vector{0xA7'A5'A3'A1'F7'F5'F3'F1, 0}); @@ -1883,7 +1932,7 @@ TEST_CASE("A64: {S,U}MIN.S, {S,U}MAX.S", "[a64]") { jit.SetVector(1, vectors[1]); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(2) == vectors[2]); CHECK(jit.GetVector(3) == vectors[3]); @@ -1929,7 +1978,7 @@ TEST_CASE("A64: {S,U}MIN.H, {S,U}MAX.H", "[a64]") { jit.SetVector(1, vectors[1]); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(2) == vectors[2]); CHECK(jit.GetVector(3) == vectors[3]); @@ -1975,7 +2024,7 @@ TEST_CASE("A64: {S,U}MIN.B, {S,U}MAX.B", "[a64]") { jit.SetVector(1, vectors[1]); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(2) == vectors[2]); CHECK(jit.GetVector(3) == vectors[3]); @@ -2027,7 +2076,7 @@ TEST_CASE("A64: {S,U}MINP.S, {S,U}MAXP.S", "[a64]") { jit.SetVector(1, vectors[1]); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(2) == vectors[2]); CHECK(jit.GetVector(3) == vectors[3]); @@ -2046,7 +2095,7 @@ TEST_CASE("A64: {S,U}MINP.S, {S,U}MAXP.S", "[a64]") { jit.SetVector(1, vectors[11]); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(2) == vectors[2]); CHECK(jit.GetVector(3) == vectors[3]); @@ -2097,7 +2146,7 @@ TEST_CASE("A64: {S,U}MINP.H, {S,U}MAXP.H", "[a64]") { jit.SetVector(1, vectors[1]); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(2) == vectors[2]); CHECK(jit.GetVector(3) == vectors[3]); @@ -2116,7 +2165,7 @@ TEST_CASE("A64: {S,U}MINP.H, {S,U}MAXP.H", "[a64]") { jit.SetVector(1, vectors[11]); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(2) == vectors[2]); CHECK(jit.GetVector(3) == vectors[3]); @@ -2167,7 +2216,7 @@ TEST_CASE("A64: {S,U}MINP.B, {S,U}MAXP.B", "[a64]") { jit.SetVector(1, vectors[1]); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { 
jit.Run(); }); CHECK(jit.GetVector(2) == vectors[2]); CHECK(jit.GetVector(3) == vectors[3]); @@ -2189,7 +2238,7 @@ TEST_CASE("A64: {S,U}MINP.B, {S,U}MAXP.B", "[a64]") { jit.SetVector(1, vectors[11]); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(2) == vectors[2]); CHECK(jit.GetVector(3) == vectors[3]); @@ -2258,7 +2307,7 @@ TEST_CASE("A64: SQABS", "[a64]") { jit.SetVector(13, Vector{0x89C1B48FBC43F53B, 0x5FDD5D671D399E2}); env.ticks_left = env.code_mem.size(); - jit.Run(); + CheckedRun([&]() { jit.Run(); }); CHECK(jit.GetVector(0) == Vector{0x2B'7F'14'2A'77'32'7F'10, 0x63'16'7E'45'7F'33'42'04}); CHECK(FP::FPSR{(uint32_t)jit.GetRegister(0)}.QC() == 1); @@ -2278,3 +2327,61 @@ TEST_CASE("A64: SQABS", "[a64]") { CHECK(jit.GetVector(13) == Vector{0x763E4B7043BC0AC5, 0x5FDD5D671D399E2}); CHECK(FP::FPSR{(uint32_t)jit.GetRegister(13)}.QC() == 0); } + +TEST_CASE("A64: RBIT{16b}", "[a64]") { + A64TestEnv env; + A64::UserConfig conf{}; + conf.callbacks = &env; + A64::Jit jit{conf}; + env.code_mem.emplace_back(0x6e605841); // rbit v1.16b, v2.16b + env.code_mem.emplace_back(0x6e605822); // rbit v2.16b, v1.16b + env.code_mem.emplace_back(0x14000000); // b . + jit.SetVector(2, { 0xcafedead, 0xbabebeef }); + jit.SetPC(0); // at _start + env.ticks_left = 4; + CheckedRun([&]() { jit.Run(); }); + REQUIRE(jit.GetVector(1)[0] == 0x537f7bb5); + REQUIRE(jit.GetVector(1)[1] == 0x5d7d7df7); + REQUIRE(jit.GetVector(2)[0] == 0xcafedead); + REQUIRE(jit.GetVector(2)[1] == 0xbabebeef); +} + +TEST_CASE("A64: CLZ{X}", "[a64]") { + A64TestEnv env; + A64::UserConfig conf{}; + conf.callbacks = &env; + A64::Jit jit{conf}; + env.code_mem.emplace_back(0xdac01060); // clz x0, x3 + env.code_mem.emplace_back(0xdac01081); // clz x1, x4 + env.code_mem.emplace_back(0xdac010a2); // clz x2, x5 + env.code_mem.emplace_back(0x14000000); // b . + jit.SetRegister(3, 0xfffffffffffffff0); + jit.SetRegister(4, 0x0fffffff0ffffff0); + jit.SetRegister(5, 0x07fffffeffeffef0); + jit.SetPC(0); // at _start + env.ticks_left = 4; + CheckedRun([&]() { jit.Run(); }); + REQUIRE(jit.GetRegister(0) == 0); + REQUIRE(jit.GetRegister(1) == 4); + REQUIRE(jit.GetRegister(2) == 5); +} + +TEST_CASE("A64: CLZ{W}", "[a64]") { + A64TestEnv env; + A64::UserConfig conf{}; + conf.callbacks = &env; + A64::Jit jit{conf}; + env.code_mem.emplace_back(0x5ac01060); // clz w0, w3 + env.code_mem.emplace_back(0x5ac01081); // clz w1, w4 + env.code_mem.emplace_back(0x5ac010a2); // clz w2, w5 + env.code_mem.emplace_back(0x14000000); // b . 
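// The CLZ expectations below are plain count-leading-zeros arithmetic on
// the 32-bit inputs; with C++20 they can even be checked at compile time
// (illustrative only, not part of the patch):
#include <bit>
static_assert(std::countl_zero(0xffff1110u) == 0);  // top bit already set
static_assert(std::countl_zero(0x0fff1110u) == 4);  // one leading zero nibble
static_assert(std::countl_zero(0x07fffffeu) == 5);  // five leading zero bits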
+    jit.SetRegister(3, 0xffff1110);
+    jit.SetRegister(4, 0x0fff1110);
+    jit.SetRegister(5, 0x07fffffe);
+    jit.SetPC(0); // at _start
+    env.ticks_left = 4;
+    CheckedRun([&]() { jit.Run(); });
+    REQUIRE(jit.GetRegister(0) == 0);
+    REQUIRE(jit.GetRegister(1) == 4);
+    REQUIRE(jit.GetRegister(2) == 5);
+}
diff --git a/src/dynarmic/tests/A64/fibonacci.cpp b/src/dynarmic/tests/A64/fibonacci.cpp
index cbb02d1b01..713a48cab7 100644
--- a/src/dynarmic/tests/A64/fibonacci.cpp
+++ b/src/dynarmic/tests/A64/fibonacci.cpp
@@ -8,7 +8,7 @@
 #include
 #include
-#include <map>
+#include <unordered_map>
 #include
 
 #include "dynarmic/common/common_types.h"
@@ -23,7 +23,7 @@ namespace {
 class MyEnvironment final : public A64::UserCallbacks {
 public:
     u64 ticks_left = 0;
-    std::map<u64, u8> memory{};
+    std::unordered_map<u64, u8> memory{};
 
     u8 MemoryRead8(u64 vaddr) override {
         return memory[vaddr];
diff --git a/src/dynarmic/tests/A64/fp_min_max.cpp b/src/dynarmic/tests/A64/fp_min_max.cpp
index 3d997d956d..d8b45db807 100644
--- a/src/dynarmic/tests/A64/fp_min_max.cpp
+++ b/src/dynarmic/tests/A64/fp_min_max.cpp
@@ -87,7 +87,7 @@ void run_test(u32 instruction, Fn fn) {
     jit.SetPC(0);
 
     env.ticks_left = 2;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
 
     REQUIRE(jit.GetVector(0)[0] == fn(test_case));
 
@@ -97,7 +97,7 @@ void run_test(u32 instruction, Fn fn) {
     jit.SetPC(0);
 
     env.ticks_left = 2;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
 
     REQUIRE(jit.GetVector(0)[0] == fn(test_case));
 
@@ -109,7 +109,7 @@ void run_test(u32 instruction, Fn fn) {
     jit.SetPC(0);
 
     env.ticks_left = 2;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
 
     REQUIRE(jit.GetVector(0)[0] == force_default_nan(fn(test_case)));
 
@@ -119,7 +119,7 @@ void run_test(u32 instruction, Fn fn) {
     jit.SetPC(0);
 
     env.ticks_left = 2;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
 
     REQUIRE(jit.GetVector(0)[0] == force_default_nan(fn(test_case)));
 }
@@ -136,7 +136,7 @@ void run_test(u32 instruction, Fn fn) {
     jit.SetPC(0);
 
     env.ticks_left = 2;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
 
     REQUIRE(jit.GetVector(0)[0] == fn(test_case));
 
@@ -148,7 +148,7 @@ void run_test(u32 instruction, Fn fn) {
     jit.SetPC(0);
 
     env.ticks_left = 2;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
 
     REQUIRE(jit.GetVector(0)[0] == force_default_nan(fn(test_case)));
 }
diff --git a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp
index 8eda62f21e..885bf3c0e7 100644
--- a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp
+++ b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp
@@ -91,6 +91,9 @@ static u32 GenRandomInst(u64 pc, bool is_last_inst) {
         "MSR_reg",
         "MSR_imm",
         "MRS",
+        // These do not need tests
+        "SVC",
+        "BRK"
     };
 
     for (const auto& [fn, bitstring] : list) {
@@ -198,9 +201,9 @@ static void RunTestInstance(Dynarmic::A64::Jit& jit, A64Unicorn& uni, A64TestEnv
     uni.ClearPageCache();
 
     jit_env.ticks_left = instructions.size();
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
 
-    uni_env.ticks_left = instructions.size();
+    uni_env.ticks_left = instructions.size() * 4;
     uni.Run();
 
     SCOPE_FAIL {
@@ -296,7 +299,7 @@ static void RunTestInstance(Dynarmic::A64::Jit& jit, A64Unicorn& uni, A64TestEnv
         return;
     }
 
-    REQUIRE(uni.GetPC() == jit.GetPC());
+    REQUIRE(uni.GetPC() + 4 == jit.GetPC());
     REQUIRE(uni.GetRegisters() == jit.GetRegisters());
     REQUIRE(uni.GetVectors() == jit.GetVectors());
     REQUIRE(uni.GetSP() == jit.GetSP());
@@ -306,7 +309,7 @@ static void RunTestInstance(Dynarmic::A64::Jit& jit, A64Unicorn& uni, A64TestEnv
     REQUIRE(FP::FPSR{uni.GetFpsr()}.QC() == FP::FPSR{jit.GetFpsr()}.QC());
 }
 
-TEST_CASE("A64: Single random instruction", "[a64]") {
+TEST_CASE("A64: Single random instruction", "[a64][unicorn]") {
     A64TestEnv jit_env{};
     A64TestEnv uni_env{};
 
@@ -333,7 +336,7 @@ TEST_CASE("A64: Single random instruction", "[a64]") {
     }
 }
 
-TEST_CASE("A64: Floating point instructions", "[a64]") {
+TEST_CASE("A64: Floating point instructions", "[a64][unicorn]") {
     A64TestEnv jit_env{};
     A64TestEnv uni_env{};
 
@@ -458,7 +461,7 @@ TEST_CASE("A64: Floating point instructions", "[a64]") {
     }
 }
 
-TEST_CASE("A64: Small random block", "[a64]") {
+TEST_CASE("A64: Small random block", "[a64][unicorn]") {
     A64TestEnv jit_env{};
     A64TestEnv uni_env{};
 
@@ -493,7 +496,7 @@ TEST_CASE("A64: Small random block", "[a64]") {
     }
 }
 
-TEST_CASE("A64: Large random block", "[a64]") {
+TEST_CASE("A64: Large random block", "[a64][unicorn]") {
     A64TestEnv jit_env{};
     A64TestEnv uni_env{};
diff --git a/src/dynarmic/tests/A64/misaligned_page_table.cpp b/src/dynarmic/tests/A64/misaligned_page_table.cpp
index 75ac41e06d..8235e14a67 100644
--- a/src/dynarmic/tests/A64/misaligned_page_table.cpp
+++ b/src/dynarmic/tests/A64/misaligned_page_table.cpp
@@ -24,7 +24,7 @@ TEST_CASE("misaligned load/store do not use page_table when detect_misaligned_ac
     jit.SetRegister(0, 0x000000000b0afff8);
 
     env.ticks_left = 2;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
 
     // If we don't crash we're fine.
 }
diff --git a/src/dynarmic/tests/A64/real_world.cpp b/src/dynarmic/tests/A64/real_world.cpp
new file mode 100644
index 0000000000..07532d95af
--- /dev/null
+++ b/src/dynarmic/tests/A64/real_world.cpp
@@ -0,0 +1,102 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <catch2/catch_test_macros.hpp>
+#include <oaknut/oaknut.hpp>
+
+#include "./testenv.h"
+#include "dynarmic/interface/A64/a64.h"
+
+using namespace Dynarmic;
+/* Following C program:
+int M[64];
+int grob(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, int k, int l) {
+    M[a] += M[b]; // TOTAL GCC DESTRUCTION
+    return a * b * c * d * e * f * g * h * i * j * k * l;
+}
+int _start() {
+    return grob(
+        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
+        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
+        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
+        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
+        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
+        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
+        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
+        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
+        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
+        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
+        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12]),
+        grob(M[1], M[2], M[3], M[4], M[5], M[6], M[7], M[8], M[9], M[10], M[11], M[12])
+    );
+}
+#ifdef __x86_64__
+#include <stdio.h>
+int main() {
+    return printf("%i", _start());
+}
+#endif
+
+cat > a64-linker.ld << EOF
+ENTRY(_start);
+PHDRS { text PT_LOAD; rodata PT_LOAD; data PT_LOAD; }
+SECTIONS {
+    .
= 0; + .text : { *(.text .text.*) } :text + .rodata : { *(.rodata .rodata.*) } :rodata + .data : ALIGN(CONSTANT(MAXPAGESIZE)) { *(.data .data.*) } :data + .bss : { *(.bss .bss.*) *(COMMON) } :data + /DISCARD/ : { *(.eh_frame*) *(.note .note.*) } +} +EOF +aarch64-linux-gnu-gcc -Wl,-Ta64-linker.ld -Wall -Wextra -ffreestanding -nostdlib -fno-whole-program -O2 grob.c -o grob | aarch64-linux-gnu-objdump -SC grob | awk '{print "env.code_mem.emplace_back(0x"$2"); //" $0}' +aarch64-linux-gnu-gcc -Wl,-Ta64-linker.ld -Wall -Wextra -ffreestanding -nostdlib -fno-whole-program -O2 grob.c -o grob | aarch64-linux-gnu-objdump -SC grob | awk '{print $2", "}' +*/ +TEST_CASE("high register pressure proper handling with block linking 1", "[a64][c]") { + A64TestEnv env; + A64::UserConfig conf{}; + conf.callbacks = &env; + A64::Jit jit{conf}; + + REQUIRE(conf.HasOptimization(OptimizationFlag::BlockLinking)); + env.code_mem = { 0x90000008, 0x91230108, 0xb860d909, 0xb861d90a, 0x0b0a0129, 0xb820d909, 0x1b017c00, 0xb94003e1, 0x1b027c00, 0x1b037c00, 0x1b047c00, 0x1b057c00, 0x1b067c00, 0x1b077c00, 0x1b017c00, 0xb9400be1, 0x1b017c00, 0xb94013e1, 0x1b017c00, 0xb9401be1, 0x1b017c00, 0xd65f03c0, 0xd503201f, 0xd503201f, 0xa9a27bfd, 0x90000000, 0x91230000, 0x910003fd, 0xa90153f3, 0xa9025bf5, 0xa90363f7, 0xa9046bf9, 0xa90573fb, 0x29408c01, 0x2941b40e, 0xb863d804, 0xb861d802, 0x2942ac0c, 0x0b040042, 0x1b037c24, 0x2943a40a, 0x29449c08, 0x1b0e7c84, 0x29459406, 0xb821d802, 0x1b0d7c84, 0x29408c01, 0x2941b40e, 0x1b0c7c84, 0x1b0b7c84, 0x2942ac0c, 0x1b0a7c84, 0x1b097c84, 0x2943a40a, 0x1b087c84, 0x1b077c84, 0x29449c08, 0xb863d80f, 0x1b037c23, 0x1b067c84, 0xb861d802, 0x0b0f0042, 0x1b0e7c63, 0x1b057c84, 0x29459406, 0xb821d802, 0x1b0d7c63, 0x2943f002, 0x2940d801, 0x1b0c7c63, 0x2941e81b, 0x2942e019, 0x1b0b7c63, 0xb90067e2, 0x1b0a7c63, 0x1b097c63, 0x1b087c63, 0x1b077c63, 0x1b067c63, 0x1b057c63, 0x29449402, 0x290d17e2, 0xb876d805, 0xb861d802, 0x29459c06, 0x0b050042, 0xb821d802, 0x1b167c21, 0x290e1fe6, 0x2940d40c, 0x1b1b7c21, 0x2941a408, 0x290f27e8, 0x2942ac0a, 0x1b1a7c21, 0xb86cd802, 0xb875d805, 0x29102fea, 0x0b050042, 0x1b197c21, 0x2943b80d, 0x29113bed, 0x2944c00f, 0x291243ef, 0x1b187c21, 0x2945c811, 0xb82cd802, 0x29134bf1, 0x1b157d8c, 0x2941f813, 0x2940d00b, 0x29147bf3, 0x29429402, 0x291517e2, 0x29439c06, 0x29161fe6, 0x2944a408, 0xb874d805, 0xb86bd802, 0x291727e8, 0x0b050042, 0x2945b80a, 0xb82bd802, 0x29183bea, 0x1b147d6b, 0x2941c00f, 0x2940cc0a, 0x291943ef, 0x2942c811, 0x291a4bf1, 0x2943881e, 0x291b0bfe, 0x29449805, 0x291c1be5, 0x2945a007, 0x291d23e7, 0xb86ad802, 0xb873d805, 0x0b050042, 0xb82ad802, 0x1b137d4a, 0x2941b80d, 0x2940f809, 0x291e3bed, 0x2942c00f, 0x291f43ef, 0x2943c811, 0xb90103f1, 0xb90107f2, 0x29449402, 0xb9010be2, 0xb9010fe5, 0xb869d802, 0xb87ed805, 0x29459c06, 0x0b050042, 0xb829d802, 0x1b1e7d29, 0xb90113e6, 0xb90117e7, 0x2941bc0e, 0x2940c808, 0xb9011bee, 0xb9011fef, 0x2942c410, 0xb90123f0, 0xb90127f1, 0x29439402, 0xb9012be2, 0xb9012fe5, 0xb868d802, 0x29449c06, 0xb90133e6, 0xb90137e7, 0xb872d805, 0x2945b80d, 0x0b050042, 0xb828d802, 0x1b127d08, 0xb9013bed, 0xb9013fee, 0x2941c00f, 0x2940c407, 0xb90143ef, 0xb90147f0, 0x29429402, 0xb9014be2, 0xb9014fe5, 0x2943b806, 0xb90153e6, 0xb90157ee, 0x2944c00f, 0xb9015bef, 0xb9015ff0, 0x29459402, 0xb90163e2, 0xb867d802, 0xb90167e5, 0xb871d805, 0x0b050042, 0xb827d802, 0x1b117ce7, 0x2940c002, 0x2941b406, 0xb9016be6, 0xb9016fed, 0x2942bc0e, 0xb90173ee, 0xb90177ef, 0x29439805, 0xb9017be5, 0xb9017fe6, 0x2944bc0e, 0xb90183ee, 0xb90187ef, 0x29459805, 0xb9018be5, 0xb862d805, 0xb9018fe6, 
0xb870d806, 0x0b0600a5, 0xb822d805, 0x1b107c42, 0x2941b80d, 0x2940bc06, 0xb90193ed, 0xb90197ee, 0x2942b805, 0xb9019be5, 0xb9019fee, 0x2943b405, 0xb901a3e5, 0xb901a7ed, 0xb86fd80d, 0x2944940e, 0xb901abee, 0xb901afe5, 0x2945940e, 0xb901b7e5, 0xb866d805, 0xb901b3ee, 0x0b0d00a5, 0xb826d805, 0x2941dc0d, 0x2940b805, 0xb901bbed, 0xb901bff7, 0x2942b417, 0xb901c3f7, 0xb901c7ed, 0x2943b417, 0xb901cbf7, 0xb901cfed, 0x2944b417, 0xb901d3f7, 0xb901d7ed, 0x2945b417, 0xb901dbf7, 0xb86ed817, 0xb901dfed, 0xb865d80d, 0x0b1701ad, 0xb825d80d, 0xb863d817, 0x1b047c63, 0xb864d80d, 0x0b1701ad, 0xb824d80d, 0xb94067e0, 0xb9408bed, 0x1b007c21, 0xb9406be0, 0x1b1c7c21, 0x1b007c21, 0xb9406fe0, 0x1b007c21, 0xb94073e0, 0x1b007c21, 0xb94077e0, 0x1b007c21, 0xb9407be0, 0x1b007d8c, 0xb9407fe0, 0x1b037c21, 0x1b007d8c, 0xb94083e0, 0x1b007d8c, 0xb94087e0, 0x1b007d8c, 0xb9408fe0, 0x1b0d7d8c, 0xb940f3ed, 0x1b007d8c, 0xb94093e0, 0x1b0d7d29, 0x1b007d8c, 0xb94097e0, 0x1b007d8c, 0xb9409be0, 0x1b007d8c, 0xb9409fe0, 0x1b007d8c, 0xb940a3e0, 0x1b007d6b, 0xb940a7e0, 0x1b0c7c21, 0x1b007d6b, 0xb940abe0, 0x1b007d6b, 0xb940afe0, 0x1b007d6b, 0xb940b3e0, 0x1b007d6b, 0xb940b7e0, 0x1b007d6b, 0xb940bbe0, 0xb9413bed, 0x1b007d6b, 0xb940bfe0, 0x1b007d6b, 0xb940c3e0, 0x1b007d6b, 0xb940c7e0, 0x1b007d6b, 0xb940cbe0, 0x1b007d4a, 0xb940cfe0, 0x1b0b7c21, 0x1b007d4a, 0xb940d3e0, 0x1b007d4a, 0xb940d7e0, 0x1b007d4a, 0xb940dbe0, 0x1b007d4a, 0xb940dfe0, 0x1b007d4a, 0xb940e3e0, 0x1b007d4a, 0xb940e7e0, 0x1b007d4a, 0xb940ebe0, 0x1b007d4a, 0xb940efe0, 0x1b007d4a, 0xb940f7e0, 0x1b007d29, 0xb940fbe0, 0x1b0a7c21, 0x1b007d29, 0x295f8fe0, 0x1b007d20, 0x1b037c00, 0xb94107e3, 0x1b037c00, 0xb9410be3, 0x1b037c00, 0xb9410fe3, 0x1b037c00, 0xb94113e3, 0x1b037c00, 0xb94117e3, 0x1b037c00, 0x1b007c21, 0xb9411be0, 0x1b007d08, 0xb9411fe0, 0x1b007d08, 0xb94123e0, 0x1b007d08, 0xb94127e0, 0x1b007d08, 0xb9412be0, 0x1b007d08, 0xb9412fe0, 0x1b007d08, 0xb94133e0, 0x1b007d08, 0xb94137e0, 0x1b007d08, 0xb9413fe0, 0xb941bff7, 0xa94153f3, 0x1b0d7d08, 0xb9416fed, 0xa9425bf5, 0xa9446bf9, 0x1b007d08, 0xb94143e0, 0x1b087c21, 0x1b007ce7, 0xb94147e0, 0x1b007ce7, 0xb9414be0, 0x1b007ce7, 0xb9414fe0, 0x1b007ce7, 0xb94153e0, 0x1b007ce7, 0xb94157e0, 0x1b007ce7, 0xb9415be0, 0x1b007ce7, 0xb9415fe0, 0x1b007ce7, 0xb94163e0, 0x1b007ce7, 0xb94167e0, 0x1b007ce7, 0xb9416be0, 0x1b007c42, 0xb94173e0, 0x1b077c21, 0x1b0d7c42, 0xb94193ed, 0x1b007c42, 0xb94177e0, 0x1b007c42, 0xb9417be0, 0x1b007c42, 0xb9417fe0, 0x1b007c42, 0xb94183e0, 0x1b007c40, 0xb94187e2, 0x1b027c00, 0xb9418be2, 0x1b027c00, 0xb9418fe2, 0x1b027c00, 0xb94197e2, 0x1b007c20, 0x1b0f7cc1, 0x1b0d7c21, 0xb941a7ed, 0x1b027c21, 0xb9419be2, 0x1b027c21, 0xb9419fe2, 0xa94573fb, 0x1b027c21, 0xb941a3e2, 0x1b027c21, 0xb941abe2, 0x1b0d7c21, 0xb941bbed, 0x1b027c21, 0xb941afe2, 0x1b027c21, 0xb941b3e2, 0x1b027c21, 0xb941b7e2, 0x1b027c21, 0x1b017c01, 0x1b0e7ca0, 0x1b0d7c00, 0xb941c7ed, 0x1b177c00, 0xb941c3f7, 0x1b177c00, 0xb941cbf7, 0x1b0d7c00, 0xb941cfed, 0x1b177c00, 0xb941d3f7, 0x1b0d7c00, 0xb941d7ed, 0x1b177c00, 0xb941dbf7, 0x1b0d7c00, 0xb941dfed, 0x1b177c00, 0xa94363f7, 0xa8de7bfd, 0x1b0d7c00, 0x1b007c20, 0x14000000 }; + jit.SetPC(0x60); // at _start + env.ticks_left = 4; + CheckedRun([&]() { jit.Run(); }); + REQUIRE(jit.GetRegister(0) == 0); +} + +/* +Following C program: +extern int printf(const char*, ...); +int square(int num) { + return (num > 10) ? 
printf((void*)(num - 10)) : num * num;
+}
+*/
+TEST_CASE("Block branching (unpredictable)", "[a64][c]") {
+    A64TestEnv env;
+    A64::UserConfig conf{};
+    conf.callbacks = &env;
+    // conf.very_verbose_debugging_output = true;
+    A64::Jit jit{conf};
+    REQUIRE(conf.HasOptimization(OptimizationFlag::BlockLinking));
+    oaknut::VectorCodeGenerator code{env.code_mem, nullptr};
+    {
+        using namespace oaknut::util;
+        oaknut::Label lb0_2, lb_printf, lb_hlt;
+        code.ADD(W0, W0, 11);
+        code.CMP(W0, 11);
+        code.B(LT, lb0_2);
+        code.SUB(W0, W0, 10);
+        code.B(lb_printf);
+        code.l(lb0_2);
+        code.MUL(W0, W0, W0);
+        code.l(lb_hlt);
+        code.B(lb_hlt);
+        code.l(lb_printf);
+        code.RET();
+    }
+    jit.SetPC(0); // at _start
+    env.ticks_left = env.code_mem.size();
+    CheckedRun([&]() { jit.Run(); });
+}
diff --git a/src/dynarmic/tests/A64/test_invalidation.cpp b/src/dynarmic/tests/A64/test_invalidation.cpp
index cba47dd8ca..168043c1cb 100644
--- a/src/dynarmic/tests/A64/test_invalidation.cpp
+++ b/src/dynarmic/tests/A64/test_invalidation.cpp
@@ -27,38 +27,38 @@ TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have
     jit.SetPC(100);
     env.ticks_left = 4;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 42);
 
     jit.SetPC(100);
     env.ticks_left = 4;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 42);
 
     jit.InvalidateCacheRange(108, 4);
 
     jit.SetPC(100);
     env.ticks_left = 4;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 42);
 
     env.code_mem[2] = 0xd28008a0; // MOV X0, 69
 
     jit.SetPC(100);
     env.ticks_left = 4;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 42);
 
     jit.InvalidateCacheRange(108, 4);
 
     jit.SetPC(100);
     env.ticks_left = 4;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 69);
 
     jit.SetPC(100);
     env.ticks_left = 4;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 69);
 }
 
@@ -77,37 +77,37 @@ TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have
     jit.SetPC(0);
     env.ticks_left = 4;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 42);
 
     jit.SetPC(0);
     env.ticks_left = 4;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 42);
 
     jit.InvalidateCacheRange(8, 4);
 
     jit.SetPC(0);
     env.ticks_left = 4;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 42);
 
     env.code_mem[2] = 0xd28008a0; // MOV X0, 69
 
     jit.SetPC(0);
     env.ticks_left = 4;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 42);
 
     jit.InvalidateCacheRange(8, 4);
 
     jit.SetPC(0);
     env.ticks_left = 4;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 69);
 
     jit.SetPC(0);
     env.ticks_left = 4;
-    jit.Run();
+    CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 69);
 }
diff --git a/src/dynarmic/tests/A64/testenv.h b/src/dynarmic/tests/A64/testenv.h
index 2c5a500f75..31e338b138 100644
--- a/src/dynarmic/tests/A64/testenv.h
+++ b/src/dynarmic/tests/A64/testenv.h
@@ -8,13 +8,11 @@
 #pragma once
 
-#include <map>
-#include <vector>
-
+#include <unordered_map>
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/common_types.h"
-
 #include "dynarmic/interface/A64/a64.h"
+#include "../native/testenv.h"
 
 using Vector = Dynarmic::A64::Vector;
 
@@ -26,7 +24,7 @@ public:
     u64 code_mem_start_address = 0;
     std::vector<u32> code_mem;
-    std::map<u64, u8> modified_memory;
+    std::unordered_map<u64, u8> modified_memory;
     std::vector<std::string> interrupts;
 
     bool IsInCodeMem(u64 vaddr) const {
@@ -133,9 +131,9 @@
 class A64FastmemTestEnv final : public Dynarmic::A64::UserCallbacks {
 public:
     u64 ticks_left = 0;
     char* backing_memory = nullptr;
+    bool ignore_invalid_insn = false;
 
-    explicit A64FastmemTestEnv(char* addr)
-        : backing_memory(addr) {}
+    explicit A64FastmemTestEnv(char* addr) : backing_memory(addr) {}
 
     template<typename T>
     T read(u64 vaddr) {
@@ -205,7 +203,7 @@ public:
         return true;
     }
 
-    void InterpreterFallback(u64 pc, size_t num_instructions) override { ASSERT_MSG(false, "InterpreterFallback({:016x}, {})", pc, num_instructions); }
+    void InterpreterFallback(u64 pc, size_t num_instructions) override { ASSERT_MSG(ignore_invalid_insn, "InterpreterFallback({:016x}, {})", pc, num_instructions); }
 
     void CallSVC(std::uint32_t swi) override { ASSERT_MSG(false, "CallSVC({})", swi); }
diff --git a/src/dynarmic/tests/A64/verify_unicorn.cpp b/src/dynarmic/tests/A64/verify_unicorn.cpp
index 5ffe4f15b0..0c0ccc1609 100644
--- a/src/dynarmic/tests/A64/verify_unicorn.cpp
+++ b/src/dynarmic/tests/A64/verify_unicorn.cpp
@@ -13,7 +13,7 @@
 using namespace Dynarmic;
 
-TEST_CASE("Unicorn: Sanity test", "[a64]") {
+TEST_CASE("Unicorn: Sanity test", "[a64][unicorn]") {
     A64TestEnv env;
 
     env.code_mem.emplace_back(0x8b020020); // ADD X0, X1, X2
@@ -39,7 +39,7 @@ TEST_CASE("Unicorn: Sanity test", "[a64]") {
     REQUIRE(unicorn.GetPC() == 4);
 }
 
-TEST_CASE("Unicorn: Ensure 0xFFFF'FFFF'FFFF'FFFF is readable", "[a64]") {
+TEST_CASE("Unicorn: Ensure 0xFFFF'FFFF'FFFF'FFFF is readable", "[a64][unicorn]") {
     A64TestEnv env;
 
     env.code_mem.emplace_back(0x385fed99); // LDRB W25, [X12, #0xfffffffffffffffe]!
@@ -59,7 +59,7 @@ TEST_CASE("Unicorn: Ensure 0xFFFF'FFFF'FFFF'FFFF is readable", "[a64]") {
     REQUIRE(unicorn.GetPC() == 4);
 }
 
-TEST_CASE("Unicorn: Ensure is able to read across page boundaries", "[a64]") {
+TEST_CASE("Unicorn: Ensure is able to read across page boundaries", "[a64][unicorn]") {
     A64TestEnv env;
 
     env.code_mem.emplace_back(0xb85f93d9); // LDUR W25, [X30, #0xfffffffffffffff9]
diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt
index b56f884c38..85d86c7966 100644
--- a/src/dynarmic/tests/CMakeLists.txt
+++ b/src/dynarmic/tests/CMakeLists.txt
@@ -29,6 +29,7 @@ if ("A64" IN_LIST DYNARMIC_FRONTENDS)
         A64/fp_min_max.cpp
         A64/misaligned_page_table.cpp
         A64/test_invalidation.cpp
+        A64/real_world.cpp
         A64/testenv.h
     )
 endif()
@@ -66,11 +67,14 @@
 if ("x86_64" IN_LIST ARCHITECTURE)
     target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak)
 
-    target_architecture_specific_sources(dynarmic_tests "x86_64" x64_cpu_info.cpp)
+    target_architecture_specific_sources(dynarmic_tests "x86_64"
+        native/preserve_xmm.cpp
+    )
+
     if (NOT MSVC AND NOT DYNARMIC_MULTIARCH_BUILD)
         target_sources(dynarmic_tests PRIVATE
             rsqrt_test.cpp
@@ -129,4 +133,6 @@
 target_include_directories(dynarmic_tests PRIVATE . ../src)
 target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
 target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
+target_compile_options(dynarmic_tests PRIVATE -mavx2)
+
 add_test(dynarmic_tests dynarmic_tests --durations yes)
diff --git a/src/dynarmic/tests/native/preserve_xmm.cpp b/src/dynarmic/tests/native/preserve_xmm.cpp
new file mode 100644
index 0000000000..0f69697b7a
--- /dev/null
+++ b/src/dynarmic/tests/native/preserve_xmm.cpp
@@ -0,0 +1,64 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <array>
+#include <catch2/catch_test_macros.hpp>
+#include <oaknut/oaknut.hpp>
+
+#include "../A64/testenv.h"
+#include "dynarmic/common/fp/fpsr.h"
+#include "dynarmic/interface/exclusive_monitor.h"
+
+using namespace Dynarmic;
+using namespace oaknut::util;
+
+TEST_CASE("X86: Preserve XMM regs", "[x86]") {
+    A64TestEnv env;
+    A64::UserConfig jit_user_config{};
+    jit_user_config.callbacks = &env;
+    A64::Jit jit{jit_user_config};
+
+    oaknut::VectorCodeGenerator code{env.code_mem, nullptr};
+    code.SMINP(V2.S2(), V0.S2(), V1.S2());
+    code.UMINP(V3.S2(), V0.S2(), V1.S2());
+    code.SMINP(V4.S4(), V0.S4(), V1.S4());
+    code.UMINP(V5.S4(), V0.S4(), V1.S4());
+    code.SMAXP(V6.S2(), V0.S2(), V1.S2());
+    code.UMAXP(V7.S2(), V0.S2(), V1.S2());
+    code.SMAXP(V8.S4(), V0.S4(), V1.S4());
+    code.UMAXP(V9.S4(), V0.S4(), V1.S4());
+
+    constexpr std::array vectors = {
+        // initial input vectors [0-1]
+        Vector{0x00000003'00000002, 0xF1234567'01234567},
+        Vector{0x80000000'7FFFFFFF, 0x76543210'76543209},
+        // expected output vectors [2-9]
+        Vector{0x80000000'00000002, 0},
+        Vector{0x7FFFFFFF'00000002, 0},
+        Vector{0xF1234567'00000002, 0x76543209'80000000},
+        Vector{0x01234567'00000002, 0x76543209'7FFFFFFF},
+        Vector{0x7FFFFFFF'00000003, 0},
+        Vector{0x80000000'00000003, 0},
+        Vector{0x01234567'00000003, 0x76543210'7FFFFFFF},
+        Vector{0xF1234567'00000003, 0x76543210'80000000},
+        // input vectors with elements swapped pairwise [10-11]
+        Vector{0x00000002'00000003, 0x01234567'F1234567},
+        Vector{0x7FFFFFFF'80000000, 0x76543209'76543210},
+    };
+
+    jit.SetPC(0);
+    jit.SetVector(0, vectors[0]);
+    jit.SetVector(1, vectors[1]);
+
+    env.ticks_left = env.code_mem.size();
+    CheckedRun([&]() { jit.Run(); });
+
+    CHECK(jit.GetVector(2) == vectors[2]);
+    CHECK(jit.GetVector(3) == vectors[3]);
+    CHECK(jit.GetVector(4) == vectors[4]);
+    CHECK(jit.GetVector(5) == vectors[5]);
+    CHECK(jit.GetVector(6) == vectors[6]);
+    CHECK(jit.GetVector(7) == vectors[7]);
+    CHECK(jit.GetVector(8) == vectors[8]);
+    CHECK(jit.GetVector(9) == vectors[9]);
+}
diff --git a/src/dynarmic/tests/native/testenv.h b/src/dynarmic/tests/native/testenv.h
new file mode 100644
index 0000000000..7a3d14eea0
--- /dev/null
+++ b/src/dynarmic/tests/native/testenv.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <bit>
+#include <catch2/catch_test_macros.hpp>
+#ifdef __AVX__
+#include <immintrin.h>
+#endif
+template<typename F>
+void CheckedRun(F&& fn) {
+#ifdef __AVX__
+    __m256i xmm0 = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, 0);
+    __m256i xmm1 = _mm256_set_epi32(1, 1, 0, 0, 0, 0, 0, 1);
+    __m256i xmm2 = _mm256_set_epi32(2, 2, 0, 0, 0, 0, 0, 2);
+    __m256i xmm3 = _mm256_set_epi32(3, 3, 0, 0, 0, 0, 0, 3);
+    __m256i xmm4 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 4);
+    __m256i xmm5 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 5);
+    __m256i xmm6 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 6);
+    __m256i xmm7 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 7);
+    __m256i xmm8 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 8);
+    __m256i xmm9 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 9);
+    __m256i xmm10 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 10);
+    __m256i xmm11 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 11);
+    asm volatile(""
+                 : "+x"(xmm0), "+x"(xmm1), "+x"(xmm2), "+x"(xmm3)
+                 , "+x"(xmm4), "+x"(xmm5), "+x"(xmm6), "+x"(xmm7)
+                 , "+x"(xmm8), "+x"(xmm9), "+x"(xmm10), "+x"(xmm11)
+                 :
+    );
+    fn();
+    asm volatile(""
+                 : "+x"(xmm0), "+x"(xmm1), "+x"(xmm2), "+x"(xmm3)
+                 , "+x"(xmm4), "+x"(xmm5), "+x"(xmm6), "+x"(xmm7)
+                 , "+x"(xmm8), "+x"(xmm9), "+x"(xmm10), "+x"(xmm11)
+                 :
+    );
+    CHECK(std::bit_cast<long long>(xmm0[0]) == 0);
+    CHECK(std::bit_cast<long long>(xmm1[0]) == 1);
+    CHECK(std::bit_cast<long long>(xmm2[0]) == 2);
+    CHECK(std::bit_cast<long long>(xmm3[0]) == 3);
+    CHECK(std::bit_cast<long long>(xmm4[0]) == 4);
+    CHECK(std::bit_cast<long long>(xmm5[0]) == 5);
+    CHECK(std::bit_cast<long long>(xmm6[0]) == 6);
+    CHECK(std::bit_cast<long long>(xmm7[0]) == 7);
+    CHECK(std::bit_cast<long long>(xmm8[0]) == 8);
+    CHECK(std::bit_cast<long long>(xmm9[0]) == 9);
+    CHECK(std::bit_cast<long long>(xmm10[0]) == 10);
+    CHECK(std::bit_cast<long long>(xmm11[0]) == 11);
+#else
+    fn();
+#endif
+}
diff --git a/src/dynarmic/tests/unicorn_emu/a64_unicorn.cpp b/src/dynarmic/tests/unicorn_emu/a64_unicorn.cpp
index 42b72bdb91..aa66ff7f9a 100644
--- a/src/dynarmic/tests/unicorn_emu/a64_unicorn.cpp
+++ b/src/dynarmic/tests/unicorn_emu/a64_unicorn.cpp
@@ -173,7 +173,7 @@ void A64Unicorn::InterruptHook(uc_engine* uc, u32 int_number, void* user_data) {
     auto* this_ = static_cast<A64Unicorn*>(user_data);
 
     u32 esr;
-    CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
+    esr = 0; //CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR_EL0, &esr));
 
     auto ec = esr >> 26;
     auto iss = esr & 0xFFFFFF;
diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp
index da7e88ea03..b6fdefe0fc 100644
--- a/src/tests/video_core/memory_tracker.cpp
+++ b/src/tests/video_core/memory_tracker.cpp
@@ -28,11 +28,10 @@ public:
         for (u64 page = page_start; page < page_end; ++page) {
             int& value = page_table[page];
             value += delta;
-            if (value < 0) {
-                throw std::logic_error{"negative page"};
-            }
             if (value == 0) {
                 page_table.erase(page);
+            } else if (value < 0) {
+                throw std::logic_error{"negative page"};
             }
         }
     }
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index e80808621b..4c92d4bfa0 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -143,6 +143,10 @@ public:
         return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0;
     }
 
+    [[nodiscard]] bool IsEmpty() const noexcept {
+        return commits.empty();
+    }
+
 private:
     [[nodiscard]] static constexpr u32 ShiftType(u32 type) {
         return 1U << type;
@@ -290,36 +294,117 @@ MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, M
     if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) {
         return std::move(*commit);
     }
-    // Commit has failed, allocate more memory.
-    const u64 chunk_size = AllocationChunkSize(requirements.size);
-    if (!TryAllocMemory(flags, type_mask, chunk_size)) {
-        // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory.
-        throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+    // Commit has failed; fall back progressively.
+    u64 chunk_size = AllocationChunkSize(requirements.size);
+    const u64 minimum_size = std::max(requirements.size, 4ULL << 20); // 4 MiB minimum
+
+    // Try 1: allocate with the original chunk size.
+    if (TryAllocMemory(flags, type_mask, chunk_size)) {
+        return TryCommit(requirements, flags).value();
     }
-    // Commit again, this time it won't fail since there's a fresh allocation above.
-    // If it does, there's a bug.
-    return TryCommit(requirements, flags).value();
+
+    // Try 2: clean up empty allocations and try again.
+    bool cleaned_up = false;
+    for (auto it = allocations.begin(); it != allocations.end();) {
+        if ((*it)->IsEmpty()) {
+            it = allocations.erase(it);
+            cleaned_up = true;
+        } else {
+            ++it;
+        }
+    }
+
+    if (cleaned_up && TryAllocMemory(flags, type_mask, chunk_size)) {
+        LOG_INFO(Render_Vulkan, "Memory allocation succeeded after cleanup");
+        return TryCommit(requirements, flags).value();
+    }
+
+    // Try 3: progressive size reduction, cleaning up between attempts.
+    while (chunk_size > minimum_size) {
+        chunk_size >>= 1; // Halve the chunk size
+        chunk_size = std::max(chunk_size, minimum_size);
+
+        if (TryAllocMemory(flags, type_mask, chunk_size)) {
+            LOG_WARNING(Render_Vulkan, "Memory allocation succeeded with reduced chunk size: {} MB",
+                        chunk_size >> 20);
+            return TryCommit(requirements, flags).value();
+        }
+
+        // Clean up again between size reduction attempts
+        for (auto it = allocations.begin(); it != allocations.end();) {
+            if ((*it)->IsEmpty()) {
+                it = allocations.erase(it);
+            } else {
+                ++it;
+            }
+        }
+    }
+
+    // Try 4: the minimum-size allocation.
+    if (chunk_size <= minimum_size && TryAllocMemory(flags, type_mask, minimum_size)) {
+        LOG_WARNING(Render_Vulkan, "Memory allocation succeeded with minimum size: {} MB",
+                    minimum_size >> 20);
+        return TryCommit(requirements, flags).value();
+    }
+
+    // Try 5: fall back to non-device-local memory if the request was device-local.
+    if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
+        const VkMemoryPropertyFlags fallback_flags = flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+
+        // Try with original chunk size first
+        u64 fallback_chunk_size = AllocationChunkSize(requirements.size);
+        if (TryAllocMemory(fallback_flags, type_mask, fallback_chunk_size)) {
+            if (auto commit = TryCommit(requirements, fallback_flags)) {
+                LOG_WARNING(Render_Vulkan, "Falling back to non-device-local memory due to OOM");
+                return std::move(*commit);
+            }
+        }
+
+        // Progressive size reduction for non-device-local memory
+        while (fallback_chunk_size > minimum_size) {
+            fallback_chunk_size >>= 1;
+            fallback_chunk_size = std::max(fallback_chunk_size, minimum_size);
+
+            if (TryAllocMemory(fallback_flags, type_mask, fallback_chunk_size)) {
+                if (auto commit = TryCommit(requirements, fallback_flags)) {
+                    LOG_WARNING(Render_Vulkan,
+                                "Falling back to non-device-local memory with reduced size: {} MB",
+                                fallback_chunk_size >> 20);
+                    return std::move(*commit);
+                }
+            }
+        }
+    }
+
+    LOG_CRITICAL(Render_Vulkan, "Vulkan memory allocation failed - exhausted all strategies");
+    throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
 }
 
 bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
-    const u32 type = FindType(flags, type_mask).value();
+    const auto type_opt = FindType(flags, type_mask);
+    if (!type_opt) {
+        return false;
+    }
+
+    // Adreno requires 4 KiB alignment (subject to review).
+    const u64 aligned_size = (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY)
+                                 ? Common::AlignUp(size, 4096)
+                                 : size;
+
     vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
         .pNext = nullptr,
-        .allocationSize = size,
-        .memoryTypeIndex = type,
+        .allocationSize = aligned_size,
+        .memoryTypeIndex = *type_opt,
     });
+
     if (!memory) {
-        if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
-            // Try to allocate non device local memory
-            return TryAllocMemory(flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, type_mask, size);
-        } else {
-            // RIP
-            return false;
-        }
+        return false;
     }
+
     allocations.push_back(
-        std::make_unique<MemoryAllocation>(this, std::move(memory), flags, size, type));
+        std::make_unique<MemoryAllocation>(this, std::move(memory), flags, aligned_size, *type_opt));
     return true;
 }
@@ -331,11 +416,25 @@ void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {
 
 std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
                                                        VkMemoryPropertyFlags flags) {
+    // Conservative, spec-compliant alignment for suballocation
+    VkDeviceSize eff_align = requirements.alignment;
+    const auto& limits = device.GetPhysical().GetProperties().limits;
+    if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
+        !(flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+        // Non-coherent memory must be flushed/invalidated on atom boundaries
+        if (limits.nonCoherentAtomSize > eff_align) {
+            eff_align = limits.nonCoherentAtomSize;
+        }
+    }
+    // Keep buffer and image suballocations apart to avoid stalls on tilers
+    if (buffer_image_granularity > eff_align) {
+        eff_align = buffer_image_granularity;
+    }
+    eff_align = std::bit_ceil(eff_align);
+
     for (auto& allocation : allocations) {
         if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) {
            continue;
        }
-        if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
+        if (auto commit = allocation->Commit(requirements.size, eff_align)) {
            return commit;
        }
    }
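
For reference, the widened alignment in `TryCommit` composes three constraints: the resource's own `requirements.alignment`, `nonCoherentAtomSize` when the memory is host-visible but not host-coherent (flush/invalidate ranges must land on atom boundaries), and `bufferImageGranularity` so buffer and image suballocations never share a granularity page. A worked example with illustrative device limits (64 and 1024 are common desktop figures, but real values vary per device):

```cpp
// Worked example of the effective-alignment computation in TryCommit.
// The numeric limits below are assumptions for illustration only.
#include <bit>
#include <cstdint>
#include <iostream>

int main() {
    std::uint64_t eff_align = 16;                  // requirements.alignment
    const std::uint64_t non_coherent_atom = 64;    // limits.nonCoherentAtomSize
    const std::uint64_t granularity = 1024;        // bufferImageGranularity
    const bool host_visible_non_coherent = true;

    if (host_visible_non_coherent && non_coherent_atom > eff_align) {
        eff_align = non_coherent_atom;             // widen to the flush/invalidate atom
    }
    if (granularity > eff_align) {
        eff_align = granularity;                   // keep buffers and images apart
    }
    eff_align = std::bit_ceil(eff_align);          // the suballocator expects a power of two

    std::cout << eff_align << '\n';                // prints 1024
}
```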
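The rewritten `MemoryAllocator::Commit` replaces allocate-or-throw with a tiered retry: original chunk size, cleanup of empty allocations, progressive halving down to a floor, a minimum-size attempt, then the same ladder again without `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. The control flow reduces to the sketch below; `AllocWithFallback` and `try_alloc` are hypothetical stand-ins for `Commit` and `TryAllocMemory`, and the 4 MiB floor mirrors `minimum_size` above:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <optional>

using u64 = std::uint64_t;

// Returns the chunk size that succeeded, or nullopt once every tier fails
// (at which point the real code throws VK_ERROR_OUT_OF_DEVICE_MEMORY).
std::optional<u64> AllocWithFallback(u64 preferred, u64 required,
                                     const std::function<bool(u64, bool)>& try_alloc) {
    const u64 floor = std::max<u64>(required, u64{4} << 20); // 4 MiB floor
    for (const bool device_local : {true, false}) {          // relax locality last
        u64 chunk = std::max(preferred, floor);
        while (true) {
            if (try_alloc(chunk, device_local)) {
                return chunk;
            }
            if (chunk == floor) {
                break;                                       // this tier is exhausted
            }
            chunk = std::max(chunk >> 1, floor);             // halve toward the floor
        }
    }
    return std::nullopt;
}

int main() {
    // Pretend device-local memory is exhausted but 8 MiB of host memory works.
    const auto try_alloc = [](u64 size, bool device_local) {
        return !device_local && size <= (u64{8} << 20);
    };
    if (const auto chunk = AllocWithFallback(u64{64} << 20, u64{1} << 20, try_alloc)) {
        std::printf("allocated %llu MiB\n", static_cast<unsigned long long>(*chunk >> 20));
    }
}
```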
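`CheckedRun` works because JIT-generated code runs as an ordinary call from the host compiler's point of view, and on the Windows x64 ABI the registers XMM6-XMM15 are callee-saved: if the JIT's prologue/epilogue fails to preserve them, values the caller pinned there change across `fn()`. The `"+x"` constraints keep twelve `__m256i` values live across the call, and only lane 0 is checked, which sits in the preserved low 128-bit XMM portion (upper YMM halves are volatile even on Win64). On the SysV ABI all vector registers are caller-saved, so the compiler spills around the opaque call and the checks are effectively inert there. A minimal standalone sketch of the same idea, assuming GCC/Clang on x86-64; `code_under_test` is an illustrative stand-in for `jit.Run()`:

```cpp
// Compile with e.g. `g++ -O2 -mavx2 sketch.cpp` (GCC/Clang only: uses inline
// asm and vector-extension subscripting, like the CheckedRun harness).
#include <cassert>
#include <immintrin.h>

// Stand-in for jit.Run(): opaque code that may clobber vector registers.
__attribute__((noinline)) void code_under_test() {
    asm volatile("" ::: "memory");
}

int main() {
    __m256i a = _mm256_set1_epi64x(0x1122334455667788LL);
    __m256i b = _mm256_set1_epi64x(0x0102030405060708LL);
    // "+x" forces the values to be live in vector registers around the call.
    asm volatile("" : "+x"(a), "+x"(b));
    code_under_test();
    asm volatile("" : "+x"(a), "+x"(b));
    // Lane 0 lives in the XMM portion that Win64 requires callees to preserve;
    // a JIT that clobbers XMM6-XMM15 without saving them fails these checks.
    assert(a[0] == 0x1122334455667788LL);
    assert(b[0] == 0x0102030405060708LL);
}
```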
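One practical consequence of the new `[unicorn]` tag on the comparison tests: Catch2 tag expressions can now deselect the Unicorn-backed suites on machines without a working Unicorn build, e.g. `./dynarmic_tests "~[unicorn]"` to exclude them, or `./dynarmic_tests "[a64][unicorn]"` to run only them.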