From c9a3baab5d5ba524778492027ef8961da947df2d Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 27 Aug 2025 05:00:38 +0200 Subject: [PATCH] [heap_tracker] Use ankerl map instead of rb tree (#249) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/249 Reviewed-by: CamilleLaVey Co-authored-by: lizzie Co-committed-by: lizzie --- externals/CMakeLists.txt | 11 ++ src/common/CMakeLists.txt | 11 +- src/common/heap_tracker.cpp | 200 ++++++---------------- src/common/heap_tracker.h | 82 +++------ src/core/arm/dynarmic/arm_dynarmic.cpp | 44 +---- src/core/arm/dynarmic/arm_dynarmic.h | 20 --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 5 - src/core/arm/dynarmic/arm_dynarmic_64.cpp | 5 - src/core/hle/kernel/k_process.cpp | 4 - src/core/memory.cpp | 21 +-- src/core/memory.h | 5 - 11 files changed, 103 insertions(+), 305 deletions(-) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 64592a8855..07e9ae7a8f 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -151,6 +151,17 @@ if (ENABLE_WEB_SERVICE) ) endif() +# unordered_dense +AddPackage( + NAME unordered_dense + REPO "Lizzie841/unordered_dense" + SHA e59d30b7b1 + HASH 71eff7bd9ba4b9226967bacd56a8ff000946f8813167cb5664bb01e96fb79e4e220684d824fe9c59c4d1cc98c606f13aff05b7940a1ed8ab3c95d6974ee34fa0 + FIND_PACKAGE_ARGUMENTS "CONFIG" + OPTIONS + "UNORDERED_DENSE_INSTALL OFF" +) + # FFMpeg if (YUZU_USE_BUNDLED_FFMPEG) add_subdirectory(ffmpeg) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index e9aed1d7af..cbe1d35fc5 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -262,13 +262,13 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") endif() if (BOOST_NO_HEADERS) - target_link_libraries(common PUBLIC Boost::algorithm Boost::icl Boost::pool) + target_link_libraries(common PUBLIC Boost::algorithm Boost::icl Boost::pool) else() - target_link_libraries(common PUBLIC Boost::headers) + target_link_libraries(common PUBLIC Boost::headers) endif() if (lz4_ADDED) - target_include_directories(common PRIVATE ${lz4_SOURCE_DIR}/lib) + target_include_directories(common PRIVATE ${lz4_SOURCE_DIR}/lib) endif() target_link_libraries(common PUBLIC fmt::fmt stb::headers Threads::Threads) @@ -280,6 +280,11 @@ else() target_link_libraries(common PRIVATE zstd) endif() +if (TARGET unordered_dense::unordered_dense) + # weird quirk of system installs + target_link_libraries(common PUBLIC unordered_dense::unordered_dense) +endif() + if(ANDROID) # For ASharedMemory_create target_link_libraries(common PRIVATE android) diff --git a/src/common/heap_tracker.cpp b/src/common/heap_tracker.cpp index 6832087959..d509f2644c 100644 --- a/src/common/heap_tracker.cpp +++ b/src/common/heap_tracker.cpp @@ -1,3 +1,5 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -34,68 +36,60 @@ HeapTracker::~HeapTracker() = default; void HeapTracker::Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm, bool is_separate_heap) { + bool rebuild_required = false; // When mapping other memory, map pages immediately. if (!is_separate_heap) { m_buffer.Map(virtual_offset, host_offset, length, perm, false); return; } - { - // We are mapping part of a separate heap. + // We are mapping part of a separate heap and insert into mappings. 
std::scoped_lock lk{m_lock}; - - auto* const map = new SeparateHeapMap{ - .vaddr = virtual_offset, + m_map_count++; + const auto it = m_mappings.insert_or_assign(virtual_offset, SeparateHeapMap{ .paddr = host_offset, .size = length, .tick = m_tick++, .perm = perm, .is_resident = false, - }; - - // Insert into mappings. - m_map_count++; - m_mappings.insert(*map); + }); + // Update tick before possible rebuild. + it.first->second.tick = m_tick++; + // Check if we need to rebuild. + if (m_resident_map_count >= m_max_resident_map_count) + rebuild_required = true; + // Map the area. + m_buffer.Map(it.first->first, it.first->second.paddr, it.first->second.size, it.first->second.perm, false); + // This map is now resident. + it.first->second.is_resident = true; + m_resident_map_count++; + m_resident_mappings.insert(*it.first); } - - // Finally, map. - this->DeferredMapSeparateHeap(virtual_offset); + // A rebuild was required, so perform it now. + if (rebuild_required) + this->RebuildSeparateHeapAddressSpace(); } void HeapTracker::Unmap(size_t virtual_offset, size_t size, bool is_separate_heap) { // If this is a separate heap... if (is_separate_heap) { std::scoped_lock lk{m_lock}; - - const SeparateHeapMap key{ - .vaddr = virtual_offset, - }; - // Split at the boundaries of the region we are removing. this->SplitHeapMapLocked(virtual_offset); this->SplitHeapMapLocked(virtual_offset + size); - // Erase all mappings in range. - auto it = m_mappings.find(key); - while (it != m_mappings.end() && it->vaddr < virtual_offset + size) { - // Get underlying item. - auto* const item = std::addressof(*it); - + auto it = m_mappings.find(virtual_offset); + while (it != m_mappings.end() && it->first < virtual_offset + size) { // If resident, erase from resident map. - if (item->is_resident) { + if (it->second.is_resident) { ASSERT(--m_resident_map_count >= 0); - m_resident_mappings.erase(m_resident_mappings.iterator_to(*item)); + m_resident_mappings.erase(m_resident_mappings.find(it->first)); } - // Erase from map. ASSERT(--m_map_count >= 0); it = m_mappings.erase(it); - - // Free the item. - delete item; } } - // Unmap pages. m_buffer.Unmap(virtual_offset, size, false); } @@ -117,110 +111,51 @@ void HeapTracker::Protect(size_t virtual_offset, size_t size, MemoryPermission p { std::scoped_lock lk2{m_lock}; - - const SeparateHeapMap key{ - .vaddr = next, - }; - // Try to get the next mapping corresponding to this address. - const auto it = m_mappings.nfind(key); - + const auto it = m_mappings.find(next); if (it == m_mappings.end()) { // There are no separate heap mappings remaining. next = end; should_protect = true; - } else if (it->vaddr == cur) { + } else if (it->first == cur) { // We are in range. // Update permission bits. - it->perm = perm; + it->second.perm = perm; // Determine next address and whether we should protect. - next = cur + it->size; - should_protect = it->is_resident; + next = cur + it->second.size; + should_protect = it->second.is_resident; } else /* if (it->vaddr > cur) */ { // We weren't in range, but there is a block coming up that will be. - next = it->vaddr; + next = it->first; should_protect = true; } } // Clamp to end. next = std::min(next, end); - // Reprotect, if we need to. - if (should_protect) { + if (should_protect) m_buffer.Protect(cur, next - cur, perm); - } - // Advance. 
cur = next; } } -bool HeapTracker::DeferredMapSeparateHeap(u8* fault_address) { - if (m_buffer.IsInVirtualRange(fault_address)) { - return this->DeferredMapSeparateHeap(fault_address - m_buffer.VirtualBasePointer()); - } - - return false; -} - -bool HeapTracker::DeferredMapSeparateHeap(size_t virtual_offset) { - bool rebuild_required = false; - - { - std::scoped_lock lk{m_lock}; - - // Check to ensure this was a non-resident separate heap mapping. - const auto it = this->GetNearestHeapMapLocked(virtual_offset); - if (it == m_mappings.end() || it->is_resident) { - return false; - } - - // Update tick before possible rebuild. - it->tick = m_tick++; - - // Check if we need to rebuild. - if (m_resident_map_count > m_max_resident_map_count) { - rebuild_required = true; - } - - // Map the area. - m_buffer.Map(it->vaddr, it->paddr, it->size, it->perm, false); - - // This map is now resident. - it->is_resident = true; - m_resident_map_count++; - m_resident_mappings.insert(*it); - } - - if (rebuild_required) { - // A rebuild was required, so perform it now. - this->RebuildSeparateHeapAddressSpace(); - } - - return true; -} - void HeapTracker::RebuildSeparateHeapAddressSpace() { std::scoped_lock lk{m_rebuild_lock, m_lock}; - ASSERT(!m_resident_mappings.empty()); - // Dump half of the mappings. - // // Despite being worse in theory, this has proven to be better in practice than more // regularly dumping a smaller amount, because it significantly reduces average case // lock contention. - const size_t desired_count = std::min(m_resident_map_count, m_max_resident_map_count) / 2; - const size_t evict_count = m_resident_map_count - desired_count; + std::size_t const desired_count = std::min(m_resident_map_count, m_max_resident_map_count) / 2; + std::size_t const evict_count = m_resident_map_count - desired_count; auto it = m_resident_mappings.begin(); - - for (size_t i = 0; i < evict_count && it != m_resident_mappings.end(); i++) { + for (std::size_t i = 0; i < evict_count && it != m_resident_mappings.end(); i++) { // Unmark and unmap. - it->is_resident = false; - m_buffer.Unmap(it->vaddr, it->size, false); - + it->second.is_resident = false; + m_buffer.Unmap(it->first, it->second.size, false); // Advance. ASSERT(--m_resident_map_count >= 0); it = m_resident_mappings.erase(it); @@ -229,53 +164,32 @@ void HeapTracker::RebuildSeparateHeapAddressSpace() { void HeapTracker::SplitHeapMap(VAddr offset, size_t size) { std::scoped_lock lk{m_lock}; - this->SplitHeapMapLocked(offset); this->SplitHeapMapLocked(offset + size); } void HeapTracker::SplitHeapMapLocked(VAddr offset) { - const auto it = this->GetNearestHeapMapLocked(offset); - if (it == m_mappings.end() || it->vaddr == offset) { - // Not contained or no split required. - return; + auto it = this->GetNearestHeapMapLocked(offset); + if (it != m_mappings.end() && it->first != offset) { + // Adjust left iterator + auto const orig_size = it->second.size; + auto const left_size = offset - it->first; + it->second.size = left_size; + // Insert the new right map. + auto const right = SeparateHeapMap{ + .paddr = it->second.paddr + left_size, + .size = orig_size - left_size, + .tick = it->second.tick, + .perm = it->second.perm, + .is_resident = it->second.is_resident, + }; + m_map_count++; + auto rit = m_mappings.insert_or_assign(it->first + left_size, right); + if (rit.first->second.is_resident) { + m_resident_map_count++; + m_resident_mappings.insert(*rit.first); + } } - - // Cache the original values. 
- auto* const left = std::addressof(*it); - const size_t orig_size = left->size; - - // Adjust the left map. - const size_t left_size = offset - left->vaddr; - left->size = left_size; - - // Create the new right map. - auto* const right = new SeparateHeapMap{ - .vaddr = left->vaddr + left_size, - .paddr = left->paddr + left_size, - .size = orig_size - left_size, - .tick = left->tick, - .perm = left->perm, - .is_resident = left->is_resident, - }; - - // Insert the new right map. - m_map_count++; - m_mappings.insert(*right); - - // If resident, also insert into resident map. - if (right->is_resident) { - m_resident_map_count++; - m_resident_mappings.insert(*right); - } -} - -HeapTracker::AddrTree::iterator HeapTracker::GetNearestHeapMapLocked(VAddr offset) { - const SeparateHeapMap key{ - .vaddr = offset, - }; - - return m_mappings.find(key); } } // namespace Common diff --git a/src/common/heap_tracker.h b/src/common/heap_tracker.h index ee5b0bf43a..14b5401c18 100644 --- a/src/common/heap_tracker.h +++ b/src/common/heap_tracker.h @@ -1,93 +1,55 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once -#include #include -#include #include - +#include #include "common/host_memory.h" -#include "common/intrusive_red_black_tree.h" namespace Common { struct SeparateHeapMap { - Common::IntrusiveRedBlackTreeNode addr_node{}; - Common::IntrusiveRedBlackTreeNode tick_node{}; - VAddr vaddr{}; - PAddr paddr{}; - size_t size{}; - size_t tick{}; - MemoryPermission perm{}; - bool is_resident{}; -}; - -struct SeparateHeapMapAddrComparator { - static constexpr int Compare(const SeparateHeapMap& lhs, const SeparateHeapMap& rhs) { - if (lhs.vaddr < rhs.vaddr) { - return -1; - } else if (lhs.vaddr <= (rhs.vaddr + rhs.size - 1)) { - return 0; - } else { - return 1; - } - } -}; - -struct SeparateHeapMapTickComparator { - static constexpr int Compare(const SeparateHeapMap& lhs, const SeparateHeapMap& rhs) { - if (lhs.tick < rhs.tick) { - return -1; - } else if (lhs.tick > rhs.tick) { - return 1; - } else { - return SeparateHeapMapAddrComparator::Compare(lhs, rhs); - } - } + PAddr paddr{}; //8 + std::size_t size{}; //8 (16) + std::size_t tick{}; //8 (24) + // 4 bits needed, sync with host_memory.h if needed + MemoryPermission perm : 4 = MemoryPermission::Read; + bool is_resident : 1 = false; }; +static_assert(sizeof(SeparateHeapMap) == 32); //half a cache line! 
good for coherency class HeapTracker { public: explicit HeapTracker(Common::HostMemory& buffer); ~HeapTracker(); - - void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm, - bool is_separate_heap); + void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm, bool is_separate_heap); void Unmap(size_t virtual_offset, size_t size, bool is_separate_heap); void Protect(size_t virtual_offset, size_t length, MemoryPermission perm); - u8* VirtualBasePointer() { + inline u8* VirtualBasePointer() noexcept { return m_buffer.VirtualBasePointer(); } - - bool DeferredMapSeparateHeap(u8* fault_address); - bool DeferredMapSeparateHeap(size_t virtual_offset); - private: - using AddrTreeTraits = - Common::IntrusiveRedBlackTreeMemberTraitsDeferredAssert<&SeparateHeapMap::addr_node>; - using AddrTree = AddrTreeTraits::TreeType; - - using TickTreeTraits = - Common::IntrusiveRedBlackTreeMemberTraitsDeferredAssert<&SeparateHeapMap::tick_node>; - using TickTree = TickTreeTraits::TreeType; - - AddrTree m_mappings{}; - TickTree m_resident_mappings{}; - + // TODO: You may want to "fake-map" the first 2GB of 64-bit address space + // and dedicate it entirely to a recursive PTE mapping :) + // However Ankerl is way better than using an RB tree, in all senses + using AddrTree = ankerl::unordered_dense::map; + AddrTree m_mappings; + using TicksTree = ankerl::unordered_dense::map; + TicksTree m_resident_mappings; private: void SplitHeapMap(VAddr offset, size_t size); void SplitHeapMapLocked(VAddr offset); - - AddrTree::iterator GetNearestHeapMapLocked(VAddr offset); - void RebuildSeparateHeapAddressSpace(); - + inline HeapTracker::AddrTree::iterator GetNearestHeapMapLocked(VAddr offset) noexcept { + return m_mappings.find(offset); + } private: Common::HostMemory& m_buffer; const s64 m_max_resident_map_count; - std::shared_mutex m_rebuild_lock{}; std::mutex m_lock{}; s64 m_map_count{}; diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index e6e9fc45be..9d26db51f7 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -3,47 +3,9 @@ #ifdef __linux__ -#include "common/signal_chain.h" - +//#include "common/signal_chain.h" #include "core/arm/dynarmic/arm_dynarmic.h" -#include "core/hle/kernel/k_process.h" -#include "core/memory.h" - -namespace Core { - -namespace { - -thread_local Core::Memory::Memory* g_current_memory{}; -std::once_flag g_registered{}; -struct sigaction g_old_segv {}; - -void HandleSigSegv(int sig, siginfo_t* info, void* ctx) { - if (g_current_memory && g_current_memory->InvalidateSeparateHeap(info->si_addr)) { - return; - } - - return g_old_segv.sa_sigaction(sig, info, ctx); -} - -} // namespace - -ScopedJitExecution::ScopedJitExecution(Kernel::KProcess* process) { - g_current_memory = std::addressof(process->GetMemory()); -} - -ScopedJitExecution::~ScopedJitExecution() { - g_current_memory = nullptr; -} - -void ScopedJitExecution::RegisterHandler() { - std::call_once(g_registered, [] { - struct sigaction sa {}; - sa.sa_sigaction = &HandleSigSegv; - sa.sa_flags = SA_SIGINFO | SA_ONSTACK; - Common::SigAction(SIGSEGV, std::addressof(sa), std::addressof(g_old_segv)); - }); -} - -} // namespace Core +//#include "core/hle/kernel/k_process.h" +//#include "core/memory.h" #endif diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index 53dd188151..eef7c31160 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ 
b/src/core/arm/dynarmic/arm_dynarmic.h @@ -26,24 +26,4 @@ constexpr HaltReason TranslateHaltReason(Dynarmic::HaltReason hr) { return static_cast(hr); } -#ifdef __linux__ - -class ScopedJitExecution { -public: - explicit ScopedJitExecution(Kernel::KProcess* process); - ~ScopedJitExecution(); - static void RegisterHandler(); -}; - -#else - -class ScopedJitExecution { -public: - explicit ScopedJitExecution(Kernel::KProcess* process) {} - ~ScopedJitExecution() {} - static void RegisterHandler() {} -}; - -#endif - } // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index afbf178349..1731ef1aec 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -336,15 +336,11 @@ bool ArmDynarmic32::IsInThumbMode() const { } HaltReason ArmDynarmic32::RunThread(Kernel::KThread* thread) { - ScopedJitExecution sj(thread->GetOwnerProcess()); - m_jit->ClearExclusiveState(); return TranslateHaltReason(m_jit->Run()); } HaltReason ArmDynarmic32::StepThread(Kernel::KThread* thread) { - ScopedJitExecution sj(thread->GetOwnerProcess()); - m_jit->ClearExclusiveState(); return TranslateHaltReason(m_jit->Step()); } @@ -386,7 +382,6 @@ ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProc m_cp15(std::make_shared(*this)), m_core_index{core_index} { auto& page_table_impl = process->GetPageTable().GetBasePageTable().GetImpl(); m_jit = MakeJit(&page_table_impl); - ScopedJitExecution::RegisterHandler(); } ArmDynarmic32::~ArmDynarmic32() = default; diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 99a80644ad..f9d1232f83 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -367,15 +367,11 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa } HaltReason ArmDynarmic64::RunThread(Kernel::KThread* thread) { - ScopedJitExecution sj(thread->GetOwnerProcess()); - m_jit->ClearExclusiveState(); return TranslateHaltReason(m_jit->Run()); } HaltReason ArmDynarmic64::StepThread(Kernel::KThread* thread) { - ScopedJitExecution sj(thread->GetOwnerProcess()); - m_jit->ClearExclusiveState(); return TranslateHaltReason(m_jit->Step()); } @@ -415,7 +411,6 @@ ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProc auto& page_table = process->GetPageTable().GetBasePageTable(); auto& page_table_impl = page_table.GetImpl(); m_jit = MakeJit(&page_table_impl, page_table.GetAddressSpaceWidth()); - ScopedJitExecution::RegisterHandler(); } ArmDynarmic64::~ArmDynarmic64() = default; diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 80566b7e77..cf03353f84 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -1266,10 +1266,6 @@ void KProcess::InitializeInterfaces() { #ifdef HAS_NCE if (this->IsApplication() && Settings::IsNceEnabled()) { - // Register the scoped JIT handler before creating any NCE instances - // so that its signal handler will appear first in the signal chain. 
- Core::ScopedJitExecution::RegisterHandler(); - for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { m_arm_interfaces[i] = std::make_unique(m_kernel.System(), true, i); } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 0035c626e2..08391cd815 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -61,8 +61,7 @@ struct Memory::Impl { } #ifdef __linux__ - heap_tracker.emplace(system.DeviceMemory().buffer); - buffer = std::addressof(*heap_tracker); + buffer.emplace(system.DeviceMemory().buffer); #else buffer = std::addressof(system.DeviceMemory().buffer); #endif @@ -1024,9 +1023,8 @@ struct Memory::Impl { std::span gpu_dirty_managers; std::mutex sys_core_guard; - std::optional heap_tracker; #ifdef __linux__ - Common::HeapTracker* buffer{}; + std::optional buffer; #else Common::HostMemory* buffer{}; #endif @@ -1230,22 +1228,7 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { if (rasterizer) { impl->InvalidateGPUMemory(ptr, size); } - -#ifdef __linux__ - if (!rasterizer && mapped) { - impl->buffer->DeferredMapSeparateHeap(GetInteger(vaddr)); - } -#endif - return mapped && ptr != nullptr; } -bool Memory::InvalidateSeparateHeap(void* fault_address) { -#ifdef __linux__ - return impl->buffer->DeferredMapSeparateHeap(static_cast(fault_address)); -#else - return false; -#endif -} - } // namespace Core::Memory diff --git a/src/core/memory.h b/src/core/memory.h index dcca26892b..99108ecf0d 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -487,13 +487,8 @@ public: * marked as debug or non-debug. */ void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); - void SetGPUDirtyManagers(std::span managers); - bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); - - bool InvalidateSeparateHeap(void* fault_address); - private: Core::System& system;
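
For reference, a minimal self-contained sketch (not part of the patch) of the ankerl::unordered_dense::map usage pattern the new HeapTracker code relies on; the names Entry, DummyMap and the addresses used are illustrative stand-ins for SeparateHeapMap/AddrTree, not identifiers from the tree:

    // Sketch only: mirrors the insert_or_assign / find / erase pattern used in
    // HeapTracker::Map and HeapTracker::Unmap after the switch away from the RB tree.
    #include <ankerl/unordered_dense.h>
    #include <cstdint>
    #include <cstdio>

    struct Entry {
        std::uint64_t paddr{};
        std::size_t size{};
        bool is_resident{};
    };

    using DummyMap = ankerl::unordered_dense::map<std::uint64_t, Entry>;

    int main() {
        DummyMap mappings;

        // insert_or_assign returns std::pair<iterator, bool>, as consumed via it.first in Map().
        auto [it, inserted] = mappings.insert_or_assign(0x1000, Entry{.paddr = 0x8000, .size = 0x2000});
        it->second.is_resident = true;

        // Exact-key lookup only: find() matches the inserted key, it does not return the
        // nearest mapping the way the removed intrusive RB tree's nfind() did.
        if (auto found = mappings.find(0x1000); found != mappings.end()) {
            std::printf("paddr=%llx size=%zx\n",
                        static_cast<unsigned long long>(found->second.paddr), found->second.size);
        }

        // erase(iterator) returns the next iterator, matching the erase loop in Unmap().
        for (auto cur = mappings.begin(); cur != mappings.end();) {
            cur = mappings.erase(cur);
        }
        return 0;
    }

The trade-off, visible in GetNearestHeapMapLocked now delegating to a plain find(), is that the dense hash map offers O(1) exact-key lookups and contiguous storage, but no ordered traversal or nearest-key queries of the kind the removed intrusive red-black tree provided.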