forked from eden-emu/eden
[core/memory] Remove deferred heap allocation on Linux.
This commit is contained in:
parent
e18c7ba41d
commit
57c4859da4
7 changed files with 55 additions and 289 deletions
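Reading the hunks below together: HeapTracker previously routed separate-heap mappings through DeferredMapSeparateHeap, and a chained SIGSEGV handler (the ScopedJitExecution machinery deleted further down) could re-materialize a non-resident mapping on first touch via Memory::InvalidateSeparateHeap. This commit inlines the eager mapping and residency bookkeeping directly into HeapTracker::Map, removes the fault-driven path and its signal-handler plumbing, and also drops the hand-rolled FastMemcpy/FastMemset helpers from core/memory. As rough orientation, the general deferred-versus-eager mapping pattern looks like the sketch below (simplified stand-in types, not the emulator's real interfaces):

```cpp
// Illustrative only: an eager tracker maps host memory up front; a deferred
// tracker records the range and relies on a fault handler to map it later.
#include <cstddef>
#include <functional>
#include <vector>

struct Range {
    std::size_t vaddr{}, paddr{}, size{};
    bool is_resident = false;
};

class EagerTracker {
public:
    // New behaviour: map immediately and mark the range resident.
    void Map(Range r, const std::function<void(const Range&)>& host_map) {
        host_map(r);
        r.is_resident = true;
        m_ranges.push_back(r);
    }

private:
    std::vector<Range> m_ranges;
};

class DeferredTracker {
public:
    // Old behaviour: record the range; MapOnFault() is what a SIGSEGV handler
    // would call when the guest first touches a non-resident page.
    void Map(Range r) { m_ranges.push_back(r); }

    bool MapOnFault(std::size_t vaddr, const std::function<void(const Range&)>& host_map) {
        for (auto& r : m_ranges) {
            if (!r.is_resident && vaddr >= r.vaddr && vaddr < r.vaddr + r.size) {
                host_map(r);
                r.is_resident = true;
                return true; // fault satisfied, the faulting instruction can be retried
            }
        }
        return false; // not ours; forward to the previous handler
    }

private:
    std::vector<Range> m_ranges;
};
```

The deferred variant trades up-front mapping work for a page-fault round trip on first access, which is exactly the machinery this commit deletes.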
@@ -34,6 +34,8 @@ HeapTracker::~HeapTracker() = default;

void HeapTracker::Map(size_t virtual_offset, size_t host_offset, size_t length,
                      MemoryPermission perm, bool is_separate_heap) {
    bool rebuild_required = false;

    // When mapping other memory, map pages immediately.
    if (!is_separate_heap) {
        m_buffer.Map(virtual_offset, host_offset, length, perm, false);
@@ -55,11 +57,29 @@ void HeapTracker::Map(size_t virtual_offset, size_t host_offset, size_t length,

        // Insert into mappings.
        m_map_count++;
        m_mappings.insert(*map);
        const auto it = m_mappings.insert(*map);

        // Update tick before possible rebuild.
        it->tick = m_tick++;

        // Check if we need to rebuild.
        if (m_resident_map_count >= m_max_resident_map_count) {
            rebuild_required = true;
        }

        // Map the area.
        m_buffer.Map(it->vaddr, it->paddr, it->size, it->perm, false);

        // This map is now resident.
        it->is_resident = true;
        m_resident_map_count++;
        m_resident_mappings.insert(*it);
    }

    // Finally, map.
    this->DeferredMapSeparateHeap(virtual_offset);
    if (rebuild_required) {
        // A rebuild was required, so perform it now.
        this->RebuildSeparateHeapAddressSpace();
    }
}

void HeapTracker::Unmap(size_t virtual_offset, size_t size, bool is_separate_heap) {
@@ -157,51 +177,6 @@ void HeapTracker::Protect(size_t virtual_offset, size_t size, MemoryPermission p
    }
}

bool HeapTracker::DeferredMapSeparateHeap(u8* fault_address) {
    if (m_buffer.IsInVirtualRange(fault_address)) {
        return this->DeferredMapSeparateHeap(fault_address - m_buffer.VirtualBasePointer());
    }

    return false;
}

bool HeapTracker::DeferredMapSeparateHeap(size_t virtual_offset) {
    bool rebuild_required = false;

    {
        std::scoped_lock lk{m_lock};

        // Check to ensure this was a non-resident separate heap mapping.
        const auto it = this->GetNearestHeapMapLocked(virtual_offset);
        if (it == m_mappings.end() || it->is_resident) {
            return false;
        }

        // Update tick before possible rebuild.
        it->tick = m_tick++;

        // Check if we need to rebuild.
        if (m_resident_map_count > m_max_resident_map_count) {
            rebuild_required = true;
        }

        // Map the area.
        m_buffer.Map(it->vaddr, it->paddr, it->size, it->perm, false);

        // This map is now resident.
        it->is_resident = true;
        m_resident_map_count++;
        m_resident_mappings.insert(*it);
    }

    if (rebuild_required) {
        // A rebuild was required, so perform it now.
        this->RebuildSeparateHeapAddressSpace();
    }

    return true;
}

void HeapTracker::RebuildSeparateHeapAddressSpace() {
    std::scoped_lock lk{m_rebuild_lock, m_lock};
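The residency bookkeeping is unchanged in spirit between the two hunks above: every touched mapping is stamped with a fresh tick, the resident count is compared against m_max_resident_map_count, and crossing the cap requests RebuildSeparateHeapAddressSpace() once the lock is released. A compact sketch of that cap-and-rebuild pattern follows; the oldest-tick-first eviction inside the rebuild is an assumption, since these hunks only show the trigger:

```cpp
// Illustrative cap-and-rebuild pattern driven by a monotonic tick. The real
// HeapTracker keys mappings by address and rebuilds a host address space;
// none of that is reproduced here.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

struct Entry {
    std::uint64_t tick{};
    bool is_resident{};
};

class ResidencyCap {
public:
    explicit ResidencyCap(std::size_t max_resident) : m_max_resident{max_resident} {}

    // Returns true when the caller should run a rebuild pass afterwards.
    bool Touch(Entry& e) {
        e.tick = m_tick++;
        if (!e.is_resident) {
            e.is_resident = true;
            ++m_resident_count;
        }
        return m_resident_count >= m_max_resident;
    }

    void Rebuild(std::vector<Entry*>& resident) {
        // Evict the least recently touched half so the cap is not hit again
        // immediately (a guess at the policy; the diff only shows the trigger).
        std::sort(resident.begin(), resident.end(),
                  [](const Entry* a, const Entry* b) { return a->tick < b->tick; });
        const std::size_t evict = resident.size() / 2;
        for (std::size_t i = 0; i < evict; ++i) {
            resident[i]->is_resident = false;
            --m_resident_count;
        }
        resident.erase(resident.begin(), resident.begin() + evict);
    }

private:
    std::size_t m_max_resident;
    std::size_t m_resident_count{};
    std::uint64_t m_tick{};
};
```

Note that the surviving path in Map checks `>=` where the removed DeferredMapSeparateHeap checked `>`; the sketch follows the `>=` form.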
@@ -1,49 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#ifdef __linux__

#include "common/signal_chain.h"

#include "core/arm/dynarmic/arm_dynarmic.h"
#include "core/hle/kernel/k_process.h"
#include "core/memory.h"

namespace Core {

namespace {

thread_local Core::Memory::Memory* g_current_memory{};
std::once_flag g_registered{};
struct sigaction g_old_segv{};

void HandleSigSegv(int sig, siginfo_t* info, void* ctx) {
    if (g_current_memory && g_current_memory->InvalidateSeparateHeap(info->si_addr)) {
        return;
    }

    return g_old_segv.sa_sigaction(sig, info, ctx);
}

} // namespace

ScopedJitExecution::ScopedJitExecution(Kernel::KProcess* process) {
    g_current_memory = std::addressof(process->GetMemory());
}

ScopedJitExecution::~ScopedJitExecution() {
    g_current_memory = nullptr;
}

void ScopedJitExecution::RegisterHandler() {
    std::call_once(g_registered, [] {
        struct sigaction sa{};
        sa.sa_sigaction = &HandleSigSegv;
        sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
        Common::SigAction(SIGSEGV, std::addressof(sa), std::addressof(g_old_segv));
    });
}

} // namespace Core

#endif
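The file deleted above was the fault-handling half of the deferred scheme: it registered a SIGSEGV handler (via Common::SigAction, which chains rather than replaces the existing handler) and pointed it at the current process's Memory instance for the duration of JIT execution. A minimal, self-contained sketch of that chaining pattern using plain POSIX sigaction, with handled_by_us() standing in for Memory::InvalidateSeparateHeap(), looks roughly like:

```cpp
// Sketch of a chained SIGSEGV handler, assuming plain POSIX sigaction.
#include <csignal>
#include <cstdlib>

static struct sigaction g_old_segv;

static bool handled_by_us(void* fault_address) {
    // In the removed code this asked the HeapTracker to map the faulting page.
    (void)fault_address;
    return false;
}

static void HandleSigSegv(int sig, siginfo_t* info, void* ctx) {
    if (handled_by_us(info->si_addr)) {
        return; // fault resolved, retry the faulting instruction
    }
    // Forward to whichever handler was installed before ours.
    if (g_old_segv.sa_flags & SA_SIGINFO) {
        g_old_segv.sa_sigaction(sig, info, ctx);
    } else if (g_old_segv.sa_handler == SIG_DFL || g_old_segv.sa_handler == SIG_IGN) {
        std::abort();
    } else {
        g_old_segv.sa_handler(sig);
    }
}

static void RegisterHandler() {
    struct sigaction sa{};
    sa.sa_sigaction = &HandleSigSegv;
    sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGSEGV, &sa, &g_old_segv);
}
```

With the eager mapping in place there is nothing left for such a handler to satisfy, which is why the whole file (and the k_process.cpp registration further down) can go.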
@@ -26,24 +26,4 @@ constexpr HaltReason TranslateHaltReason(Dynarmic::HaltReason hr) {
    return static_cast<HaltReason>(hr);
}

#ifdef __linux__

class ScopedJitExecution {
public:
    explicit ScopedJitExecution(Kernel::KProcess* process);
    ~ScopedJitExecution();
    static void RegisterHandler();
};

#else

class ScopedJitExecution {
public:
    explicit ScopedJitExecution(Kernel::KProcess* process) {}
    ~ScopedJitExecution() {}
    static void RegisterHandler() {}
};

#endif

} // namespace Core
@@ -337,15 +337,11 @@ bool ArmDynarmic32::IsInThumbMode() const {
}

HaltReason ArmDynarmic32::RunThread(Kernel::KThread* thread) {
    ScopedJitExecution sj(thread->GetOwnerProcess());

    m_jit->ClearExclusiveState();
    return TranslateHaltReason(m_jit->Run());
}

HaltReason ArmDynarmic32::StepThread(Kernel::KThread* thread) {
    ScopedJitExecution sj(thread->GetOwnerProcess());

    m_jit->ClearExclusiveState();
    return TranslateHaltReason(m_jit->Step());
}

@@ -387,7 +383,6 @@ ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProc
    m_cp15(std::make_shared<DynarmicCP15>(*this)), m_core_index{core_index} {
    auto& page_table_impl = process->GetPageTable().GetBasePageTable().GetImpl();
    m_jit = MakeJit(&page_table_impl);
    ScopedJitExecution::RegisterHandler();
}

ArmDynarmic32::~ArmDynarmic32() = default;
@@ -368,15 +368,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
}

HaltReason ArmDynarmic64::RunThread(Kernel::KThread* thread) {
    ScopedJitExecution sj(thread->GetOwnerProcess());

    m_jit->ClearExclusiveState();
    return TranslateHaltReason(m_jit->Run());
}

HaltReason ArmDynarmic64::StepThread(Kernel::KThread* thread) {
    ScopedJitExecution sj(thread->GetOwnerProcess());

    m_jit->ClearExclusiveState();
    return TranslateHaltReason(m_jit->Step());
}

@@ -416,7 +412,6 @@ ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProc
    auto& page_table = process->GetPageTable().GetBasePageTable();
    auto& page_table_impl = page_table.GetImpl();
    m_jit = MakeJit(&page_table_impl, page_table.GetAddressSpaceWidth());
    ScopedJitExecution::RegisterHandler();
}

ArmDynarmic64::~ArmDynarmic64() = default;
@@ -1266,10 +1266,6 @@ void KProcess::InitializeInterfaces() {

#ifdef HAS_NCE
    if (this->IsApplication() && Settings::IsNceEnabled()) {
        // Register the scoped JIT handler before creating any NCE instances
        // so that its signal handler will appear first in the signal chain.
        Core::ScopedJitExecution::RegisterHandler();

        for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
            m_arm_interfaces[i] = std::make_unique<Core::ArmNce>(m_kernel.System(), true, i);
        }
@@ -37,105 +37,6 @@ namespace Core::Memory {

namespace {

inline void FastMemcpy(void* dst, const void* src, std::size_t size) {
    // Fast path for small copies
    switch (size) {
    case 1:
        *static_cast<u8*>(dst) = *static_cast<const u8*>(src);
        break;
    case 2:
        *static_cast<u16*>(dst) = *static_cast<const u16*>(src);
        break;
    case 4:
        *static_cast<u32*>(dst) = *static_cast<const u32*>(src);
        break;
    case 8:
        *static_cast<u64*>(dst) = *static_cast<const u64*>(src);
        break;
    case 16: {
        // Optimize for 16-byte copy (common case for SIMD registers)
        const u64* src_64 = static_cast<const u64*>(src);
        u64* dst_64 = static_cast<u64*>(dst);
        dst_64[0] = src_64[0];
        dst_64[1] = src_64[1];
        break;
    }
    case 32: {
        // Optimize for 32-byte copy
        const u64* src_64 = static_cast<const u64*>(src);
        u64* dst_64 = static_cast<u64*>(dst);
        dst_64[0] = src_64[0];
        dst_64[1] = src_64[1];
        dst_64[2] = src_64[2];
        dst_64[3] = src_64[3];
        break;
    }
    case 64: {
        // Optimize for 64-byte copy
        const u64* src_64 = static_cast<const u64*>(src);
        u64* dst_64 = static_cast<u64*>(dst);
        dst_64[0] = src_64[0];
        dst_64[1] = src_64[1];
        dst_64[2] = src_64[2];
        dst_64[3] = src_64[3];
        dst_64[4] = src_64[4];
        dst_64[5] = src_64[5];
        dst_64[6] = src_64[6];
        dst_64[7] = src_64[7];
        break;
    }
    default:
        // For larger sizes, use standard memcpy which is usually optimized by the compiler
        std::memcpy(dst, src, size);
        break;
    }
}

inline void FastMemset(void* dst, int value, std::size_t size) {
    // Fast path for small fills
    switch (size) {
    case 1:
        *static_cast<u8*>(dst) = static_cast<u8>(value);
        break;
    case 2:
        *static_cast<u16*>(dst) = static_cast<u16>(value);
        break;
    case 4:
        *static_cast<u32*>(dst) = static_cast<u32>(value);
        break;
    case 8:
        *static_cast<u64*>(dst) = static_cast<u64>(value);
        break;
    case 16: {
        // Optimize for 16-byte fill (common case for SIMD registers)
        u64* dst_64 = static_cast<u64*>(dst);
        const u64 val64 = static_cast<u8>(value) * 0x0101010101010101ULL;
        dst_64[0] = val64;
        dst_64[1] = val64;
        break;
    }
    default:
        if (size <= 128 && value == 0) {
            // Fast path for small zero-fills
            u8* dst_bytes = static_cast<u8*>(dst);
            for (std::size_t i = 0; i < size; i += 8) {
                if (i + 8 <= size) {
                    *reinterpret_cast<u64*>(dst_bytes + i) = 0;
                } else {
                    // Handle remaining bytes (less than 8)
                    for (std::size_t j = i; j < size; j++) {
                        dst_bytes[j] = 0;
                    }
                }
            }
        } else {
            // For larger sizes, use standard memset which is usually optimized by the compiler
            std::memset(dst, value, size);
        }
        break;
    }
}

bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessAddress addr,
                          const std::size_t size) {
    const Common::ProcessAddress max_addr = 1ULL << table.GetAddressSpaceBits();
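The removed FastMemcpy/FastMemset helpers special-cased small power-of-two sizes before falling back to the standard library; the hunks that follow switch every call site back to std::memcpy/std::memset. One plausible rationale (not stated in the diff) is that when the size is a compile-time constant, the compiler already lowers a plain memcpy call to the same handful of loads and stores, so the extra dispatch buys nothing. A quick way to convince yourself:

```cpp
// Both functions compile to essentially the same code at -O2 on mainstream
// compilers; the explicit u64 stores gain nothing over the memcpy call.
#include <cstdint>
#include <cstring>

void copy16_manual(void* dst, const void* src) {
    const auto* s = static_cast<const std::uint64_t*>(src);
    auto* d = static_cast<std::uint64_t*>(dst);
    d[0] = s[0];
    d[1] = s[1];
}

void copy16_memcpy(void* dst, const void* src) {
    std::memcpy(dst, src, 16); // constant size: lowered to two 8-byte moves (or one 16-byte move)
}
```

For runtime sizes, the switch in the removed helpers adds a branch in front of what is already a heavily optimized library routine.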
@@ -416,17 +317,17 @@ struct Memory::Impl {
                LOG_ERROR(HW_Memory,
                          "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                          GetInteger(current_vaddr), GetInteger(src_addr), size);
                FastMemset(dest_buffer, 0, copy_amount);
                std::memset(dest_buffer, 0, copy_amount);
            },
            [&](const std::size_t copy_amount, const u8* const src_ptr) {
                FastMemcpy(dest_buffer, src_ptr, copy_amount);
                std::memcpy(dest_buffer, src_ptr, copy_amount);
            },
            [&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount,
                const u8* const host_ptr) {
                if constexpr (!UNSAFE) {
                    HandleRasterizerDownload(GetInteger(current_vaddr), copy_amount);
                }
                FastMemcpy(dest_buffer, host_ptr, copy_amount);
                std::memcpy(dest_buffer, host_ptr, copy_amount);
            },
            [&](const std::size_t copy_amount) {
                dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
@@ -434,7 +335,7 @@ struct Memory::Impl {
    }

    bool ReadBlockParallel(const Common::ProcessAddress src_addr, void* dest_buffer,
                           const std::size_t size) {
                           const std::size_t size) {
        // Calculate chunk size based on thread count
        const size_t chunk_size = (size + thread_count - 1) / thread_count;

@@ -457,7 +358,8 @@ struct Memory::Impl {
            void* current_dest = static_cast<u8*>(dest_buffer) + offset;

            // Launch thread
            threads.emplace_back([this, i, current_addr, current_dest, current_chunk_size, &results] {
            threads.emplace_back([this, i, current_addr, current_dest, current_chunk_size,
                                  &results] {
                results[i] = ReadBlockImpl<false>(current_addr, current_dest, current_chunk_size);
            });
        }
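ReadBlockParallel (and WriteBlockParallel below) split the requested range into per-thread chunks with ceiling division, so every thread except possibly the last copies chunk_size bytes. A standalone illustration of that partitioning, with thread_count passed as a plain parameter rather than the member the real code uses:

```cpp
// Ceiling-division chunking as used by the parallel block copies: each worker
// handles [offset, offset + length), and the final chunk is clamped so the
// total never exceeds `size`.
#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

std::vector<std::pair<std::size_t, std::size_t>> SplitIntoChunks(std::size_t size,
                                                                 std::size_t thread_count) {
    std::vector<std::pair<std::size_t, std::size_t>> chunks; // {offset, length}
    const std::size_t chunk_size = (size + thread_count - 1) / thread_count;
    for (std::size_t i = 0; i < thread_count; ++i) {
        const std::size_t offset = i * chunk_size;
        if (offset >= size) {
            break; // more threads than data
        }
        chunks.emplace_back(offset, std::min(chunk_size, size - offset));
    }
    return chunks;
}

// Example: SplitIntoChunks(10, 4) yields {0,3}, {3,3}, {6,3}, {9,1}.
```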
@@ -515,14 +417,14 @@ struct Memory::Impl {
                          GetInteger(current_vaddr), GetInteger(dest_addr), size);
            },
            [&](const std::size_t copy_amount, u8* const dest_ptr) {
                FastMemcpy(dest_ptr, src_buffer, copy_amount);
                std::memcpy(dest_ptr, src_buffer, copy_amount);
            },
            [&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount,
                u8* const host_ptr) {
                if constexpr (!UNSAFE) {
                    HandleRasterizerWrite(GetInteger(current_vaddr), copy_amount);
                }
                FastMemcpy(host_ptr, src_buffer, copy_amount);
                std::memcpy(host_ptr, src_buffer, copy_amount);
            },
            [&](const std::size_t copy_amount) {
                src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;

@@ -530,7 +432,7 @@ struct Memory::Impl {
    }

    bool WriteBlockParallel(const Common::ProcessAddress dest_addr, const void* src_buffer,
                            const std::size_t size) {
                            const std::size_t size) {
        // Calculate chunk size based on thread count
        const size_t chunk_size = (size + thread_count - 1) / thread_count;

@@ -553,7 +455,8 @@ struct Memory::Impl {
            const void* current_src = static_cast<const u8*>(src_buffer) + offset;

            // Launch thread
            threads.emplace_back([this, i, current_addr, current_src, current_chunk_size, &results] {
            threads.emplace_back([this, i, current_addr, current_src, current_chunk_size,
                                  &results] {
                results[i] = WriteBlockImpl<false>(current_addr, current_src, current_chunk_size);
            });
        }
@@ -593,12 +496,12 @@ struct Memory::Impl {
                          GetInteger(current_vaddr), GetInteger(dest_addr), size);
            },
            [](const std::size_t copy_amount, u8* const dest_ptr) {
                FastMemset(dest_ptr, 0, copy_amount);
                std::memset(dest_ptr, 0, copy_amount);
            },
            [&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount,
                u8* const host_ptr) {
                HandleRasterizerWrite(GetInteger(current_vaddr), copy_amount);
                FastMemset(host_ptr, 0, copy_amount);
                std::memset(host_ptr, 0, copy_amount);
            },
            [](const std::size_t copy_amount) {});
    }
@@ -876,8 +779,10 @@ struct Memory::Impl {
            return nullptr;
        } else {
            // Avoid adding any extra logic to this fast-path block
            const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Raw();
            if (const uintptr_t pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
            const uintptr_t raw_pointer =
                current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Raw();
            if (const uintptr_t pointer =
                    Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
                return reinterpret_cast<u8*>(pointer + vaddr);
            } else {
                switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
@@ -912,8 +817,7 @@ struct Memory::Impl {
    }

    [[nodiscard]] u8* GetPointerSilent(const Common::ProcessAddress vaddr) const {
        return GetPointerImpl(
            GetInteger(vaddr), []() {}, []() {});
        return GetPointerImpl(GetInteger(vaddr), []() {}, []() {});
    }

    /**
@@ -934,10 +838,7 @@ struct Memory::Impl {
        if constexpr (std::is_same_v<T, u8> || std::is_same_v<T, s8>) {
            // 8-bit reads are always aligned
            const u8* const ptr = GetPointerImpl(
                addr,
                [addr]() {
                    LOG_ERROR(HW_Memory, "Unmapped Read8 @ 0x{:016X}", addr);
                },
                addr, [addr]() { LOG_ERROR(HW_Memory, "Unmapped Read8 @ 0x{:016X}", addr); },
                [&]() { HandleRasterizerDownload(addr, sizeof(T)); });
            if (ptr) {
                return static_cast<T>(*ptr);

@@ -947,10 +848,7 @@ struct Memory::Impl {
            // Check alignment for 16-bit reads
            if ((addr & 1) == 0) {
                const u8* const ptr = GetPointerImpl(
                    addr,
                    [addr]() {
                        LOG_ERROR(HW_Memory, "Unmapped Read16 @ 0x{:016X}", addr);
                    },
                    addr, [addr]() { LOG_ERROR(HW_Memory, "Unmapped Read16 @ 0x{:016X}", addr); },
                    [&]() { HandleRasterizerDownload(addr, sizeof(T)); });
                if (ptr) {
                    return static_cast<T>(*reinterpret_cast<const u16*>(ptr));

@@ -960,10 +858,7 @@ struct Memory::Impl {
            // Check alignment for 32-bit reads
            if ((addr & 3) == 0) {
                const u8* const ptr = GetPointerImpl(
                    addr,
                    [addr]() {
                        LOG_ERROR(HW_Memory, "Unmapped Read32 @ 0x{:016X}", addr);
                    },
                    addr, [addr]() { LOG_ERROR(HW_Memory, "Unmapped Read32 @ 0x{:016X}", addr); },
                    [&]() { HandleRasterizerDownload(addr, sizeof(T)); });
                if (ptr) {
                    return static_cast<T>(*reinterpret_cast<const u32*>(ptr));

@@ -973,10 +868,7 @@ struct Memory::Impl {
            // Check alignment for 64-bit reads
            if ((addr & 7) == 0) {
                const u8* const ptr = GetPointerImpl(
                    addr,
                    [addr]() {
                        LOG_ERROR(HW_Memory, "Unmapped Read64 @ 0x{:016X}", addr);
                    },
                    addr, [addr]() { LOG_ERROR(HW_Memory, "Unmapped Read64 @ 0x{:016X}", addr); },
                    [&]() { HandleRasterizerDownload(addr, sizeof(T)); });
                if (ptr) {
                    return static_cast<T>(*reinterpret_cast<const u64*>(ptr));

@@ -988,12 +880,10 @@ struct Memory::Impl {
        T result = 0;
        const u8* const ptr = GetPointerImpl(
            addr,
            [addr]() {
                LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr);
            },
            [addr]() { LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr); },
            [&]() { HandleRasterizerDownload(addr, sizeof(T)); });
        if (ptr) {
            FastMemcpy(&result, ptr, sizeof(T));
            std::memcpy(&result, ptr, sizeof(T));
        }
        return result;
    }
@@ -1080,7 +970,7 @@ struct Memory::Impl {
            },
            [&]() { HandleRasterizerWrite(addr, sizeof(T)); });
        if (ptr) {
            FastMemcpy(ptr, &data, sizeof(T));
            std::memcpy(ptr, &data, sizeof(T));
        }
    }

@@ -1114,8 +1004,7 @@ struct Memory::Impl {
    }

    void HandleRasterizerDownload(VAddr v_address, size_t size) {
        const auto* p = GetPointerImpl(
            v_address, []() {}, []() {});
        const auto* p = GetPointerImpl(v_address, []() {}, []() {});
        if (!gpu_device_memory) [[unlikely]] {
            gpu_device_memory = &system.Host1x().MemoryManager();
        }
@@ -1132,8 +1021,7 @@ struct Memory::Impl {
    }

    void HandleRasterizerWrite(VAddr v_address, size_t size) {
        const auto* p = GetPointerImpl(
            v_address, []() {}, []() {});
        const auto* p = GetPointerImpl(v_address, []() {}, []() {});
        constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
        const size_t core = std::min(system.GetCurrentHostThreadID(),
                                     sys_core); // any other calls threads go to syscore.
@@ -1153,13 +1041,13 @@ struct Memory::Impl {
        auto& current_area = rasterizer_write_areas[core];
        PAddr subaddress = address >> YUZU_PAGEBITS;
        // Performance note:
        // It may not be a good idea to assume accesses are within the same subaddress (i.e same page)
        // It is often the case the games like to access wildly different addresses. Hence why I propose
        // we should let the compiler just do it's thing...
        // It may not be a good idea to assume accesses are within the same subaddress (i.e same
        // page) It is often the case the games like to access wildly different addresses. Hence
        // why I propose we should let the compiler just do it's thing...
        if (current_area.last_address != subaddress) {
            // Short circuit the need to check for address/size
            auto const do_collection = (address != 0 && size != 0)
                && system.GPU().OnCPUWrite(address, size);
            auto const do_collection =
                (address != 0 && size != 0) && system.GPU().OnCPUWrite(address, size);
            if (do_collection) {
                current_area.last_address = subaddress;
            } else {
@@ -1418,21 +1306,7 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
        impl->InvalidateGPUMemory(ptr, size);
    }

#ifdef __linux__
    if (!rasterizer && mapped) {
        impl->buffer->DeferredMapSeparateHeap(GetInteger(vaddr));
    }
#endif

    return mapped && ptr != nullptr;
}

bool Memory::InvalidateSeparateHeap(void* fault_address) {
#ifdef __linux__
    return impl->buffer->DeferredMapSeparateHeap(static_cast<u8*>(fault_address));
#else
    return false;
#endif
}

} // namespace Core::Memory