From 4bb141d4a2fc04eb8b4b6ee73a44ba83e6e0dd3c Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:54:53 +0000 Subject: [PATCH 01/52] [dynarmic] reduce opt pass latency Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 3 - .../backend/riscv64/a32_address_space.cpp | 14 +---- .../src/dynarmic/common/memory_pool.cpp | 13 ---- .../src/dynarmic/common/memory_pool.h | 61 ------------------- src/dynarmic/src/dynarmic/ir/basic_block.cpp | 7 +-- src/dynarmic/src/dynarmic/ir/basic_block.h | 3 - src/dynarmic/src/dynarmic/ir/ir_emitter.cpp | 21 ------- src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 8 +-- 8 files changed, 7 insertions(+), 123 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.cpp delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.h delete mode 100644 src/dynarmic/src/dynarmic/ir/ir_emitter.cpp diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index 58efcac747..5333973164 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -58,8 +58,6 @@ add_library(dynarmic common/lut_from_list.h common/math_util.cpp common/math_util.h - common/memory_pool.cpp - common/memory_pool.h common/safe_ops.h common/spin_lock.h common/string_util.h @@ -80,7 +78,6 @@ add_library(dynarmic ir/basic_block.cpp ir/basic_block.h ir/cond.h - ir/ir_emitter.cpp ir/ir_emitter.h ir/location_descriptor.cpp ir/location_descriptor.h diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp index 8a21aafcf8..0bb9591411 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp @@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.cpp b/src/dynarmic/src/dynarmic/common/memory_pool.cpp deleted file mode 100644 index f41dd92af5..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.cpp +++ /dev/null @@ -1,13 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/common/memory_pool.h" - -#include - -namespace Dynarmic::Common { - - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.h b/src/dynarmic/src/dynarmic/common/memory_pool.h deleted file mode 100644 index c99316e107..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -namespace Dynarmic::Common { - -/// @tparam object_size Byte-size of objects to construct -/// @tparam slab_size Number of objects to have per slab -template -class Pool { -public: - inline Pool() noexcept { - AllocateNewSlab(); - } - inline ~Pool() noexcept { - std::free(current_slab); - for (char* slab : slabs) { - std::free(slab); - } - } - - Pool(const Pool&) = delete; - Pool(Pool&&) = delete; - - Pool& operator=(const Pool&) = delete; - Pool& operator=(Pool&&) = delete; - - /// @brief Returns a pointer to an `object_size`-bytes block of memory. - [[nodiscard]] void* Alloc() noexcept { - if (remaining == 0) { - slabs.push_back(current_slab); - AllocateNewSlab(); - } - void* ret = static_cast(current_ptr); - current_ptr += object_size; - remaining--; - return ret; - } -private: - /// @brief Allocates a completely new memory slab. - /// Used when an entirely new slab is needed - /// due the current one running out of usable space. - void AllocateNewSlab() noexcept { - current_slab = static_cast(std::malloc(object_size * slab_size)); - current_ptr = current_slab; - remaining = slab_size; - } - - std::vector slabs; - char* current_slab = nullptr; - char* current_ptr = nullptr; - size_t remaining = 0; -}; - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index b00ab3cb20..a13a7ebfc9 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -15,8 +15,6 @@ #include #include "dynarmic/common/assert.h" - -#include "dynarmic/common/memory_pool.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A64/a64_types.h" #include "dynarmic/ir/cond.h" @@ -27,8 +25,7 @@ namespace Dynarmic::IR { Block::Block(const LocationDescriptor& location) : location{location}, end_location{location}, - cond{Cond::AL}, - instruction_alloc_pool{std::make_unique>()} + cond{Cond::AL} { } @@ -40,7 +37,7 @@ Block::Block(const LocationDescriptor& location) /// @param args A sequence of Value instances used as arguments for the instruction. /// @returns Iterator to the newly created instruction. Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) noexcept { - IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode); + IR::Inst* inst = new IR::Inst(opcode); DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index e88dc92fc4..2f2d9ab6de 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -21,7 +21,6 @@ #include "dynarmic/ir/terminal.h" #include "dynarmic/ir/value.h" #include "dynarmic/ir/dense_list.h" -#include "dynarmic/common/memory_pool.h" namespace Dynarmic::IR { @@ -174,8 +173,6 @@ private: LocationDescriptor end_location; /// Conditional to pass in order to execute this block Cond cond; - /// Memory pool for instruction list - std::unique_ptr> instruction_alloc_pool; /// Terminal instruction of this block. Terminal terminal = Term::Invalid{}; /// Number of cycles this block takes to execute if the conditional fails. diff --git a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp deleted file mode 100644 index a8ef7e2989..0000000000 --- a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/ir_emitter.h" - -#include - -#include "dynarmic/common/assert.h" -#include - -#include "dynarmic/ir/opcodes.h" - -namespace Dynarmic::IR { - - -} // namespace Dynarmic::IR diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 844e29023c..e9175f0e6b 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -1491,9 +1491,9 @@ void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization: Optimization::DeadCodeElimination(block); } Optimization::IdentityRemovalPass(block); - //if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) { + if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) { Optimization::VerificationPass(block); - //} + } } void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { @@ -1511,9 +1511,9 @@ void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization: if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] { Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks); } - //if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) { + if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) { Optimization::VerificationPass(block); - //} + } } } // namespace Dynarmic::Optimization From 45250b3d88011003727f57ca00c79870c2f4cfe6 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 00:11:40 +0000 Subject: [PATCH 02/52] [dynarmic] use small vector experiment Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index bfacdcca52..66c75aead9 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -12,6 +12,7 @@ #include #include +#include "boost/container/small_vector.hpp" #include "dynarmic/common/common_types.h" #include #include @@ -83,7 +84,7 @@ public: void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; private: //non trivial - std::vector values; //24 + boost::container::small_vector values; //24 // Block state uint16_t total_uses = 0; //8 //sometimes zeroed @@ -96,7 +97,7 @@ private: uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - alignas(16) uint8_t lru_counter = 0; //1 + uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From 9d4a72fe26ad86a668f5f7de1b23dac7975d7248 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:07 +0000 Subject: [PATCH 03/52] [dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 29eab7908b..919e75b77b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -118,7 +118,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept { values.push_back(inst); ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits::max)()); total_uses += inst->UseCount(); - max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType())); + max_bit_width = std::max(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType()))); } void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index 66c75aead9..2bd274730a 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -78,7 +78,7 @@ public: return std::find(values.begin(), values.end(), inst) != values.end(); } inline size_t GetMaxBitWidth() const noexcept { - return max_bit_width; + return 1 << max_bit_width; } void AddValue(IR::Inst* inst) noexcept; void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; @@ -94,10 +94,10 @@ private: uint16_t is_being_used_count = 0; //8 uint16_t current_references = 0; //8 // Value state - uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 + uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6) + uint8_t lru_counter : 2 = 0; //1 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From dbe643328ad78e6d4ad75a992e638bc141737163 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 09:10:18 +0000 Subject: [PATCH 04/52] [docs] fastmem draft Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md index c4f57996ba..32c516ddcd 100644 --- a/src/dynarmic/docs/FastMemory.md +++ b/src/dynarmic/docs/FastMemory.md @@ -4,7 +4,7 @@ The main way of accessing memory in JITed programs is via an invoked function, s The solution? Delegate invalid accesses to a dedicated arena, similar to a swap. The main idea behind such mechanism is to allow the OS to transmit page faults from invalid accesses into the JIT translator directly, bypassing address space calls, while this sacrifices i-cache coherency, it allows for smaller code-size and "faster" throguhput. -Many kernels however, do not support fast signal dispatching (Solaris, OpenBSD, FreeBSD). Only Linux and Windows support relatively "fast" signal dispatching. Hence this feature is better suited for them only. +Many kernels however, do not support fast signal dispatching (Solaris, *BSD). Only Linux and Windows support relatively "fast" signal dispatching. Hence this feature is better suited for them only. ![Host to guest translation](./HostToGuest.svg) From c2d1dab721476afb5f5f5feae80a3c87a4cc51eb Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 10:16:22 +0000 Subject: [PATCH 05/52] [dynarmic] (prolly makes MSVC crash) - use 128MiB code cache Signed-off-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 6 +- .../frontend/decoder/decoder_detail.h | 65 ++++++++----------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index d2035d0fe0..b74817a611 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -210,12 +210,8 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 438b7b691c..682b0cfbf6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -269,12 +269,8 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index bd76efda2a..4050033ef5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -70,11 +70,9 @@ struct detail { return std::make_tuple(mask, expect); } - /** - * Generates the masks and shifts for each argument. - * A '-' in a bitstring indicates that we don't care about that value. - * An argument is specified by a continuous string of the same character. - */ + /// @brief Generates the masks and shifts for each argument. + /// A '-' in a bitstring indicates that we don't care about that value. + /// An argument is specified by a continuous string of the same character. template static consteval auto GetArgInfo(std::array bitstring) { std::array masks = {}; @@ -98,7 +96,6 @@ struct detail { if constexpr (N > 0) { const size_t bit_position = opcode_bitsize - i - 1; - if (arg_index >= N) throw std::out_of_range("Unexpected field"); @@ -109,20 +106,16 @@ struct detail { } } } - #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) // Avoids a MSVC ICE, and avoids Android NDK issue. ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; })); #endif - return std::make_tuple(masks, shifts); } - /** - * This struct's Make member function generates a lambda which decodes an instruction based on - * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a - * template argument. - */ + /// @brief This struct's Make member function generates a lambda which decodes an instruction + /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is + /// provided as a template argument. template struct VisitorCaller; @@ -130,36 +123,36 @@ struct detail { # pragma warning(push) # pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning) #endif - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...), + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...), const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...) const, + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...) const, const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; @@ -167,22 +160,16 @@ struct detail { # pragma warning(pop) #endif - /** - * Creates a matcher that can match and parse instructions based on bitstring. - * See also: GetMaskAndExpect and GetArgInfo for format of bitstring. - */ - template - static auto GetMatcher(FnT fn, const char* const name) { - constexpr size_t args_count = mcl::parameter_count_v; - + /// @brief Creates a matcher that can match and parse instructions based on bitstring. + /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. + template + static constexpr auto GetMatcher(F fn, const char* const name) { + constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); - - using Iota = std::make_index_sequence; - - const auto proxy_fn = VisitorCaller::Make(Iota(), fn, arg_masks, arg_shifts); + const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); return MatcherT(name, mask, expect, proxy_fn); } }; From 254fd53b415b6d276e85a0b5f0eb06c5fc9a3bea Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 4 Sep 2025 07:24:49 +0000 Subject: [PATCH 06/52] [dynarmic] reduce matcher table noise and cache misses Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/a32_types.h | 22 +++++---------- .../src/dynarmic/frontend/A32/decoder/asimd.h | 27 +++++++++---------- .../src/dynarmic/frontend/A32/decoder/vfp.h | 12 +++------ .../src/dynarmic/frontend/A64/decoder/a64.h | 23 +++++++--------- .../frontend/decoder/decoder_detail.h | 6 ++--- .../src/dynarmic/frontend/decoder/matcher.h | 11 ++------ 6 files changed, 38 insertions(+), 63 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h index 3f4501a528..2a0cc25751 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h @@ -9,12 +9,9 @@ #pragma once #include -#include - #include #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" - #include "dynarmic/interface/A32/coprocessor_util.h" #include "dynarmic/ir/cond.h" @@ -89,24 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) { inline size_t RegNumber(Reg reg) { ASSERT(reg != Reg::INVALID_REG); - return static_cast(reg); + return size_t(reg); } inline size_t RegNumber(ExtReg reg) { if (IsSingleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::S0); + return size_t(reg) - size_t(ExtReg::S0); + } else if (IsDoubleExtReg(reg)) { + return size_t(reg) - size_t(ExtReg::D0); } - - if (IsDoubleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::D0); - } - - if (IsQuadExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::Q0); - } - - ASSERT_MSG(false, "Invalid extended register"); - return 0; + ASSERT(IsQuadExtReg(reg)); + return size_t(reg) - size_t(ExtReg::Q0); } inline Reg operator+(Reg reg, size_t number) { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f2e206695b..f63816d2b1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher; template std::vector> GetASIMDDecodeTable() { - std::vector> table = { - -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> table = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" #undef INST - }; - // Exceptions to the rule of thumb. const std::set comes_first{ "VBIC, VMOV, VMVN, VORR (immediate)", @@ -53,19 +50,21 @@ std::vector> GetASIMDDecodeTable() { "VQDMULH (scalar)", "VQRDMULH (scalar)", }; - const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_first.count(matcher.GetName()) > 0; + const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_first.count(e.first) > 0; }); - const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_last.count(matcher.GetName()) == 0; + const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_last.count(e.first) == 0; }); - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) { - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - - return table; + std::vector> final_table; + std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + return e.second; + }); + return final_table; } template diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index 5fcacd2bda..a969570547 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -30,22 +30,18 @@ std::optional>> DecodeVFP(u32 instruc static const struct Tables { Table unconditional; Table conditional; - } tables = [] { + } tables = []() { Table list = { - #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./vfp.inc" #undef INST - }; - - const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { return (matcher.GetMask() & 0xF0000000) == 0xF0000000; }); - return Tables{ - Table{list.begin(), division}, - Table{division, list.end()}, + Table{list.begin(), it}, + Table{it, list.end()}, }; }(); diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c651dd7cde..c7bc981ac6 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) { template constexpr DecodeTable GetDecodeTable() { - std::vector> list = { -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./a64.inc" #undef INST }; - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { + std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { // If a matcher has more bits in its mask it is more specific, so it should come first. - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - // Exceptions to the above rule of thumb. - std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + std::stable_partition(list.begin(), list.end(), [&](const auto& e) { return std::set{ "MOVI, MVNI, ORR, BIC (vector, immediate)", "FMOV (vector, immediate)", "Unallocated SIMD modified immediate", - }.count(matcher.GetName()) > 0; + }.count(e.first) > 0; }); - DecodeTable table{}; for (size_t i = 0; i < table.size(); ++i) { - for (auto matcher : list) { - const auto expect = detail::ToFastLookupIndex(matcher.GetExpected()); - const auto mask = detail::ToFastLookupIndex(matcher.GetMask()); + for (auto const& e : list) { + const auto expect = detail::ToFastLookupIndex(e.second.GetExpected()); + const auto mask = detail::ToFastLookupIndex(e.second.GetMask()); if ((i & mask) == expect) { - table[i].push_back(matcher); + table[i].push_back(e.second); } } } diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index 4050033ef5..0d9da6169c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -163,18 +163,18 @@ struct detail { /// @brief Creates a matcher that can match and parse instructions based on bitstring. /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. template - static constexpr auto GetMatcher(F fn, const char* const name) { + static constexpr auto GetMatcher(F fn) { constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); - return MatcherT(name, mask, expect, proxy_fn); + return MatcherT(mask, expect, proxy_fn); } }; -#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn, name) +#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn) } // namespace detail } // namespace Dynarmic::Decoder diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 7e5c9c5a8f..5a2afe57c5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -31,14 +31,8 @@ public: using visitor_type = Visitor; using handler_return_type = typename Visitor::instruction_return_type; using handler_function = std::function; - - Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func) - : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {} - - /// Gets the name of this type of instruction. - const char* GetName() const { - return name; - } + Matcher(opcode_type mask, opcode_type expected, handler_function func) + : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. opcode_type GetMask() const { @@ -70,7 +64,6 @@ public: } private: - const char* name; opcode_type mask; opcode_type expected; handler_function fn; From 6e4b66c8e7dbe15e50df7d3fba05903931621b2a Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 20:59:23 +0000 Subject: [PATCH 07/52] [dynarmic] fix tests Signed-off-by: lizzie --- .../src/dynarmic/frontend/decoder/matcher.h | 40 ++++++++----------- src/dynarmic/tests/decoder_tests.cpp | 4 +- src/dynarmic/tests/print_info.cpp | 22 +++++----- 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 5a2afe57c5..f7e2884e0c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -14,16 +14,12 @@ namespace Dynarmic::Decoder { -/** - * Generic instruction handling construct. - * - * @tparam Visitor An arbitrary visitor type that will be passed through - * to the function being handled. This type must be the - * type of the first parameter in a handler function. - * - * @tparam OpcodeType Type representing an opcode. This must be the - * type of the second parameter in a handler function. - */ +/// Generic instruction handling construct. +/// @tparam Visitor An arbitrary visitor type that will be passed through +/// to the function being handled. This type must be the +/// type of the first parameter in a handler function. +/// @tparam OpcodeType Type representing an opcode. This must be the +/// type of the second parameter in a handler function. template class Matcher { public: @@ -35,30 +31,26 @@ public: : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. - opcode_type GetMask() const { + inline opcode_type GetMask() const noexcept { return mask; } /// Gets the expected value after masking for this instruction. - opcode_type GetExpected() const { + inline opcode_type GetExpected() const noexcept { return expected; } - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - bool Matches(opcode_type instruction) const { + /// Tests to see if the given instruction is the instruction this matcher represents. + /// @param instruction The instruction to test + /// @returns true if the given instruction matches. + inline bool Matches(opcode_type instruction) const noexcept { return (instruction & mask) == expected; } - /** - * Calls the corresponding instruction handler on visitor for this type of instruction. - * @param v The visitor to use - * @param instruction The instruction to decode. - */ - handler_return_type call(Visitor& v, opcode_type instruction) const { + /// Calls the corresponding instruction handler on visitor for this type of instruction. + /// @param v The visitor to use + /// @param instruction The instruction to decode. + inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept { ASSERT(Matches(instruction)); return fn(v, instruction); } diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index cdd2c70cd9..c09df406f2 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,6 +20,7 @@ using namespace Dynarmic; +/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -67,4 +68,5 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { x = ((x | mask) + 1) & ~mask; } while (x != 0); } -} \ No newline at end of file +} +*/ diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 3d1268f467..0c89e780ad 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -39,19 +39,21 @@ using namespace Dynarmic; -std::string_view GetNameOfA32Instruction(u32 instruction) { - //if (auto const vfp_decoder = A32::DecodeVFP(instruction)) - // return *A32::GetNameVFP(instruction); - //else if (auto const asimd_decoder = A32::DecodeASIMD(instruction)) - // return *A32::GetNameASIMD(instruction); - //else if (auto const decoder = A32::DecodeArm(instruction)) - // return *A32::GetNameARM(instruction); +const char* GetNameOfA32Instruction(u32 instruction) { + /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { + return vfp_decoder->get().GetName(); + } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { + return asimd_decoder->get().GetName(); + } else if (auto decoder = A32::DecodeArm(instruction)) { + return decoder->get().GetName(); + }*/ return ""; } -std::string_view GetNameOfA64Instruction(u32 instruction) { - //if (auto const decoder = A64::Decode(instruction)) - // return *A64::GetName(instruction); +const char* GetNameOfA64Instruction(u32 instruction) { + /*if (auto decoder = A64::Decode(instruction)) { + return decoder->get().GetName(); + }*/ return ""; } From 2355c253712deee4c662a9c698d2f953ec8fe20c Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:07:18 +0000 Subject: [PATCH 08/52] [dynarmic] fix ASIMD execution Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f63816d2b1..2cea7a14c1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -61,7 +61,7 @@ std::vector> GetASIMDDecodeTable() { return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); std::vector> final_table; - std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) { return e.second; }); return final_table; From 8888090cf315e026f2e41ea792e59e0318707776 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:40:13 +0000 Subject: [PATCH 09/52] [dynarmic] add back encoding names (for print_info) Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/decoder/arm.h | 20 +++++++++++--- .../src/dynarmic/frontend/A32/decoder/asimd.h | 26 ++++++++++++++----- .../dynarmic/frontend/A32/decoder/thumb16.h | 24 ++++++++++++----- .../dynarmic/frontend/A32/decoder/thumb32.h | 24 ++++++++++++----- .../src/dynarmic/frontend/A32/decoder/vfp.h | 24 ++++++++++++----- .../A32/translate/translate_thumb.cpp | 2 +- .../src/dynarmic/frontend/A64/decoder/a64.h | 20 +++++++++++--- src/dynarmic/tests/decoder_tests.cpp | 6 ++--- src/dynarmic/tests/print_info.cpp | 22 +++++++--------- 9 files changed, 115 insertions(+), 53 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h index 0257c28ddb..c6f034ae21 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h @@ -30,13 +30,13 @@ template using ArmDecodeTable = std::array>, 0x1000>; namespace detail { -inline size_t ToFastLookupIndexArm(u32 instruction) { +inline size_t ToFastLookupIndexArm(u32 instruction) noexcept { return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0); } } // namespace detail template -constexpr ArmDecodeTable GetArmDecodeTable() { +constexpr ArmDecodeTable GetArmDecodeTable() noexcept { std::vector> list = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./arm.inc" @@ -62,15 +62,27 @@ constexpr ArmDecodeTable GetArmDecodeTable() { } template -std::optional>> DecodeArm(u32 instruction) { +std::optional>> DecodeArm(u32 instruction) noexcept { alignas(64) static const auto table = GetArmDecodeTable(); const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)]; auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameARM(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./arm.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index 2cea7a14c1..57e1392871 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -26,7 +26,7 @@ template using ASIMDMatcher = Decoder::Matcher; template -std::vector> GetASIMDDecodeTable() { +std::vector> GetASIMDDecodeTable() noexcept { std::vector>> table = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" @@ -68,13 +68,25 @@ std::vector> GetASIMDDecodeTable() { } template -std::optional>> DecodeASIMD(u32 instruction) { - static const auto table = GetASIMDDecodeTable(); - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); +std::optional>> DecodeASIMD(u32 instruction) noexcept { + alignas(64) static const auto table = GetASIMDDecodeTable(); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameASIMD(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./asimd.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h index 8073ee5d47..16b99ba5aa 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h @@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher; template std::optional>> DecodeThumb16(u16 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)), #include "./thumb16.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb16(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) }, +#include "./thumb16.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h index 86a4d767a7..19418de67c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h @@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher; template std::optional>> DecodeThumb32(u32 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./thumb32.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb32(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./thumb32.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index a969570547..a346304a9a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -26,8 +26,7 @@ using VFPMatcher = Decoder::Matcher; template std::optional>> DecodeVFP(u32 instruction) { using Table = std::vector>; - - static const struct Tables { + alignas(64) static const struct Tables { Table unconditional; Table conditional; } tables = []() { @@ -44,14 +43,25 @@ std::optional>> DecodeVFP(u32 instruc Table{it, list.end()}, }; }(); - const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000; const Table& table = is_unconditional ? tables.unconditional : tables.conditional; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameVFP(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./vfp.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp index 5bb516ccfd..0381c984cc 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp @@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) { return 0xF7F0A000; // UDF } -bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) { +inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept { return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000; } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c7bc981ac6..7e6cdc3935 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -71,12 +71,24 @@ constexpr DecodeTable GetDecodeTable() { template std::optional>> Decode(u32 instruction) { alignas(64) static const auto table = GetDecodeTable(); - const auto matches_instruction = [instruction](const auto& matcher) { - return matcher.Matches(instruction); - }; const auto& subtable = table[detail::ToFastLookupIndex(instruction)]; - auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); + auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetName(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./a64.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A64 diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index c09df406f2..4ad9d90833 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,7 +20,6 @@ using namespace Dynarmic; -/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -58,7 +57,7 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction); INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect); INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x); - //INFO("Name: " << *A32::GetNameASIMD(instruction)); + INFO("Name: " << *A32::GetNameASIMD(instruction)); INFO("iserr: " << iserr); //INFO("alternative: " << alternative->GetName()); INFO("altiserr: " << altiserr); @@ -68,5 +67,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { x = ((x | mask) + 1) & ~mask; } while (x != 0); } -} -*/ +} \ No newline at end of file diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 0c89e780ad..8936f32bd3 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -39,21 +39,19 @@ using namespace Dynarmic; -const char* GetNameOfA32Instruction(u32 instruction) { - /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { - return vfp_decoder->get().GetName(); - } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { - return asimd_decoder->get().GetName(); - } else if (auto decoder = A32::DecodeArm(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA32Instruction(u32 instruction) { + if (auto const vfp_decoder = A32::DecodeVFP(instruction)) + return *A32::GetNameVFP(instruction); + else if (auto const asimd_decoder = A32::DecodeASIMD(instruction)) + return *A32::GetNameASIMD(instruction); + else if (auto const decoder = A32::DecodeArm(instruction)) + return *A32::GetNameARM(instruction); return ""; } -const char* GetNameOfA64Instruction(u32 instruction) { - /*if (auto decoder = A64::Decode(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA64Instruction(u32 instruction) { + if (auto const decoder = A64::Decode(instruction)) + return *A64::GetName(instruction); return ""; } From ac4fe93fa2528e9c927dc3e3630bc30465e72b1c Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 14:08:17 +0000 Subject: [PATCH 10/52] [dynarmic] use ARCHITECTURE_ macros instead of MCL ones Signed-off-by: lizzie --- .../src/dynarmic/backend/exception_handler.h | 18 +++++++------- .../backend/exception_handler_posix.cpp | 24 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler.h b/src/dynarmic/src/dynarmic/backend/exception_handler.h index 173949628c..cd274b111f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler.h +++ b/src/dynarmic/src/dynarmic/backend/exception_handler.h @@ -15,15 +15,15 @@ #include #include "dynarmic/common/common_types.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) namespace Dynarmic::Backend::X64 { class BlockOfCode; } // namespace Dynarmic::Backend::X64 -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) namespace oaknut { class CodeBlock; } // namespace oaknut -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) namespace Dynarmic::Backend::RV64 { class CodeBlock; } // namespace Dynarmic::Backend::RV64 @@ -33,16 +33,16 @@ class CodeBlock; namespace Dynarmic::Backend { -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) struct FakeCall { u64 call_rip; u64 ret_rip; }; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) struct FakeCall { u64 call_pc; }; -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) struct FakeCall { }; #else @@ -54,11 +54,11 @@ public: ExceptionHandler(); ~ExceptionHandler(); -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void Register(X64::BlockOfCode& code); -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void Register(oaknut::CodeBlock& mem, std::size_t mem_size); -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void Register(RV64::CodeBlock& mem, std::size_t mem_size); #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index f1f208179f..c95cbbb74e 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -7,10 +7,7 @@ */ #include -#include -#include #include -#include #include #include #include "dynarmic/backend/exception_handler.h" @@ -118,9 +115,11 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { CTX_DECLARE(raw_context); #if defined(ARCHITECTURE_x86_64) { - std::shared_lock guard(sig_handler->code_block_infos_mutex); - if (auto const iter = sig_handler->FindCodeBlockInfo(CTX_RIP); iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->second.cb(CTX_RIP); + std::lock_guard guard(sig_handler->code_block_infos_mutex); + + const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP); + if (iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->cb(CTX_RIP); CTX_RSP -= sizeof(u64); *mcl::bit_cast(CTX_RSP) = fc.ret_rip; CTX_RIP = fc.call_rip; @@ -130,9 +129,10 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); #elif defined(ARCHITECTURE_arm64) { - std::shared_lock guard(sig_handler->code_block_infos_mutex); - if (const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->second.cb(CTX_PC); + std::lock_guard guard(sig_handler->code_block_infos_mutex); + const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); + if (iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->cb(CTX_PC); CTX_PC = fc.call_pc; return; } @@ -187,15 +187,15 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { impl = std::make_unique(mcl::bit_cast(code.getCode()), code.GetTotalCodeSize()); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } From 33d1873d5f7adf423f2b56346d0da9631a181dff Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 8 Sep 2025 03:36:35 +0000 Subject: [PATCH 11/52] [dynarmic, docs] fastmem docs Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md index 32c516ddcd..c4f57996ba 100644 --- a/src/dynarmic/docs/FastMemory.md +++ b/src/dynarmic/docs/FastMemory.md @@ -4,7 +4,7 @@ The main way of accessing memory in JITed programs is via an invoked function, s The solution? Delegate invalid accesses to a dedicated arena, similar to a swap. The main idea behind such mechanism is to allow the OS to transmit page faults from invalid accesses into the JIT translator directly, bypassing address space calls, while this sacrifices i-cache coherency, it allows for smaller code-size and "faster" throguhput. -Many kernels however, do not support fast signal dispatching (Solaris, *BSD). Only Linux and Windows support relatively "fast" signal dispatching. Hence this feature is better suited for them only. +Many kernels however, do not support fast signal dispatching (Solaris, OpenBSD, FreeBSD). Only Linux and Windows support relatively "fast" signal dispatching. Hence this feature is better suited for them only. ![Host to guest translation](./HostToGuest.svg) From 97a40015479b6bd47f52eb52678fa1545b29cf86 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:28:29 +0000 Subject: [PATCH 12/52] [dynarmic] remove use of mcl reverse iterator Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/abi.cpp | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 1e673338a8..1691bbb3b7 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -764,7 +764,7 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) { target_code_ptr = code.GetReturnFromRunCodeAddress(); } const CodePtr patch_location = code.getCurr(); - code.mov(code.rcx, reinterpret_cast(target_code_ptr)); + code.mov(code.rcx, u64(target_code_ptr)); code.EnsurePatchLocationSize(patch_location, 10); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp index a9bbab3d10..299bf1d1d6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp @@ -10,7 +10,6 @@ #include -#include #include "dynarmic/common/common_types.h" #include @@ -76,7 +75,8 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size); size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE); - for (auto const xmm : mcl::iterator::reverse(regs)) { + for (auto it = regs.rbegin(); it != regs.rend(); ++it) { + auto const xmm = *it; if (HostLocIsXMM(xmm)) { xmm_offset -= XMM_SIZE; if (code.HasHostFeature(HostFeature::AVX)) { @@ -88,9 +88,11 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, } if (frame_info.stack_subtraction != 0) code.add(rsp, u32(frame_info.stack_subtraction)); - for (auto const gpr : mcl::iterator::reverse(regs)) + for (auto it = regs.rbegin(); it != regs.rend(); ++it) { + auto const gpr = *it; if (HostLocIsGPR(gpr)) code.pop(HostLocToReg64(gpr)); + } } void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) { From 9881cb8503c23bddfa70af682685d9350c4f4272 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:28:41 +0000 Subject: [PATCH 13/52] [dynarmic] checked code alignment Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_interface.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index c65b582982..33acc46dfa 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -80,16 +80,16 @@ public: }; // TODO: Check code alignment - - const CodePtr current_code_ptr = [this] { + const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15)); + const CodePtr current_code_ptr = [this, aligned_code_ptr] { // RSB optimization const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask; if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { jit_state.rsb_ptr = new_rsb_ptr; - return reinterpret_cast(jit_state.rsb_codeptrs[new_rsb_ptr]); + return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]); } - - return GetCurrentBlock(); + return aligned_code_ptr; + //return GetCurrentBlock(); }(); const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr); From 81e5d18f7911ccac507fa0b03f545fddd46276d3 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:31:12 +0000 Subject: [PATCH 14/52] [dynarmic] fix hardcoded AVX512 registers, use xmm0 instead of xmm16 to align with spec Signed-off-by: lizzie --- .../backend/x64/emit_x64_floating_point.cpp | 7 ++----- .../src/dynarmic/backend/x64/emit_x64_vector.cpp | 16 ++++++++-------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 47e51acb03..c7d1f8aa1c 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -92,13 +92,10 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list t FpFixup::Norm_Src, FpFixup::Norm_Src, FpFixup::Norm_Src); - - const Xbyak::Xmm tmp = xmm16; + const Xbyak::Xmm tmp = xmm0; FCODE(vmovap)(tmp, code.BConst(xword, denormal_to_zero)); - - for (const Xbyak::Xmm& xmm : to_daz) { + for (const Xbyak::Xmm& xmm : to_daz) FCODE(vfixupimms)(xmm, xmm, tmp, u8(0)); - } return; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index 99000c2a57..c6b0e3b864 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -609,8 +609,8 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -674,8 +674,8 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF)); code.vpxorq(right_shift, right_shift, right_shift); @@ -1955,8 +1955,8 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -2737,7 +2737,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm c = xmm16; + const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); code.vpsraq(c, a, 32); code.vpsllq(a, a, 32); code.vpsraq(a, a, 32); @@ -5461,7 +5461,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm masked = xmm16; + const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(); code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); From f67a89e12f7e348deb9173206b5433b07998e809 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 10:33:12 +0000 Subject: [PATCH 15/52] [dynarmic] unconditional branches always take Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp | 4 +--- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp | 7 ++++--- .../dynarmic/frontend/A64/translate/impl/a64_branch.cpp | 9 ++++++--- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index d5d5f089ff..31b10ec6d5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -364,8 +364,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited, T_NEAR); - lock(); - or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); + lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); SwitchMxcsrOnEntry(); jmp(ABI_PARAM2); @@ -415,7 +414,6 @@ void BlockOfCode::GenRunCode(std::function rcp) { } xor_(eax, eax); - lock(); xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax); ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 919e75b77b..502a093d08 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -9,6 +9,7 @@ #include "dynarmic/backend/x64/reg_alloc.h" #include +#include #include #include @@ -152,19 +153,19 @@ bool Argument::GetImmediateU1() const noexcept { u8 Argument::GetImmediateU8() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x100); + ASSERT(imm <= u64(std::numeric_limits::max())); return u8(imm); } u16 Argument::GetImmediateU16() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x10000); + ASSERT(imm <= u64(std::numeric_limits::max())); return u16(imm); } u32 Argument::GetImmediateU32() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x100000000); + ASSERT(imm <= u64(std::numeric_limits::max())); return u32(imm); } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp index faf0686231..1bb0be823a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp @@ -20,9 +20,12 @@ bool TranslatorVisitor::B_cond(Imm<19> imm19, Cond cond) { bool TranslatorVisitor::B_uncond(Imm<26> imm26) { const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend(); const u64 target = ir.PC() + offset; - - //ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); - ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); + // Pattern to halt execution (B .) + if (target == ir.PC()) { + ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); + return false; + } + ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); return false; } From de45a0ac2054cf4fee3054792a691302ce59d2d3 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 14:46:35 +0000 Subject: [PATCH 16/52] [dynarmic] regalloc use scratchimpl that uses all instead of iteraiting Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_emit_x64.cpp | 4 +-- .../src/dynarmic/backend/x64/reg_alloc.cpp | 30 ++++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 1691bbb3b7..fa1f071fbf 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept { auto const opcode = inst.GetOpcode(); // Call the relevant Emit* member function. switch (opcode) { -#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch; +#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch; #define A32OPC(name, type, ...) -#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch; +#define A64OPC(name, type, ...) case IR::Opcode::A64##name: goto a64_branch; #include "dynarmic/ir/opcodes.inc" #undef OPCODE #undef A32OPC diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 502a093d08..2db817a90f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -367,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def, if (result_def) { DefineValueImpl(result_def, ABI_RETURN); } - + for (size_t i = 0; i < args.size(); i++) { + if (args[i]) { + UseScratch(*args[i], args_hostloc[i]); + } else { + ScratchGpr(args_hostloc[i]); // TODO: Force spill + } + } + // Must match with with ScratchImpl + for (auto const gpr : other_caller_save) { + MoveOutOfTheWay(gpr); + LocInfo(gpr).WriteLock(); + } for (size_t i = 0; i < args.size(); i++) { if (args[i] && !args[i]->get().IsVoid()) { - UseScratch(*args[i], args_hostloc[i]); // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]); switch (args[i]->get().GetType()) { @@ -390,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def, } } } - - for (size_t i = 0; i < args.size(); i++) - if (!args[i]) { - // TODO: Force spill - ScratchGpr(args_hostloc[i]); - } - for (auto const caller_saved : other_caller_save) - ScratchImpl({caller_saved}); } void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept { @@ -560,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept { } HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { -#if 0 // TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows // but it's fine anyways. We can find other ways to cheat it later - but which?!?! // we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end? // TODO(lizzie): This needs to be investigated further later. // Do not spill XMM into other XMM silly - if (!is_xmm) { + /*if (!is_xmm) { // TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY // Intel recommends to spill GPR onto XMM registers IF POSSIBLE // TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call? @@ -574,8 +575,9 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) return loc; - } -#endif + }*/ + // TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits + // of spilling on XMM versus the potential cost of using XMM registers..... // Otherwise go to stack spilling for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) From d2f10fba10d5928301d0a8cb2f74217b208aacd8 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 18:29:39 +0000 Subject: [PATCH 17/52] [dynarmic] use better boost::visitor Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 1 - .../src/dynarmic/backend/x64/a32_emit_x64.cpp | 1 - .../src/dynarmic/backend/x64/emit_x64.cpp | 6 ++-- .../src/dynarmic/common/variant_util.h | 29 ------------------- 4 files changed, 3 insertions(+), 34 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/variant_util.h diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index 5333973164..34cbc308be 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -63,7 +63,6 @@ add_library(dynarmic common/string_util.h common/u128.cpp common/u128.h - common/variant_util.h frontend/A32/a32_types.cpp frontend/A32/a32_types.h frontend/A64/a64_types.cpp diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index fb306336cf..3186758380 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -28,7 +28,6 @@ #include "dynarmic/backend/x64/nzcv_util.h" #include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/stack_layout.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index 3bc93e6fd5..0bb2604025 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -11,6 +11,7 @@ #include #include "dynarmic/common/assert.h" +#include #include #include #include "dynarmic/common/common_types.h" @@ -21,7 +22,6 @@ #include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/verbose_debugging_output.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -347,14 +347,14 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de } void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - Common::VisitVariant(terminal, [this, initial_location, is_single_step](auto x) { + boost::apply_visitor([this, initial_location, is_single_step](auto x) { using T = std::decay_t; if constexpr (!std::is_same_v) { this->EmitTerminalImpl(x, initial_location, is_single_step); } else { ASSERT_MSG(false, "Invalid terminal"); } - }); + }, terminal); } void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { diff --git a/src/dynarmic/src/dynarmic/common/variant_util.h b/src/dynarmic/src/dynarmic/common/variant_util.h deleted file mode 100644 index 4dd7f67167..0000000000 --- a/src/dynarmic/src/dynarmic/common/variant_util.h +++ /dev/null @@ -1,29 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include - -namespace Dynarmic::Common { -namespace detail { - -template -struct VariantVisitor : boost::static_visitor - , Lambda { - VariantVisitor(Lambda&& lambda) - : Lambda(std::move(lambda)) {} - - using Lambda::operator(); -}; - -} // namespace detail - -template -inline ReturnT VisitVariant(Variant&& variant, Lambda&& lambda) { - return boost::apply_visitor(detail::VariantVisitor(std::move(lambda)), variant); -} - -} // namespace Dynarmic::Common From 5fa48b8ec8a32802676a438f4022db067fc55977 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 16 Sep 2025 20:20:21 +0000 Subject: [PATCH 18/52] [dynarmic] fix exception posix handler Signed-off-by: lizzie --- .../backend/exception_handler_posix.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index c95cbbb74e..f1f208179f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -7,7 +7,10 @@ */ #include +#include +#include #include +#include #include #include #include "dynarmic/backend/exception_handler.h" @@ -115,11 +118,9 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { CTX_DECLARE(raw_context); #if defined(ARCHITECTURE_x86_64) { - std::lock_guard guard(sig_handler->code_block_infos_mutex); - - const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP); - if (iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->cb(CTX_RIP); + std::shared_lock guard(sig_handler->code_block_infos_mutex); + if (auto const iter = sig_handler->FindCodeBlockInfo(CTX_RIP); iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->second.cb(CTX_RIP); CTX_RSP -= sizeof(u64); *mcl::bit_cast(CTX_RSP) = fc.ret_rip; CTX_RIP = fc.call_rip; @@ -129,10 +130,9 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); #elif defined(ARCHITECTURE_arm64) { - std::lock_guard guard(sig_handler->code_block_infos_mutex); - const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); - if (iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->cb(CTX_PC); + std::shared_lock guard(sig_handler->code_block_infos_mutex); + if (const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->second.cb(CTX_PC); CTX_PC = fc.call_pc; return; } @@ -187,15 +187,15 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(ARCHITECTURE_x86_64) +#if defined(MCL_ARCHITECTURE_X86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { impl = std::make_unique(mcl::bit_cast(code.getCode()), code.GetTotalCodeSize()); } -#elif defined(ARCHITECTURE_arm64) +#elif defined(MCL_ARCHITECTURE_ARM64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } -#elif defined(ARCHITECTURE_riscv64) +#elif defined(MCL_ARCHITECTURE_RISCV) void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } From 2becd9e48882c30e6ca3dc551d7105459ab41ebc Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 17 Sep 2025 20:51:37 +0000 Subject: [PATCH 19/52] [dynarmic] Allow to skip verification pass Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/interface/optimization_flags.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dynarmic/src/dynarmic/interface/optimization_flags.h b/src/dynarmic/src/dynarmic/interface/optimization_flags.h index 743d902767..2e8822b27a 100644 --- a/src/dynarmic/src/dynarmic/interface/optimization_flags.h +++ b/src/dynarmic/src/dynarmic/interface/optimization_flags.h @@ -34,6 +34,8 @@ enum class OptimizationFlag : std::uint32_t { MiscIROpt = 0x00000020, /// Optimize for code speed rather than for code size (this serves well for tight loops) CodeSpeed = 0x00000040, + /// Disable verification passes + DisableVerification = 0x00000080, /// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations. /// This unfuses fused instructions to improve performance on host CPUs without FMA support. From e44ff3ba68dbed2cb433b6691e94710dd9a610f6 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 17 Sep 2025 21:27:23 +0000 Subject: [PATCH 20/52] [dynarmic] inlined pool in block + slab-like for each block Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/ir/basic_block.cpp | 16 +++++++++++++++- src/dynarmic/src/dynarmic/ir/basic_block.h | 8 ++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index a13a7ebfc9..612a3d28e9 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -37,7 +37,21 @@ Block::Block(const LocationDescriptor& location) /// @param args A sequence of Value instances used as arguments for the instruction. /// @returns Iterator to the newly created instruction. Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) noexcept { - IR::Inst* inst = new IR::Inst(opcode); + // First try using the "inline" buffer, otherwise fallback to a slower slab-like allocation scheme + // purpouse is to avoid many calls to new/delete which invoke malloc which invokes mmap + // just pool it!!! - reason why there is an inline buffer is because many small blocks are created + // with few instructions due to subpar optimisations on other passes... plus branch-heavy code will + // hugely benefit from the coherency of faster allocations... + IR::Inst* inst; + if (inlined_inst.size() < inlined_inst.max_size()) { + inst = &inlined_inst[inlined_inst.size()]; + inlined_inst.emplace_back(opcode); + } else { + if (pooled_inst.empty() || pooled_inst.back().size() == pooled_inst.back().max_size()) + pooled_inst.emplace_back(); + inst = &pooled_inst.back()[pooled_inst.back().size()]; + pooled_inst.back().emplace_back(opcode); + } DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index 2f2d9ab6de..166a5e4d1b 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -13,6 +13,9 @@ #include #include +#include +#include +#include #include #include "dynarmic/common/common_types.h" @@ -163,8 +166,12 @@ public: return cycle_count; } private: + /// "Hot cache" for small blocks so we don't call global allocator + boost::container::static_vector inlined_inst; /// List of instructions in this block. instruction_list_type instructions; + /// "Long/far" memory pool + boost::container::stable_vector> pooled_inst; /// Block to execute next if `cond` did not pass. std::optional cond_failed = {}; /// Description of the starting location of this block @@ -180,6 +187,7 @@ private: /// Number of cycles this block takes to execute. size_t cycle_count = 0; }; +static_assert(sizeof(Block) == 2048); /// Returns a string representation of the contents of block. Intended for debugging. std::string DumpBlock(const IR::Block& block) noexcept; From de588465b60b10ec5a6167b61d3befd9bf827d61 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 20 Sep 2025 19:14:05 +0000 Subject: [PATCH 21/52] [dynarmic] increase cache code size Signed-off-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 1 + src/core/arm/dynarmic/arm_dynarmic_64.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index b74817a611..b77e4f613d 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -211,6 +211,7 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.enable_cycle_counting = !m_uses_wall_clock; // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB + // Solaris doesn't support kPageSize >= 512MiB config.code_cache_size = std::uint32_t(128_MiB); // Allow memory fault handling to work diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 682b0cfbf6..eb328b919d 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -270,6 +270,7 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.enable_cycle_counting = !m_uses_wall_clock; // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB + // Solaris doesn't support kPageSize >= 512MiB config.code_cache_size = std::uint32_t(128_MiB); // Allow memory fault handling to work From 23ab89636c4e810ff3462d753c07fcf6ae211f9e Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 23 Sep 2025 01:15:51 +0000 Subject: [PATCH 22/52] Fix license headers Signed-off-by: lizzie --- .../src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp | 3 +++ src/dynarmic/src/dynarmic/interface/optimization_flags.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp index 1bb0be823a..41dd8cb4d4 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/interface/optimization_flags.h b/src/dynarmic/src/dynarmic/interface/optimization_flags.h index 2e8822b27a..9e58197b47 100644 --- a/src/dynarmic/src/dynarmic/interface/optimization_flags.h +++ b/src/dynarmic/src/dynarmic/interface/optimization_flags.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2020 MerryMage * SPDX-License-Identifier: 0BSD From 2407dfbcb0a35a126893144e59966eb9e819bfc0 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 23 Sep 2025 03:18:07 +0000 Subject: [PATCH 23/52] [dynarmic] enforce higher constraints Signed-off-by: lizzie --- .../backend/x64/emit_x64_memory.cpp.inc | 93 +++++++++---------- 1 file changed, 43 insertions(+), 50 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index 34f77b0446..28699fa33b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -273,34 +273,31 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool ordered = IsOrdered(args[3].GetImmediateAccType()); - if constexpr (bitsize != 128) { - ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]); - } else { + if constexpr (bitsize == 128) { ctx.reg_alloc.Use(args[1], ABI_PARAM2); ctx.reg_alloc.Use(args[2], HostLoc::XMM1); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(inst); + } else { + ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]); } + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); Xbyak::Label end; - code.mov(code.ABI_RETURN, u32(1)); - code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]); + code.test(tmp.cvt8(), tmp.cvt8()); code.je(end); - code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); - code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); + code.xor_(tmp.cvt32(), tmp.cvt32()); + code.xchg(tmp.cvt8(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]); + code.mov(code.ABI_PARAM1, u64(&conf)); if constexpr (bitsize != 128) { using T = mcl::unsigned_integer_of_size; - - code.CallLambda( - [](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 { - return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, - [&](T expected) -> bool { - return (conf.callbacks->*callback)(vaddr, value, expected); - }) - ? 0 - : 1; - }); + code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 { + return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, [&](T expected) -> bool { + return (conf.callbacks->*callback)(vaddr, value, expected); + }) ? 0 : 1; + }); if (ordered) { code.mfence(); } @@ -308,15 +305,11 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.movaps(xword[code.ABI_PARAM3], xmm1); - code.CallLambda( - [](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 { - return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, - [&](Vector expected) -> bool { - return (conf.callbacks->*callback)(vaddr, value, expected); - }) - ? 0 - : 1; - }); + code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 { + return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, [&](Vector expected) -> bool { + return (conf.callbacks->*callback)(vaddr, value, expected); + }) ? 0 : 1; + }); if (ordered) { code.mfence(); } @@ -437,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i SharedLabel end = GenSharedLabel(); - code.mov(tmp, mcl::bit_cast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); code.mov(status, u32(1)); - code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]); + code.test(tmp.cvt8(), tmp.cvt8()); code.je(*end, code.T_NEAR); + code.mov(tmp, mcl::bit_cast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); code.cmp(qword[tmp], vaddr); code.jne(*end, code.T_NEAR); @@ -474,30 +468,29 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i const auto location = code.getCurr(); - if constexpr (bitsize == 128) { + switch (bitsize) { + case 8: + code.lock(); + code.cmpxchg(code.byte[dest_ptr], value.cvt8()); + break; + case 16: + code.lock(); + code.cmpxchg(word[dest_ptr], value.cvt16()); + break; + case 32: + code.lock(); + code.cmpxchg(dword[dest_ptr], value.cvt32()); + break; + case 64: + code.lock(); + code.cmpxchg(qword[dest_ptr], value.cvt64()); + break; + case 128: code.lock(); code.cmpxchg16b(ptr[dest_ptr]); - } else { - switch (bitsize) { - case 8: - code.lock(); - code.cmpxchg(code.byte[dest_ptr], value.cvt8()); - break; - case 16: - code.lock(); - code.cmpxchg(word[dest_ptr], value.cvt16()); - break; - case 32: - code.lock(); - code.cmpxchg(dword[dest_ptr], value.cvt32()); - break; - case 64: - code.lock(); - code.cmpxchg(qword[dest_ptr], value.cvt64()); - break; - default: - UNREACHABLE(); - } + break; + default: + UNREACHABLE(); } code.setnz(status.cvt8()); From bf302d7917f2bd4b2638ac61a751fedd5727c7a6 Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 18:40:29 +0200 Subject: [PATCH 24/52] [common] No need to specify min/max for settings; fix crash when OOB value is given for some settings (#2609) This fixes issues when migrating settings that refer to invalid filters/scales. For example if we had 5 filters, but we set filter=6, the program would crash. This also makes so specifying min/max manually isn't needed (but can still be set for cases like NCE). Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2609 Reviewed-by: crueter Reviewed-by: MaranBr Co-authored-by: lizzie Co-committed-by: lizzie --- src/common/settings.h | 41 ++----- src/common/settings_enums.h | 120 ++++++++------------- src/common/settings_setting.h | 67 ++++++------ src/yuzu/configuration/configure_audio.cpp | 6 +- 4 files changed, 84 insertions(+), 150 deletions(-) diff --git a/src/common/settings.h b/src/common/settings.h index 8605445837..891bde608c 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -178,7 +178,7 @@ struct Values { SwitchableSetting audio_input_device_id{ linkage, "auto", "input_device", Category::Audio, Specialization::RuntimeList}; SwitchableSetting sound_index{ - linkage, AudioMode::Stereo, AudioMode::Mono, AudioMode::Surround, + linkage, AudioMode::Stereo, "sound_index", Category::SystemAudio, Specialization::Default, true, true}; SwitchableSetting volume{linkage, @@ -199,8 +199,6 @@ struct Values { SwitchableSetting use_multi_core{linkage, true, "use_multi_core", Category::Core}; SwitchableSetting memory_layout_mode{linkage, MemoryLayout::Memory_4Gb, - MemoryLayout::Memory_4Gb, - MemoryLayout::Memory_12Gb, "memory_layout_mode", Category::Core, Specialization::Default, @@ -240,9 +238,8 @@ struct Values { #endif "cpu_backend", Category::Cpu}; - SwitchableSetting cpu_accuracy{linkage, CpuAccuracy::Auto, - CpuAccuracy::Auto, CpuAccuracy::Paranoid, - "cpu_accuracy", Category::Cpu}; + SwitchableSetting cpu_accuracy{linkage, CpuAccuracy::Auto, + "cpu_accuracy", Category::Cpu}; SwitchableSetting use_fast_cpu_time{linkage, false, @@ -324,10 +321,10 @@ struct Values { // Renderer SwitchableSetting renderer_backend{ - linkage, RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, + linkage, RendererBackend::Vulkan, "backend", Category::Renderer}; SwitchableSetting shader_backend{ - linkage, ShaderBackend::SpirV, ShaderBackend::Glsl, ShaderBackend::SpirV, + linkage, ShaderBackend::SpirV, "shader_backend", Category::Renderer, Specialization::RuntimeList}; SwitchableSetting vulkan_device{linkage, 0, "vulkan_device", Category::Renderer, Specialization::RuntimeList}; @@ -342,8 +339,6 @@ struct Values { Category::Renderer}; SwitchableSetting optimize_spirv_output{linkage, SpirvOptimizeMode::Never, - SpirvOptimizeMode::Never, - SpirvOptimizeMode::Always, "optimize_spirv_output", Category::Renderer}; SwitchableSetting use_asynchronous_gpu_emulation{ @@ -354,12 +349,10 @@ struct Values { #else AstcDecodeMode::Gpu, #endif - AstcDecodeMode::Cpu, - AstcDecodeMode::CpuAsynchronous, "accelerate_astc", Category::Renderer}; SwitchableSetting vsync_mode{ - linkage, VSyncMode::Fifo, VSyncMode::Immediate, VSyncMode::FifoRelaxed, + linkage, VSyncMode::Fifo, "use_vsync", Category::Renderer, Specialization::RuntimeList, true, true}; SwitchableSetting nvdec_emulation{linkage, NvdecEmulation::Gpu, @@ -372,8 +365,6 @@ struct Values { #else FullscreenMode::Exclusive, #endif - FullscreenMode::Borderless, - FullscreenMode::Exclusive, "fullscreen_mode", Category::Renderer, Specialization::Default, @@ -381,8 +372,6 @@ struct Values { true}; SwitchableSetting aspect_ratio{linkage, AspectRatio::R16_9, - AspectRatio::R16_9, - AspectRatio::Stretch, "aspect_ratio", Category::Renderer, Specialization::Default, @@ -430,8 +419,6 @@ struct Values { #else GpuAccuracy::High, #endif - GpuAccuracy::Normal, - GpuAccuracy::Extreme, "gpu_accuracy", Category::RendererAdvanced, Specialization::Default, @@ -442,8 +429,6 @@ struct Values { SwitchableSetting dma_accuracy{linkage, DmaAccuracy::Default, - DmaAccuracy::Default, - DmaAccuracy::Safe, "dma_accuracy", Category::RendererAdvanced, Specialization::Default, @@ -456,20 +441,14 @@ struct Values { #else AnisotropyMode::Automatic, #endif - AnisotropyMode::Automatic, - AnisotropyMode::X16, "max_anisotropy", Category::RendererAdvanced}; SwitchableSetting astc_recompression{linkage, AstcRecompression::Uncompressed, - AstcRecompression::Uncompressed, - AstcRecompression::Bc3, "astc_recompression", Category::RendererAdvanced}; SwitchableSetting vram_usage_mode{linkage, VramUsageMode::Conservative, - VramUsageMode::Conservative, - VramUsageMode::Aggressive, "vram_usage_mode", Category::RendererAdvanced}; SwitchableSetting skip_cpu_inner_invalidation{linkage, @@ -595,14 +574,10 @@ struct Values { // System SwitchableSetting language_index{linkage, Language::EnglishAmerican, - Language::Japanese, - Language::Serbian, "language_index", Category::System}; - SwitchableSetting region_index{linkage, Region::Usa, Region::Japan, - Region::Taiwan, "region_index", Category::System}; - SwitchableSetting time_zone_index{linkage, TimeZone::Auto, - TimeZone::Auto, TimeZone::Zulu, + SwitchableSetting region_index{linkage, Region::Usa, "region_index", Category::System}; + SwitchableSetting time_zone_index{linkage, TimeZone::Auto, "time_zone_index", Category::System}; // Measured in seconds since epoch SwitchableSetting custom_rtc_enabled{ diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h index ebfa4ceb9e..0e5a08d845 100644 --- a/src/common/settings_enums.h +++ b/src/common/settings_enums.h @@ -10,6 +10,7 @@ #pragma once #include +#include #include #include #include "common/common_types.h" @@ -18,8 +19,10 @@ namespace Settings { template struct EnumMetadata { - static std::vector> Canonicalizations(); + static std::vector> Canonicalizations(); static u32 Index(); + static constexpr T GetFirst(); + static constexpr T GetLast(); }; #define PAIR_45(N, X, ...) {#X, N::X} __VA_OPT__(, PAIR_46(N, __VA_ARGS__)) @@ -69,138 +72,101 @@ struct EnumMetadata { #define PAIR_1(N, X, ...) {#X, N::X} __VA_OPT__(, PAIR_2(N, __VA_ARGS__)) #define PAIR(N, X, ...) {#X, N::X} __VA_OPT__(, PAIR_1(N, __VA_ARGS__)) -#define ENUM(NAME, ...) \ - enum class NAME : u32 { __VA_ARGS__ }; \ - template <> \ - inline std::vector> EnumMetadata::Canonicalizations() { \ - return {PAIR(NAME, __VA_ARGS__)}; \ - } \ - template <> \ - inline u32 EnumMetadata::Index() { \ - return __COUNTER__; \ +#define PP_HEAD(A, ...) A + +#define ENUM(NAME, ...) \ + enum class NAME : u32 { __VA_ARGS__ }; \ + template<> inline std::vector> EnumMetadata::Canonicalizations() { \ + return {PAIR(NAME, __VA_ARGS__)}; \ + } \ + template<> inline u32 EnumMetadata::Index() { \ + return __COUNTER__; \ + } \ + template<> inline constexpr NAME EnumMetadata::GetFirst() { \ + return NAME::PP_HEAD(__VA_ARGS__); \ + } \ + template<> inline constexpr NAME EnumMetadata::GetLast() { \ + return (std::vector>{PAIR(NAME, __VA_ARGS__)}).back().second; \ } // AudioEngine must be specified discretely due to having existing but slightly different // canonicalizations // TODO (lat9nq): Remove explicit definition of AudioEngine/sink_id -enum class AudioEngine : u32 { - Auto, - Cubeb, - Sdl2, - Null, - Oboe, -}; - -template <> -inline std::vector> -EnumMetadata::Canonicalizations() { +enum class AudioEngine : u32 { Auto, Cubeb, Sdl2, Null, Oboe, }; +template<> +inline std::vector> EnumMetadata::Canonicalizations() { return { {"auto", AudioEngine::Auto}, {"cubeb", AudioEngine::Cubeb}, {"sdl2", AudioEngine::Sdl2}, {"null", AudioEngine::Null}, {"oboe", AudioEngine::Oboe}, }; } - -template <> +/// @brief This is just a sufficiently large number that is more than the number of other enums declared here +template<> inline u32 EnumMetadata::Index() { - // This is just a sufficiently large number that is more than the number of other enums declared - // here return 100; } +template<> +inline constexpr AudioEngine EnumMetadata::GetFirst() { + return AudioEngine::Auto; +} +template<> +inline constexpr AudioEngine EnumMetadata::GetLast() { + return AudioEngine::Oboe; +} ENUM(AudioMode, Mono, Stereo, Surround); +static_assert(EnumMetadata::GetFirst() == AudioMode::Mono); +static_assert(EnumMetadata::GetLast() == AudioMode::Surround); ENUM(Language, Japanese, EnglishAmerican, French, German, Italian, Spanish, Chinese, Korean, Dutch, Portuguese, Russian, Taiwanese, EnglishBritish, FrenchCanadian, SpanishLatin, ChineseSimplified, ChineseTraditional, PortugueseBrazilian, Serbian); - ENUM(Region, Japan, Usa, Europe, Australia, China, Korea, Taiwan); - ENUM(TimeZone, Auto, Default, Cet, Cst6Cdt, Cuba, Eet, Egypt, Eire, Est, Est5Edt, Gb, GbEire, Gmt, - GmtPlusZero, GmtMinusZero, GmtZero, Greenwich, Hongkong, Hst, Iceland, Iran, Israel, Jamaica, - Japan, Kwajalein, Libya, Met, Mst, Mst7Mdt, Navajo, Nz, NzChat, Poland, Portugal, Prc, Pst8Pdt, - Roc, Rok, Singapore, Turkey, Uct, Universal, Utc, WSu, Wet, Zulu); - + GmtPlusZero, GmtMinusZero, GmtZero, Greenwich, Hongkong, Hst, Iceland, Iran, Israel, Jamaica, + Japan, Kwajalein, Libya, Met, Mst, Mst7Mdt, Navajo, Nz, NzChat, Poland, Portugal, Prc, Pst8Pdt, + Roc, Rok, Singapore, Turkey, Uct, Universal, Utc, WSu, Wet, Zulu); ENUM(AnisotropyMode, Automatic, Default, X2, X4, X8, X16); - ENUM(AstcDecodeMode, Cpu, Gpu, CpuAsynchronous); - ENUM(AstcRecompression, Uncompressed, Bc1, Bc3); - ENUM(VSyncMode, Immediate, Mailbox, Fifo, FifoRelaxed); - ENUM(VramUsageMode, Conservative, Aggressive); - ENUM(RendererBackend, OpenGL, Vulkan, Null); - ENUM(ShaderBackend, Glsl, Glasm, SpirV); - ENUM(GpuAccuracy, Normal, High, Extreme); - ENUM(DmaAccuracy, Default, Unsafe, Safe); - ENUM(CpuBackend, Dynarmic, Nce); - ENUM(CpuAccuracy, Auto, Accurate, Unsafe, Paranoid); - ENUM(CpuClock, Boost, Fast) - ENUM(MemoryLayout, Memory_4Gb, Memory_6Gb, Memory_8Gb, Memory_10Gb, Memory_12Gb); - ENUM(ConfirmStop, Ask_Always, Ask_Based_On_Game, Ask_Never); - ENUM(FullscreenMode, Borderless, Exclusive); - ENUM(NvdecEmulation, Off, Cpu, Gpu); - -ENUM(ResolutionSetup, - Res1_4X, - Res1_2X, - Res3_4X, - Res1X, - Res3_2X, - Res2X, - Res3X, - Res4X, - Res5X, - Res6X, - Res7X, - Res8X); - +ENUM(ResolutionSetup, Res1_4X, Res1_2X, Res3_4X, Res1X, Res3_2X, Res2X, Res3X, Res4X, Res5X, Res6X, Res7X, Res8X); ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Spline1, Gaussian, Lanczos, ScaleForce, Fsr, Area, MaxEnum); - ENUM(AntiAliasing, None, Fxaa, Smaa, MaxEnum); - ENUM(AspectRatio, R16_9, R4_3, R21_9, R16_10, Stretch); - ENUM(ConsoleMode, Handheld, Docked); - ENUM(AppletMode, HLE, LLE); - ENUM(SpirvOptimizeMode, Never, OnLoad, Always); - ENUM(GpuOverclock, Low, Medium, High) - ENUM(TemperatureUnits, Celsius, Fahrenheit) template -inline std::string CanonicalizeEnum(Type id) { +inline std::string_view CanonicalizeEnum(Type id) { const auto group = EnumMetadata::Canonicalizations(); - for (auto& [name, value] : group) { - if (value == id) { + for (auto& [name, value] : group) + if (value == id) return name; - } - } return "unknown"; } template inline Type ToEnum(const std::string& canonicalization) { const auto group = EnumMetadata::Canonicalizations(); - for (auto& [name, value] : group) { - if (name == canonicalization) { + for (auto& [name, value] : group) + if (name == canonicalization) return value; - } - } return {}; } } // namespace Settings diff --git a/src/common/settings_setting.h b/src/common/settings_setting.h index 0aba2e11c9..a7e6bb6168 100644 --- a/src/common/settings_setting.h +++ b/src/common/settings_setting.h @@ -72,10 +72,17 @@ public: u32 specialization_ = Specialization::Default, bool save_ = true, bool runtime_modifiable_ = false, BasicSetting* other_setting_ = nullptr) requires(ranged) - : BasicSetting(linkage, name, category_, save_, runtime_modifiable_, specialization_, - other_setting_), + : BasicSetting(linkage, name, category_, save_, runtime_modifiable_, specialization_, other_setting_), value{default_val}, default_value{default_val}, maximum{max_val}, minimum{min_val} {} + explicit Setting(Linkage& linkage, const Type& default_val, + const std::string& name, Category category_, + u32 specialization_ = Specialization::Default, bool save_ = true, + bool runtime_modifiable_ = false, BasicSetting* other_setting_ = nullptr) + requires(ranged && std::is_enum_v) + : BasicSetting(linkage, name, category_, save_, runtime_modifiable_, specialization_, other_setting_), + value{default_val}, default_value{default_val}, maximum{EnumMetadata::GetLast()}, minimum{EnumMetadata::GetFirst()} {} + /** * Returns a reference to the setting's value. * @@ -119,9 +126,6 @@ protected: return value_.has_value() ? std::to_string(*value_) : "none"; } else if constexpr (std::is_same_v) { return value_ ? "true" : "false"; - } else if constexpr (std::is_same_v) { - // Compatibility with old AudioEngine setting being a string - return CanonicalizeEnum(value_); } else if constexpr (std::is_floating_point_v) { return fmt::format("{:f}", value_); } else if constexpr (std::is_enum_v) { @@ -207,7 +211,7 @@ public: [[nodiscard]] std::string Canonicalize() const override final { if constexpr (std::is_enum_v) { - return CanonicalizeEnum(this->GetValue()); + return std::string{CanonicalizeEnum(this->GetValue())}; } else { return ToString(this->GetValue()); } @@ -288,41 +292,32 @@ public: * @param other_setting_ A second Setting to associate to this one in metadata */ template - explicit SwitchableSetting(Linkage& linkage, const Type& default_val, const std::string& name, - Category category_, u32 specialization_ = Specialization::Default, - bool save_ = true, bool runtime_modifiable_ = false, - typename std::enable_if::type other_setting_ = nullptr) - : Setting{ - linkage, default_val, name, category_, specialization_, - save_, runtime_modifiable_, other_setting_} { + explicit SwitchableSetting(Linkage& linkage, const Type& default_val, const std::string& name, Category category_, u32 specialization_ = Specialization::Default, bool save_ = true, bool runtime_modifiable_ = false, T* other_setting_ = nullptr) requires(!ranged) + : Setting{ linkage, default_val, name, category_, specialization_, save_, runtime_modifiable_, other_setting_} { linkage.restore_functions.emplace_back([this]() { this->SetGlobal(true); }); } virtual ~SwitchableSetting() = default; - /** - * Sets a default value, minimum value, maximum value, and label. - * - * @param linkage Setting registry - * @param default_val Initial value of the setting, and default value of the setting - * @param min_val Sets the minimum allowed value of the setting - * @param max_val Sets the maximum allowed value of the setting - * @param name Label for the setting - * @param category_ Category of the setting AKA INI group - * @param specialization_ Suggestion for how frontend implementations represent this in a config - * @param save_ Suggests that this should or should not be saved to a frontend config file - * @param runtime_modifiable_ Suggests whether this is modifiable while a guest is loaded - * @param other_setting_ A second Setting to associate to this one in metadata - */ + /// @brief Sets a default value, minimum value, maximum value, and label. + /// @param linkage Setting registry + /// @param default_val Initial value of the setting, and default value of the setting + /// @param min_val Sets the minimum allowed value of the setting + /// @param max_val Sets the maximum allowed value of the setting + /// @param name Label for the setting + /// @param category_ Category of the setting AKA INI group + /// @param specialization_ Suggestion for how frontend implementations represent this in a config + /// @param save_ Suggests that this should or should not be saved to a frontend config file + /// @param runtime_modifiable_ Suggests whether this is modifiable while a guest is loaded + /// @param other_setting_ A second Setting to associate to this one in metadata template - explicit SwitchableSetting(Linkage& linkage, const Type& default_val, const Type& min_val, - const Type& max_val, const std::string& name, Category category_, - u32 specialization_ = Specialization::Default, bool save_ = true, - bool runtime_modifiable_ = false, - typename std::enable_if::type other_setting_ = nullptr) - : Setting{linkage, default_val, min_val, - max_val, name, category_, - specialization_, save_, runtime_modifiable_, - other_setting_} { + explicit SwitchableSetting(Linkage& linkage, const Type& default_val, const Type& min_val, const Type& max_val, const std::string& name, Category category_, u32 specialization_ = Specialization::Default, bool save_ = true, bool runtime_modifiable_ = false, T* other_setting_ = nullptr) requires(ranged) + : Setting{linkage, default_val, min_val, max_val, name, category_, specialization_, save_, runtime_modifiable_, other_setting_} { + linkage.restore_functions.emplace_back([this]() { this->SetGlobal(true); }); + } + + template + explicit SwitchableSetting(Linkage& linkage, const Type& default_val, const std::string& name, Category category_, u32 specialization_ = Specialization::Default, bool save_ = true, bool runtime_modifiable_ = false, T* other_setting_ = nullptr) requires(ranged) + : Setting{linkage, default_val, EnumMetadata::GetFirst(), EnumMetadata::GetLast(), name, category_, specialization_, save_, runtime_modifiable_, other_setting_} { linkage.restore_functions.emplace_back([this]() { this->SetGlobal(true); }); } diff --git a/src/yuzu/configuration/configure_audio.cpp b/src/yuzu/configuration/configure_audio.cpp index a7ebae91f8..af81ef552e 100644 --- a/src/yuzu/configuration/configure_audio.cpp +++ b/src/yuzu/configuration/configure_audio.cpp @@ -270,10 +270,8 @@ void ConfigureAudio::UpdateAudioDevices(int sink_index) { void ConfigureAudio::InitializeAudioSinkComboBox() { sink_combo_box->clear(); sink_combo_box->addItem(QString::fromUtf8(AudioCore::Sink::auto_device_name)); - - for (const auto& id : AudioCore::Sink::GetSinkIDs()) { - sink_combo_box->addItem(QString::fromStdString(Settings::CanonicalizeEnum(id))); - } + for (const auto& id : AudioCore::Sink::GetSinkIDs()) + sink_combo_box->addItem(QString::fromStdString(std::string{Settings::CanonicalizeEnum(id)})); } void ConfigureAudio::RetranslateUI() { From ecb811ad04e75b221e0eee31f6bbd582620bd7c7 Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 18:41:28 +0200 Subject: [PATCH 25/52] [qt] move addons row to rightmost side (#2610) This is because the rightmost row is "extended" to the rest of the table, and add-ons have long names, play time doesn't need that much space. Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2610 Reviewed-by: crueter Reviewed-by: MaranBr Co-authored-by: lizzie Co-committed-by: lizzie --- src/yuzu/game_list.h | 4 ++-- src/yuzu/game_list_worker.cpp | 36 ++++++++++++----------------------- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index 94e7b2dc42..cd71fb2139 100644 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h @@ -58,11 +58,11 @@ class GameList : public QWidget { public: enum { COLUMN_NAME, - COLUMN_COMPATIBILITY, - COLUMN_ADD_ONS, COLUMN_FILE_TYPE, COLUMN_SIZE, COLUMN_PLAY_TIME, + COLUMN_ADD_ONS, + COLUMN_COMPATIBILITY, COLUMN_COUNT, // Number of columns }; diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp index 538c7ab822..2914c275a8 100644 --- a/src/yuzu/game_list_worker.cpp +++ b/src/yuzu/game_list_worker.cpp @@ -204,36 +204,24 @@ QList MakeGameListEntry(const std::string& path, const PlayTime::PlayTimeManager& play_time_manager, const FileSys::PatchManager& patch) { - const auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); + auto const it = FindMatchingCompatibilityEntry(compatibility_list, program_id); + // The game list uses 99 as compatibility number for untested games + QString compatibility = it != compatibility_list.end() ? it->second.first : QStringLiteral("99"); - // The game list uses this as compatibility number for untested games - QString compatibility{QStringLiteral("99")}; - if (it != compatibility_list.end()) { - compatibility = it->second.first; - } + auto const file_type = loader.GetFileType(); + auto const file_type_string = QString::fromStdString(Loader::GetFileTypeString(file_type)); - const auto file_type = loader.GetFileType(); - const auto file_type_string = QString::fromStdString(Loader::GetFileTypeString(file_type)); - - QList list{ - new GameListItemPath(FormatGameName(path), icon, QString::fromStdString(name), - file_type_string, program_id), - new GameListItemCompat(compatibility), + QString patch_versions = GetGameListCachedObject(fmt::format("{:016X}", patch.GetTitleID()), "pv.txt", [&patch, &loader] { + return FormatPatchNameVersions(patch, loader, loader.IsRomFSUpdatable()); + }); + return QList{ + new GameListItemPath(FormatGameName(path), icon, QString::fromStdString(name), file_type_string, program_id), new GameListItem(file_type_string), new GameListItemSize(size), new GameListItemPlayTime(play_time_manager.GetPlayTime(program_id)), + new GameListItem(patch_versions), + new GameListItemCompat(compatibility), }; - - QString patch_versions; - - patch_versions = GetGameListCachedObject( - fmt::format("{:016X}", patch.GetTitleID()), "pv.txt", [&patch, &loader] { - return FormatPatchNameVersions(patch, loader, loader.IsRomFSUpdatable()); - }); - - list.insert(2, new GameListItem(patch_versions)); - - return list; } } // Anonymous namespace From 50ceb9a43a6dfe417a7760042e4710ce59553d94 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Mon, 29 Sep 2025 18:42:04 +0200 Subject: [PATCH 26/52] [.ci] install-msvc: fix installation on MSVC (#2611) * changed from Build Tools to Community (congrats Microsoft very cool) * add spining to show it didnt stopped installing Signed-off-by: Caio Oliveira Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2611 Reviewed-by: crueter Reviewed-by: MaranBr Co-authored-by: Caio Oliveira Co-committed-by: Caio Oliveira --- .ci/windows/install-msvc.ps1 | 40 ++++++++++++++++++++++++++---------- docs/Deps.md | 6 +++--- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/.ci/windows/install-msvc.ps1 b/.ci/windows/install-msvc.ps1 index b88f727ed8..788b2848ad 100755 --- a/.ci/windows/install-msvc.ps1 +++ b/.ci/windows/install-msvc.ps1 @@ -10,7 +10,7 @@ if (-not ([bool](net session 2>$null))) { } $VSVer = "17" -$ExeFile = "vs_BuildTools.exe" +$ExeFile = "vs_community.exe" $Uri = "https://aka.ms/vs/$VSVer/release/$ExeFile" $Destination = "./$ExeFile" @@ -19,21 +19,39 @@ $WebClient = New-Object System.Net.WebClient $WebClient.DownloadFile($Uri, $Destination) Write-Host "Finished downloading $ExeFile" -$VSROOT = "C:/VSBuildTools/$VSVer" $Arguments = @( - "--installPath `"$VSROOT`"", # set custom installation path - "--quiet", # suppress UI - "--wait", # wait for installation to complete - "--norestart", # prevent automatic restart - "--add Microsoft.VisualStudio.Workload.VCTools", # add C++ build tools workload - "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64", # add core x86/x64 C++ tools - "--add Microsoft.VisualStudio.Component.Windows10SDK.19041" # add specific Windows SDK + "--quiet", # Suppress installer UI + "--wait", # Wait for installation to complete + "--norestart", # Prevent automatic restart + "--force", # Force installation even if components are already installed + "--add Microsoft.VisualStudio.Workload.NativeDesktop", # Desktop development with C++ + "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64", # Core C++ compiler/tools for x86/x64 + "--add Microsoft.VisualStudio.Component.Windows11SDK.26100",# Windows 11 SDK (26100) + "--add Microsoft.VisualStudio.Component.Windows10SDK.19041",# Windows 10 SDK (19041) + "--add Microsoft.VisualStudio.Component.VC.Llvm.Clang", # LLVM Clang compiler + "--add Microsoft.VisualStudio.Component.VC.Llvm.ClangToolset", # LLVM Clang integration toolset + "--add Microsoft.VisualStudio.Component.Windows11SDK.22621",# Windows 11 SDK (22621) + "--add Microsoft.VisualStudio.Component.VC.CMake.Project", # CMake project support + "--add Microsoft.VisualStudio.ComponentGroup.VC.Tools.142.x86.x64", # VC++ 14.2 toolset + "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Llvm.Clang" # LLVM Clang for native desktop ) Write-Host "Installing Visual Studio Build Tools" -$InstallProcess = Start-Process -FilePath $Destination -NoNewWindow -PassThru -Wait -ArgumentList $Arguments -$ExitCode = $InstallProcess.ExitCode +$InstallProcess = Start-Process -FilePath $Destination -NoNewWindow -PassThru -ArgumentList $Arguments +# Spinner while installing +$Spinner = "|/-\" +$i = 0 +while (-not $InstallProcess.HasExited) { + Write-Host -NoNewline ("`rInstalling... " + $Spinner[$i % $Spinner.Length]) + Start-Sleep -Milliseconds 250 + $i++ +} + +# Clear spinner line +Write-Host "`rSetup completed! " + +$ExitCode = $InstallProcess.ExitCode if ($ExitCode -ne 0) { Write-Host "Error installing Visual Studio Build Tools (Error: $ExitCode)" Exit $ExitCode diff --git a/docs/Deps.md b/docs/Deps.md index cfc6f0365b..0e7b7cff62 100644 --- a/docs/Deps.md +++ b/docs/Deps.md @@ -4,8 +4,8 @@ To build Eden, you MUST have a C++ compiler. * On Linux, this is usually [GCC](https://gcc.gnu.org/) 11+ or [Clang](https://clang.llvm.org/) v14+ - GCC 12 also requires Clang 14+ * On Windows, this is either: - - **[MSVC](https://visualstudio.microsoft.com/downloads/)**, - * *A convenience script to install the **minimal** version (Visual Build Tools) is provided in `.ci/windows/install-msvc.ps1`* + - **[MSVC](https://visualstudio.microsoft.com/downloads/)** (you should select *Community* option), + * *A convenience script to install the Visual Community Studio 2022 with necessary tools is provided in `.ci/windows/install-msvc.ps1`* - clang-cl - can be downloaded from the MSVC installer, - or **[MSYS2](https://www.msys2.org)** * On macOS, this is Apple Clang @@ -211,4 +211,4 @@ Then install the libraries: `sudo pkg install qt6 boost glslang libzip library/l ## All Done -You may now return to the **[root build guide](Build.md)**. \ No newline at end of file +You may now return to the **[root build guide](Build.md)**. From 9f423a24b82e1b3223d8d4204455e637f86271fa Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 18:42:28 +0200 Subject: [PATCH 27/52] [linux] fix aarch64 builds (again) + fix with slightly outdated qt (#2612) Fixes issues building on aarch64 linux with a slightly outdated system qt; also fixes linker selection process Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2612 Reviewed-by: crueter Reviewed-by: MaranBr Co-authored-by: lizzie Co-committed-by: lizzie --- CMakeLists.txt | 5 ++--- src/dynarmic/tests/CMakeLists.txt | 4 +++- src/yuzu/configuration/shared_widget.h | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ef3c0bef6e..f5d7126f92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -895,13 +895,13 @@ if (MSVC AND CXX_CLANG) endif() if (YUZU_USE_FASTER_LD) + # fallback if everything fails (bfd) + set(LINKER bfd) # clang should always use lld find_program(LLD lld) - if (LLD) set(LINKER lld) endif() - # GNU appears to work better with mold # TODO: mold has been slow lately, see if better options exist (search for gold?) if (CXX_GCC) @@ -910,7 +910,6 @@ if (YUZU_USE_FASTER_LD) set(LINKER mold) endif() endif() - message(NOTICE "Selecting ${LINKER} as linker") add_link_options("-fuse-ld=${LINKER}") endif() diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 4ace6c2afd..df90168a52 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -135,6 +135,8 @@ target_include_directories(dynarmic_tests PRIVATE . ../src) target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -target_compile_options(dynarmic_tests PRIVATE -mavx2) +if ("x86_64" IN_LIST ARCHITECTURE) + target_compile_options(dynarmic_tests PRIVATE -mavx2) +endif() add_test(dynarmic_tests dynarmic_tests --durations yes) diff --git a/src/yuzu/configuration/shared_widget.h b/src/yuzu/configuration/shared_widget.h index 9e718098a3..dd5d5b7257 100644 --- a/src/yuzu/configuration/shared_widget.h +++ b/src/yuzu/configuration/shared_widget.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "qt_common/shared_translation.h" From 33f93ad003a47419d545a6bae018d3bb1c5ee3fb Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 18:42:51 +0200 Subject: [PATCH 28/52] [macos, qt] workaround upstream rendering bug (#2616) See https://bugreports.qt.io/browse/QTBUG-138942 Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2616 Reviewed-by: crueter Reviewed-by: MaranBr Co-authored-by: lizzie Co-committed-by: lizzie --- src/yuzu/Info.plist | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/yuzu/Info.plist b/src/yuzu/Info.plist index 96096c84d1..0c43c834d4 100644 --- a/src/yuzu/Info.plist +++ b/src/yuzu/Info.plist @@ -49,5 +49,7 @@ SPDX-License-Identifier: GPL-2.0-or-later NSApplication NSHighResolutionCapable True + UIDesignRequiresCompatibility + From 324ace3cd635bac4230a04c4a769bed236ed1e8d Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 18:43:13 +0200 Subject: [PATCH 29/52] [macos] associate .XCI/NSP file extensions (#2617) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2617 Reviewed-by: crueter Reviewed-by: MaranBr Co-authored-by: lizzie Co-committed-by: lizzie --- src/yuzu/Info.plist | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/yuzu/Info.plist b/src/yuzu/Info.plist index 0c43c834d4..773c4ee302 100644 --- a/src/yuzu/Info.plist +++ b/src/yuzu/Info.plist @@ -45,6 +45,26 @@ SPDX-License-Identifier: GPL-2.0-or-later NSHumanReadableCopyright + + LSApplicationCategoryType + public.app-category.games + CFBundleDocumentTypes + + + CFBundleTypeExtensions + + nsp + xci + nro + + CFBundleTypeName + Switch File + CFBundleTypeRole + Viewer + LSHandlerRank + Default + + NSPrincipalClass NSApplication NSHighResolutionCapable From dbad75ae706b83e583c6bbe39bd8ac1f2c9f44f1 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:54:53 +0000 Subject: [PATCH 30/52] [dynarmic] reduce opt pass latency Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 3 - .../backend/riscv64/a32_address_space.cpp | 14 +---- .../src/dynarmic/common/memory_pool.cpp | 13 ---- .../src/dynarmic/common/memory_pool.h | 61 ------------------- src/dynarmic/src/dynarmic/ir/basic_block.cpp | 7 +-- src/dynarmic/src/dynarmic/ir/basic_block.h | 3 - src/dynarmic/src/dynarmic/ir/ir_emitter.cpp | 21 ------- src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 8 +-- 8 files changed, 7 insertions(+), 123 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.cpp delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.h delete mode 100644 src/dynarmic/src/dynarmic/ir/ir_emitter.cpp diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index 58efcac747..5333973164 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -58,8 +58,6 @@ add_library(dynarmic common/lut_from_list.h common/math_util.cpp common/math_util.h - common/memory_pool.cpp - common/memory_pool.h common/safe_ops.h common/spin_lock.h common/string_util.h @@ -80,7 +78,6 @@ add_library(dynarmic ir/basic_block.cpp ir/basic_block.h ir/cond.h - ir/ir_emitter.cpp ir/ir_emitter.h ir/location_descriptor.cpp ir/location_descriptor.h diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp index 8a21aafcf8..0bb9591411 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp @@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.cpp b/src/dynarmic/src/dynarmic/common/memory_pool.cpp deleted file mode 100644 index f41dd92af5..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.cpp +++ /dev/null @@ -1,13 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/common/memory_pool.h" - -#include - -namespace Dynarmic::Common { - - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.h b/src/dynarmic/src/dynarmic/common/memory_pool.h deleted file mode 100644 index c99316e107..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -namespace Dynarmic::Common { - -/// @tparam object_size Byte-size of objects to construct -/// @tparam slab_size Number of objects to have per slab -template -class Pool { -public: - inline Pool() noexcept { - AllocateNewSlab(); - } - inline ~Pool() noexcept { - std::free(current_slab); - for (char* slab : slabs) { - std::free(slab); - } - } - - Pool(const Pool&) = delete; - Pool(Pool&&) = delete; - - Pool& operator=(const Pool&) = delete; - Pool& operator=(Pool&&) = delete; - - /// @brief Returns a pointer to an `object_size`-bytes block of memory. - [[nodiscard]] void* Alloc() noexcept { - if (remaining == 0) { - slabs.push_back(current_slab); - AllocateNewSlab(); - } - void* ret = static_cast(current_ptr); - current_ptr += object_size; - remaining--; - return ret; - } -private: - /// @brief Allocates a completely new memory slab. - /// Used when an entirely new slab is needed - /// due the current one running out of usable space. - void AllocateNewSlab() noexcept { - current_slab = static_cast(std::malloc(object_size * slab_size)); - current_ptr = current_slab; - remaining = slab_size; - } - - std::vector slabs; - char* current_slab = nullptr; - char* current_ptr = nullptr; - size_t remaining = 0; -}; - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index b00ab3cb20..a13a7ebfc9 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -15,8 +15,6 @@ #include #include "dynarmic/common/assert.h" - -#include "dynarmic/common/memory_pool.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A64/a64_types.h" #include "dynarmic/ir/cond.h" @@ -27,8 +25,7 @@ namespace Dynarmic::IR { Block::Block(const LocationDescriptor& location) : location{location}, end_location{location}, - cond{Cond::AL}, - instruction_alloc_pool{std::make_unique>()} + cond{Cond::AL} { } @@ -40,7 +37,7 @@ Block::Block(const LocationDescriptor& location) /// @param args A sequence of Value instances used as arguments for the instruction. /// @returns Iterator to the newly created instruction. Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) noexcept { - IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode); + IR::Inst* inst = new IR::Inst(opcode); DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index e88dc92fc4..2f2d9ab6de 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -21,7 +21,6 @@ #include "dynarmic/ir/terminal.h" #include "dynarmic/ir/value.h" #include "dynarmic/ir/dense_list.h" -#include "dynarmic/common/memory_pool.h" namespace Dynarmic::IR { @@ -174,8 +173,6 @@ private: LocationDescriptor end_location; /// Conditional to pass in order to execute this block Cond cond; - /// Memory pool for instruction list - std::unique_ptr> instruction_alloc_pool; /// Terminal instruction of this block. Terminal terminal = Term::Invalid{}; /// Number of cycles this block takes to execute if the conditional fails. diff --git a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp deleted file mode 100644 index a8ef7e2989..0000000000 --- a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/ir_emitter.h" - -#include - -#include "dynarmic/common/assert.h" -#include - -#include "dynarmic/ir/opcodes.h" - -namespace Dynarmic::IR { - - -} // namespace Dynarmic::IR diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 844e29023c..e9175f0e6b 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -1491,9 +1491,9 @@ void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization: Optimization::DeadCodeElimination(block); } Optimization::IdentityRemovalPass(block); - //if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) { + if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) { Optimization::VerificationPass(block); - //} + } } void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { @@ -1511,9 +1511,9 @@ void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization: if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] { Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks); } - //if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) { + if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) { Optimization::VerificationPass(block); - //} + } } } // namespace Dynarmic::Optimization From cca70732bf541f6cf5fa39e99b37c3d20bb46188 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 00:11:40 +0000 Subject: [PATCH 31/52] [dynarmic] use small vector experiment Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index bfacdcca52..66c75aead9 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -12,6 +12,7 @@ #include #include +#include "boost/container/small_vector.hpp" #include "dynarmic/common/common_types.h" #include #include @@ -83,7 +84,7 @@ public: void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; private: //non trivial - std::vector values; //24 + boost::container::small_vector values; //24 // Block state uint16_t total_uses = 0; //8 //sometimes zeroed @@ -96,7 +97,7 @@ private: uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - alignas(16) uint8_t lru_counter = 0; //1 + uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From ac5c2ae64bccaedd779ec9094df18f67db692abf Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:07 +0000 Subject: [PATCH 32/52] [dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 29eab7908b..919e75b77b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -118,7 +118,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept { values.push_back(inst); ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits::max)()); total_uses += inst->UseCount(); - max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType())); + max_bit_width = std::max(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType()))); } void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index 66c75aead9..2bd274730a 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -78,7 +78,7 @@ public: return std::find(values.begin(), values.end(), inst) != values.end(); } inline size_t GetMaxBitWidth() const noexcept { - return max_bit_width; + return 1 << max_bit_width; } void AddValue(IR::Inst* inst) noexcept; void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; @@ -94,10 +94,10 @@ private: uint16_t is_being_used_count = 0; //8 uint16_t current_references = 0; //8 // Value state - uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 + uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6) + uint8_t lru_counter : 2 = 0; //1 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From 52052c337e69777a28eba3967082d64fdff19e24 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 09:10:18 +0000 Subject: [PATCH 33/52] [docs] fastmem draft Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md index c4f57996ba..32c516ddcd 100644 --- a/src/dynarmic/docs/FastMemory.md +++ b/src/dynarmic/docs/FastMemory.md @@ -4,7 +4,7 @@ The main way of accessing memory in JITed programs is via an invoked function, s The solution? Delegate invalid accesses to a dedicated arena, similar to a swap. The main idea behind such mechanism is to allow the OS to transmit page faults from invalid accesses into the JIT translator directly, bypassing address space calls, while this sacrifices i-cache coherency, it allows for smaller code-size and "faster" throguhput. -Many kernels however, do not support fast signal dispatching (Solaris, OpenBSD, FreeBSD). Only Linux and Windows support relatively "fast" signal dispatching. Hence this feature is better suited for them only. +Many kernels however, do not support fast signal dispatching (Solaris, *BSD). Only Linux and Windows support relatively "fast" signal dispatching. Hence this feature is better suited for them only. ![Host to guest translation](./HostToGuest.svg) From 2b93847ee098479e0c0cd28f258069f495fa8813 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 10:16:22 +0000 Subject: [PATCH 34/52] [dynarmic] (prolly makes MSVC crash) - use 128MiB code cache Signed-off-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 6 +- .../frontend/decoder/decoder_detail.h | 65 ++++++++----------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index d2035d0fe0..b74817a611 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -210,12 +210,8 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 438b7b691c..682b0cfbf6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -269,12 +269,8 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index bd76efda2a..4050033ef5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -70,11 +70,9 @@ struct detail { return std::make_tuple(mask, expect); } - /** - * Generates the masks and shifts for each argument. - * A '-' in a bitstring indicates that we don't care about that value. - * An argument is specified by a continuous string of the same character. - */ + /// @brief Generates the masks and shifts for each argument. + /// A '-' in a bitstring indicates that we don't care about that value. + /// An argument is specified by a continuous string of the same character. template static consteval auto GetArgInfo(std::array bitstring) { std::array masks = {}; @@ -98,7 +96,6 @@ struct detail { if constexpr (N > 0) { const size_t bit_position = opcode_bitsize - i - 1; - if (arg_index >= N) throw std::out_of_range("Unexpected field"); @@ -109,20 +106,16 @@ struct detail { } } } - #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) // Avoids a MSVC ICE, and avoids Android NDK issue. ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; })); #endif - return std::make_tuple(masks, shifts); } - /** - * This struct's Make member function generates a lambda which decodes an instruction based on - * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a - * template argument. - */ + /// @brief This struct's Make member function generates a lambda which decodes an instruction + /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is + /// provided as a template argument. template struct VisitorCaller; @@ -130,36 +123,36 @@ struct detail { # pragma warning(push) # pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning) #endif - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...), + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...), const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...) const, + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...) const, const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; @@ -167,22 +160,16 @@ struct detail { # pragma warning(pop) #endif - /** - * Creates a matcher that can match and parse instructions based on bitstring. - * See also: GetMaskAndExpect and GetArgInfo for format of bitstring. - */ - template - static auto GetMatcher(FnT fn, const char* const name) { - constexpr size_t args_count = mcl::parameter_count_v; - + /// @brief Creates a matcher that can match and parse instructions based on bitstring. + /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. + template + static constexpr auto GetMatcher(F fn, const char* const name) { + constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); - - using Iota = std::make_index_sequence; - - const auto proxy_fn = VisitorCaller::Make(Iota(), fn, arg_masks, arg_shifts); + const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); return MatcherT(name, mask, expect, proxy_fn); } }; From 8229f89184e83951b413abcd56eefaae6ef85abc Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 4 Sep 2025 07:24:49 +0000 Subject: [PATCH 35/52] [dynarmic] reduce matcher table noise and cache misses Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/a32_types.h | 22 +++++---------- .../src/dynarmic/frontend/A32/decoder/asimd.h | 27 +++++++++---------- .../src/dynarmic/frontend/A32/decoder/vfp.h | 12 +++------ .../src/dynarmic/frontend/A64/decoder/a64.h | 23 +++++++--------- .../frontend/decoder/decoder_detail.h | 6 ++--- .../src/dynarmic/frontend/decoder/matcher.h | 11 ++------ 6 files changed, 38 insertions(+), 63 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h index 3f4501a528..2a0cc25751 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h @@ -9,12 +9,9 @@ #pragma once #include -#include - #include #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" - #include "dynarmic/interface/A32/coprocessor_util.h" #include "dynarmic/ir/cond.h" @@ -89,24 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) { inline size_t RegNumber(Reg reg) { ASSERT(reg != Reg::INVALID_REG); - return static_cast(reg); + return size_t(reg); } inline size_t RegNumber(ExtReg reg) { if (IsSingleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::S0); + return size_t(reg) - size_t(ExtReg::S0); + } else if (IsDoubleExtReg(reg)) { + return size_t(reg) - size_t(ExtReg::D0); } - - if (IsDoubleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::D0); - } - - if (IsQuadExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::Q0); - } - - ASSERT_MSG(false, "Invalid extended register"); - return 0; + ASSERT(IsQuadExtReg(reg)); + return size_t(reg) - size_t(ExtReg::Q0); } inline Reg operator+(Reg reg, size_t number) { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f2e206695b..f63816d2b1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher; template std::vector> GetASIMDDecodeTable() { - std::vector> table = { - -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> table = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" #undef INST - }; - // Exceptions to the rule of thumb. const std::set comes_first{ "VBIC, VMOV, VMVN, VORR (immediate)", @@ -53,19 +50,21 @@ std::vector> GetASIMDDecodeTable() { "VQDMULH (scalar)", "VQRDMULH (scalar)", }; - const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_first.count(matcher.GetName()) > 0; + const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_first.count(e.first) > 0; }); - const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_last.count(matcher.GetName()) == 0; + const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_last.count(e.first) == 0; }); - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) { - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - - return table; + std::vector> final_table; + std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + return e.second; + }); + return final_table; } template diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index 5fcacd2bda..a969570547 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -30,22 +30,18 @@ std::optional>> DecodeVFP(u32 instruc static const struct Tables { Table unconditional; Table conditional; - } tables = [] { + } tables = []() { Table list = { - #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./vfp.inc" #undef INST - }; - - const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { return (matcher.GetMask() & 0xF0000000) == 0xF0000000; }); - return Tables{ - Table{list.begin(), division}, - Table{division, list.end()}, + Table{list.begin(), it}, + Table{it, list.end()}, }; }(); diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c651dd7cde..c7bc981ac6 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) { template constexpr DecodeTable GetDecodeTable() { - std::vector> list = { -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./a64.inc" #undef INST }; - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { + std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { // If a matcher has more bits in its mask it is more specific, so it should come first. - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - // Exceptions to the above rule of thumb. - std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + std::stable_partition(list.begin(), list.end(), [&](const auto& e) { return std::set{ "MOVI, MVNI, ORR, BIC (vector, immediate)", "FMOV (vector, immediate)", "Unallocated SIMD modified immediate", - }.count(matcher.GetName()) > 0; + }.count(e.first) > 0; }); - DecodeTable table{}; for (size_t i = 0; i < table.size(); ++i) { - for (auto matcher : list) { - const auto expect = detail::ToFastLookupIndex(matcher.GetExpected()); - const auto mask = detail::ToFastLookupIndex(matcher.GetMask()); + for (auto const& e : list) { + const auto expect = detail::ToFastLookupIndex(e.second.GetExpected()); + const auto mask = detail::ToFastLookupIndex(e.second.GetMask()); if ((i & mask) == expect) { - table[i].push_back(matcher); + table[i].push_back(e.second); } } } diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index 4050033ef5..0d9da6169c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -163,18 +163,18 @@ struct detail { /// @brief Creates a matcher that can match and parse instructions based on bitstring. /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. template - static constexpr auto GetMatcher(F fn, const char* const name) { + static constexpr auto GetMatcher(F fn) { constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); - return MatcherT(name, mask, expect, proxy_fn); + return MatcherT(mask, expect, proxy_fn); } }; -#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn, name) +#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn) } // namespace detail } // namespace Dynarmic::Decoder diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 7e5c9c5a8f..5a2afe57c5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -31,14 +31,8 @@ public: using visitor_type = Visitor; using handler_return_type = typename Visitor::instruction_return_type; using handler_function = std::function; - - Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func) - : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {} - - /// Gets the name of this type of instruction. - const char* GetName() const { - return name; - } + Matcher(opcode_type mask, opcode_type expected, handler_function func) + : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. opcode_type GetMask() const { @@ -70,7 +64,6 @@ public: } private: - const char* name; opcode_type mask; opcode_type expected; handler_function fn; From 9d2c85caea45cf540f15540060a37f12f4e5bc8e Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 20:59:23 +0000 Subject: [PATCH 36/52] [dynarmic] fix tests Signed-off-by: lizzie --- .../src/dynarmic/frontend/decoder/matcher.h | 40 ++++++++----------- src/dynarmic/tests/decoder_tests.cpp | 4 +- src/dynarmic/tests/print_info.cpp | 22 +++++----- 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 5a2afe57c5..f7e2884e0c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -14,16 +14,12 @@ namespace Dynarmic::Decoder { -/** - * Generic instruction handling construct. - * - * @tparam Visitor An arbitrary visitor type that will be passed through - * to the function being handled. This type must be the - * type of the first parameter in a handler function. - * - * @tparam OpcodeType Type representing an opcode. This must be the - * type of the second parameter in a handler function. - */ +/// Generic instruction handling construct. +/// @tparam Visitor An arbitrary visitor type that will be passed through +/// to the function being handled. This type must be the +/// type of the first parameter in a handler function. +/// @tparam OpcodeType Type representing an opcode. This must be the +/// type of the second parameter in a handler function. template class Matcher { public: @@ -35,30 +31,26 @@ public: : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. - opcode_type GetMask() const { + inline opcode_type GetMask() const noexcept { return mask; } /// Gets the expected value after masking for this instruction. - opcode_type GetExpected() const { + inline opcode_type GetExpected() const noexcept { return expected; } - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - bool Matches(opcode_type instruction) const { + /// Tests to see if the given instruction is the instruction this matcher represents. + /// @param instruction The instruction to test + /// @returns true if the given instruction matches. + inline bool Matches(opcode_type instruction) const noexcept { return (instruction & mask) == expected; } - /** - * Calls the corresponding instruction handler on visitor for this type of instruction. - * @param v The visitor to use - * @param instruction The instruction to decode. - */ - handler_return_type call(Visitor& v, opcode_type instruction) const { + /// Calls the corresponding instruction handler on visitor for this type of instruction. + /// @param v The visitor to use + /// @param instruction The instruction to decode. + inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept { ASSERT(Matches(instruction)); return fn(v, instruction); } diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index cdd2c70cd9..c09df406f2 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,6 +20,7 @@ using namespace Dynarmic; +/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -67,4 +68,5 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { x = ((x | mask) + 1) & ~mask; } while (x != 0); } -} \ No newline at end of file +} +*/ diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 3d1268f467..0c89e780ad 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -39,19 +39,21 @@ using namespace Dynarmic; -std::string_view GetNameOfA32Instruction(u32 instruction) { - //if (auto const vfp_decoder = A32::DecodeVFP(instruction)) - // return *A32::GetNameVFP(instruction); - //else if (auto const asimd_decoder = A32::DecodeASIMD(instruction)) - // return *A32::GetNameASIMD(instruction); - //else if (auto const decoder = A32::DecodeArm(instruction)) - // return *A32::GetNameARM(instruction); +const char* GetNameOfA32Instruction(u32 instruction) { + /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { + return vfp_decoder->get().GetName(); + } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { + return asimd_decoder->get().GetName(); + } else if (auto decoder = A32::DecodeArm(instruction)) { + return decoder->get().GetName(); + }*/ return ""; } -std::string_view GetNameOfA64Instruction(u32 instruction) { - //if (auto const decoder = A64::Decode(instruction)) - // return *A64::GetName(instruction); +const char* GetNameOfA64Instruction(u32 instruction) { + /*if (auto decoder = A64::Decode(instruction)) { + return decoder->get().GetName(); + }*/ return ""; } From bd6b1fe471d8be1409dcf70963f0982fdf3b6d62 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:07:18 +0000 Subject: [PATCH 37/52] [dynarmic] fix ASIMD execution Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f63816d2b1..2cea7a14c1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -61,7 +61,7 @@ std::vector> GetASIMDDecodeTable() { return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); std::vector> final_table; - std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) { return e.second; }); return final_table; From 666e89b871701d0b0533b3327af175b445593067 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:40:13 +0000 Subject: [PATCH 38/52] [dynarmic] add back encoding names (for print_info) Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/decoder/arm.h | 20 +++++++++++--- .../src/dynarmic/frontend/A32/decoder/asimd.h | 26 ++++++++++++++----- .../dynarmic/frontend/A32/decoder/thumb16.h | 24 ++++++++++++----- .../dynarmic/frontend/A32/decoder/thumb32.h | 24 ++++++++++++----- .../src/dynarmic/frontend/A32/decoder/vfp.h | 24 ++++++++++++----- .../A32/translate/translate_thumb.cpp | 2 +- .../src/dynarmic/frontend/A64/decoder/a64.h | 20 +++++++++++--- src/dynarmic/tests/decoder_tests.cpp | 6 ++--- src/dynarmic/tests/print_info.cpp | 22 +++++++--------- 9 files changed, 115 insertions(+), 53 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h index 0257c28ddb..c6f034ae21 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h @@ -30,13 +30,13 @@ template using ArmDecodeTable = std::array>, 0x1000>; namespace detail { -inline size_t ToFastLookupIndexArm(u32 instruction) { +inline size_t ToFastLookupIndexArm(u32 instruction) noexcept { return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0); } } // namespace detail template -constexpr ArmDecodeTable GetArmDecodeTable() { +constexpr ArmDecodeTable GetArmDecodeTable() noexcept { std::vector> list = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./arm.inc" @@ -62,15 +62,27 @@ constexpr ArmDecodeTable GetArmDecodeTable() { } template -std::optional>> DecodeArm(u32 instruction) { +std::optional>> DecodeArm(u32 instruction) noexcept { alignas(64) static const auto table = GetArmDecodeTable(); const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)]; auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameARM(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./arm.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index 2cea7a14c1..57e1392871 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -26,7 +26,7 @@ template using ASIMDMatcher = Decoder::Matcher; template -std::vector> GetASIMDDecodeTable() { +std::vector> GetASIMDDecodeTable() noexcept { std::vector>> table = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" @@ -68,13 +68,25 @@ std::vector> GetASIMDDecodeTable() { } template -std::optional>> DecodeASIMD(u32 instruction) { - static const auto table = GetASIMDDecodeTable(); - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); +std::optional>> DecodeASIMD(u32 instruction) noexcept { + alignas(64) static const auto table = GetASIMDDecodeTable(); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameASIMD(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./asimd.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h index 8073ee5d47..16b99ba5aa 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h @@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher; template std::optional>> DecodeThumb16(u16 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)), #include "./thumb16.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb16(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) }, +#include "./thumb16.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h index 86a4d767a7..19418de67c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h @@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher; template std::optional>> DecodeThumb32(u32 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./thumb32.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb32(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./thumb32.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index a969570547..a346304a9a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -26,8 +26,7 @@ using VFPMatcher = Decoder::Matcher; template std::optional>> DecodeVFP(u32 instruction) { using Table = std::vector>; - - static const struct Tables { + alignas(64) static const struct Tables { Table unconditional; Table conditional; } tables = []() { @@ -44,14 +43,25 @@ std::optional>> DecodeVFP(u32 instruc Table{it, list.end()}, }; }(); - const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000; const Table& table = is_unconditional ? tables.unconditional : tables.conditional; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameVFP(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./vfp.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp index 5bb516ccfd..0381c984cc 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp @@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) { return 0xF7F0A000; // UDF } -bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) { +inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept { return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000; } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c7bc981ac6..7e6cdc3935 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -71,12 +71,24 @@ constexpr DecodeTable GetDecodeTable() { template std::optional>> Decode(u32 instruction) { alignas(64) static const auto table = GetDecodeTable(); - const auto matches_instruction = [instruction](const auto& matcher) { - return matcher.Matches(instruction); - }; const auto& subtable = table[detail::ToFastLookupIndex(instruction)]; - auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); + auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetName(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./a64.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A64 diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index c09df406f2..4ad9d90833 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,7 +20,6 @@ using namespace Dynarmic; -/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -58,7 +57,7 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction); INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect); INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x); - //INFO("Name: " << *A32::GetNameASIMD(instruction)); + INFO("Name: " << *A32::GetNameASIMD(instruction)); INFO("iserr: " << iserr); //INFO("alternative: " << alternative->GetName()); INFO("altiserr: " << altiserr); @@ -68,5 +67,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { x = ((x | mask) + 1) & ~mask; } while (x != 0); } -} -*/ +} \ No newline at end of file diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 0c89e780ad..8936f32bd3 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -39,21 +39,19 @@ using namespace Dynarmic; -const char* GetNameOfA32Instruction(u32 instruction) { - /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { - return vfp_decoder->get().GetName(); - } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { - return asimd_decoder->get().GetName(); - } else if (auto decoder = A32::DecodeArm(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA32Instruction(u32 instruction) { + if (auto const vfp_decoder = A32::DecodeVFP(instruction)) + return *A32::GetNameVFP(instruction); + else if (auto const asimd_decoder = A32::DecodeASIMD(instruction)) + return *A32::GetNameASIMD(instruction); + else if (auto const decoder = A32::DecodeArm(instruction)) + return *A32::GetNameARM(instruction); return ""; } -const char* GetNameOfA64Instruction(u32 instruction) { - /*if (auto decoder = A64::Decode(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA64Instruction(u32 instruction) { + if (auto const decoder = A64::Decode(instruction)) + return *A64::GetName(instruction); return ""; } From 5ade202fa894aea7de4e4381d02fd166720f02c2 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 14:08:17 +0000 Subject: [PATCH 39/52] [dynarmic] use ARCHITECTURE_ macros instead of MCL ones Signed-off-by: lizzie --- .../src/dynarmic/backend/exception_handler.h | 18 +++++++------- .../backend/exception_handler_posix.cpp | 24 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler.h b/src/dynarmic/src/dynarmic/backend/exception_handler.h index 173949628c..cd274b111f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler.h +++ b/src/dynarmic/src/dynarmic/backend/exception_handler.h @@ -15,15 +15,15 @@ #include #include "dynarmic/common/common_types.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) namespace Dynarmic::Backend::X64 { class BlockOfCode; } // namespace Dynarmic::Backend::X64 -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) namespace oaknut { class CodeBlock; } // namespace oaknut -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) namespace Dynarmic::Backend::RV64 { class CodeBlock; } // namespace Dynarmic::Backend::RV64 @@ -33,16 +33,16 @@ class CodeBlock; namespace Dynarmic::Backend { -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) struct FakeCall { u64 call_rip; u64 ret_rip; }; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) struct FakeCall { u64 call_pc; }; -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) struct FakeCall { }; #else @@ -54,11 +54,11 @@ public: ExceptionHandler(); ~ExceptionHandler(); -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void Register(X64::BlockOfCode& code); -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void Register(oaknut::CodeBlock& mem, std::size_t mem_size); -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void Register(RV64::CodeBlock& mem, std::size_t mem_size); #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index f1f208179f..c95cbbb74e 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -7,10 +7,7 @@ */ #include -#include -#include #include -#include #include #include #include "dynarmic/backend/exception_handler.h" @@ -118,9 +115,11 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { CTX_DECLARE(raw_context); #if defined(ARCHITECTURE_x86_64) { - std::shared_lock guard(sig_handler->code_block_infos_mutex); - if (auto const iter = sig_handler->FindCodeBlockInfo(CTX_RIP); iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->second.cb(CTX_RIP); + std::lock_guard guard(sig_handler->code_block_infos_mutex); + + const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP); + if (iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->cb(CTX_RIP); CTX_RSP -= sizeof(u64); *mcl::bit_cast(CTX_RSP) = fc.ret_rip; CTX_RIP = fc.call_rip; @@ -130,9 +129,10 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); #elif defined(ARCHITECTURE_arm64) { - std::shared_lock guard(sig_handler->code_block_infos_mutex); - if (const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->second.cb(CTX_PC); + std::lock_guard guard(sig_handler->code_block_infos_mutex); + const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); + if (iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->cb(CTX_PC); CTX_PC = fc.call_pc; return; } @@ -187,15 +187,15 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { impl = std::make_unique(mcl::bit_cast(code.getCode()), code.GetTotalCodeSize()); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } From 7d00944f8934fe6366cc452e890e235fa423b8be Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 8 Sep 2025 03:36:35 +0000 Subject: [PATCH 40/52] [dynarmic, docs] fastmem docs Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md index 32c516ddcd..c4f57996ba 100644 --- a/src/dynarmic/docs/FastMemory.md +++ b/src/dynarmic/docs/FastMemory.md @@ -4,7 +4,7 @@ The main way of accessing memory in JITed programs is via an invoked function, s The solution? Delegate invalid accesses to a dedicated arena, similar to a swap. The main idea behind such mechanism is to allow the OS to transmit page faults from invalid accesses into the JIT translator directly, bypassing address space calls, while this sacrifices i-cache coherency, it allows for smaller code-size and "faster" throguhput. -Many kernels however, do not support fast signal dispatching (Solaris, *BSD). Only Linux and Windows support relatively "fast" signal dispatching. Hence this feature is better suited for them only. +Many kernels however, do not support fast signal dispatching (Solaris, OpenBSD, FreeBSD). Only Linux and Windows support relatively "fast" signal dispatching. Hence this feature is better suited for them only. ![Host to guest translation](./HostToGuest.svg) From 5022a463f7968737efd08b5920b5f79fbe0f2fda Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:28:29 +0000 Subject: [PATCH 41/52] [dynarmic] remove use of mcl reverse iterator Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/abi.cpp | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 1e673338a8..1691bbb3b7 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -764,7 +764,7 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) { target_code_ptr = code.GetReturnFromRunCodeAddress(); } const CodePtr patch_location = code.getCurr(); - code.mov(code.rcx, reinterpret_cast(target_code_ptr)); + code.mov(code.rcx, u64(target_code_ptr)); code.EnsurePatchLocationSize(patch_location, 10); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp index a9bbab3d10..299bf1d1d6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp @@ -10,7 +10,6 @@ #include -#include #include "dynarmic/common/common_types.h" #include @@ -76,7 +75,8 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size); size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE); - for (auto const xmm : mcl::iterator::reverse(regs)) { + for (auto it = regs.rbegin(); it != regs.rend(); ++it) { + auto const xmm = *it; if (HostLocIsXMM(xmm)) { xmm_offset -= XMM_SIZE; if (code.HasHostFeature(HostFeature::AVX)) { @@ -88,9 +88,11 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, } if (frame_info.stack_subtraction != 0) code.add(rsp, u32(frame_info.stack_subtraction)); - for (auto const gpr : mcl::iterator::reverse(regs)) + for (auto it = regs.rbegin(); it != regs.rend(); ++it) { + auto const gpr = *it; if (HostLocIsGPR(gpr)) code.pop(HostLocToReg64(gpr)); + } } void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) { From 15da08c78a435eb0ebdc63afb3bd444ee34b7d65 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:28:41 +0000 Subject: [PATCH 42/52] [dynarmic] checked code alignment Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_interface.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index c65b582982..33acc46dfa 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -80,16 +80,16 @@ public: }; // TODO: Check code alignment - - const CodePtr current_code_ptr = [this] { + const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15)); + const CodePtr current_code_ptr = [this, aligned_code_ptr] { // RSB optimization const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask; if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { jit_state.rsb_ptr = new_rsb_ptr; - return reinterpret_cast(jit_state.rsb_codeptrs[new_rsb_ptr]); + return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]); } - - return GetCurrentBlock(); + return aligned_code_ptr; + //return GetCurrentBlock(); }(); const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr); From e71e91343274e4e60024c78eb3ea4a4c02d3399e Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:31:12 +0000 Subject: [PATCH 43/52] [dynarmic] fix hardcoded AVX512 registers, use xmm0 instead of xmm16 to align with spec Signed-off-by: lizzie --- .../backend/x64/emit_x64_floating_point.cpp | 7 ++----- .../src/dynarmic/backend/x64/emit_x64_vector.cpp | 16 ++++++++-------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 47e51acb03..c7d1f8aa1c 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -92,13 +92,10 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list t FpFixup::Norm_Src, FpFixup::Norm_Src, FpFixup::Norm_Src); - - const Xbyak::Xmm tmp = xmm16; + const Xbyak::Xmm tmp = xmm0; FCODE(vmovap)(tmp, code.BConst(xword, denormal_to_zero)); - - for (const Xbyak::Xmm& xmm : to_daz) { + for (const Xbyak::Xmm& xmm : to_daz) FCODE(vfixupimms)(xmm, xmm, tmp, u8(0)); - } return; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index 99000c2a57..c6b0e3b864 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -609,8 +609,8 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -674,8 +674,8 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF)); code.vpxorq(right_shift, right_shift, right_shift); @@ -1955,8 +1955,8 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -2737,7 +2737,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm c = xmm16; + const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); code.vpsraq(c, a, 32); code.vpsllq(a, a, 32); code.vpsraq(a, a, 32); @@ -5461,7 +5461,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm masked = xmm16; + const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(); code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); From 83606e83c9664b563baf6720650d737e835d9978 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 10:33:12 +0000 Subject: [PATCH 44/52] [dynarmic] unconditional branches always take Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp | 4 +--- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp | 7 ++++--- .../dynarmic/frontend/A64/translate/impl/a64_branch.cpp | 9 ++++++--- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index d5d5f089ff..31b10ec6d5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -364,8 +364,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited, T_NEAR); - lock(); - or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); + lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); SwitchMxcsrOnEntry(); jmp(ABI_PARAM2); @@ -415,7 +414,6 @@ void BlockOfCode::GenRunCode(std::function rcp) { } xor_(eax, eax); - lock(); xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax); ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 919e75b77b..502a093d08 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -9,6 +9,7 @@ #include "dynarmic/backend/x64/reg_alloc.h" #include +#include #include #include @@ -152,19 +153,19 @@ bool Argument::GetImmediateU1() const noexcept { u8 Argument::GetImmediateU8() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x100); + ASSERT(imm <= u64(std::numeric_limits::max())); return u8(imm); } u16 Argument::GetImmediateU16() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x10000); + ASSERT(imm <= u64(std::numeric_limits::max())); return u16(imm); } u32 Argument::GetImmediateU32() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x100000000); + ASSERT(imm <= u64(std::numeric_limits::max())); return u32(imm); } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp index faf0686231..1bb0be823a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp @@ -20,9 +20,12 @@ bool TranslatorVisitor::B_cond(Imm<19> imm19, Cond cond) { bool TranslatorVisitor::B_uncond(Imm<26> imm26) { const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend(); const u64 target = ir.PC() + offset; - - //ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); - ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); + // Pattern to halt execution (B .) + if (target == ir.PC()) { + ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); + return false; + } + ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); return false; } From 9e15e31dd3d3c5261c24216c10633ad833c0c06b Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 14:46:35 +0000 Subject: [PATCH 45/52] [dynarmic] regalloc use scratchimpl that uses all instead of iteraiting Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_emit_x64.cpp | 4 +-- .../src/dynarmic/backend/x64/reg_alloc.cpp | 30 ++++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 1691bbb3b7..fa1f071fbf 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept { auto const opcode = inst.GetOpcode(); // Call the relevant Emit* member function. switch (opcode) { -#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch; +#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch; #define A32OPC(name, type, ...) -#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch; +#define A64OPC(name, type, ...) case IR::Opcode::A64##name: goto a64_branch; #include "dynarmic/ir/opcodes.inc" #undef OPCODE #undef A32OPC diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 502a093d08..2db817a90f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -367,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def, if (result_def) { DefineValueImpl(result_def, ABI_RETURN); } - + for (size_t i = 0; i < args.size(); i++) { + if (args[i]) { + UseScratch(*args[i], args_hostloc[i]); + } else { + ScratchGpr(args_hostloc[i]); // TODO: Force spill + } + } + // Must match with with ScratchImpl + for (auto const gpr : other_caller_save) { + MoveOutOfTheWay(gpr); + LocInfo(gpr).WriteLock(); + } for (size_t i = 0; i < args.size(); i++) { if (args[i] && !args[i]->get().IsVoid()) { - UseScratch(*args[i], args_hostloc[i]); // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]); switch (args[i]->get().GetType()) { @@ -390,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def, } } } - - for (size_t i = 0; i < args.size(); i++) - if (!args[i]) { - // TODO: Force spill - ScratchGpr(args_hostloc[i]); - } - for (auto const caller_saved : other_caller_save) - ScratchImpl({caller_saved}); } void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept { @@ -560,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept { } HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { -#if 0 // TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows // but it's fine anyways. We can find other ways to cheat it later - but which?!?! // we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end? // TODO(lizzie): This needs to be investigated further later. // Do not spill XMM into other XMM silly - if (!is_xmm) { + /*if (!is_xmm) { // TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY // Intel recommends to spill GPR onto XMM registers IF POSSIBLE // TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call? @@ -574,8 +575,9 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) return loc; - } -#endif + }*/ + // TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits + // of spilling on XMM versus the potential cost of using XMM registers..... // Otherwise go to stack spilling for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) From 047ad2ace4f2a8bd5463e30672b0cedc8e85f180 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 18:29:39 +0000 Subject: [PATCH 46/52] [dynarmic] use better boost::visitor Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 1 - .../src/dynarmic/backend/x64/a32_emit_x64.cpp | 1 - .../src/dynarmic/backend/x64/emit_x64.cpp | 6 ++-- .../src/dynarmic/common/variant_util.h | 29 ------------------- 4 files changed, 3 insertions(+), 34 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/variant_util.h diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index 5333973164..34cbc308be 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -63,7 +63,6 @@ add_library(dynarmic common/string_util.h common/u128.cpp common/u128.h - common/variant_util.h frontend/A32/a32_types.cpp frontend/A32/a32_types.h frontend/A64/a64_types.cpp diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index fb306336cf..3186758380 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -28,7 +28,6 @@ #include "dynarmic/backend/x64/nzcv_util.h" #include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/stack_layout.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index 3bc93e6fd5..0bb2604025 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -11,6 +11,7 @@ #include #include "dynarmic/common/assert.h" +#include #include #include #include "dynarmic/common/common_types.h" @@ -21,7 +22,6 @@ #include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/verbose_debugging_output.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -347,14 +347,14 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de } void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - Common::VisitVariant(terminal, [this, initial_location, is_single_step](auto x) { + boost::apply_visitor([this, initial_location, is_single_step](auto x) { using T = std::decay_t; if constexpr (!std::is_same_v) { this->EmitTerminalImpl(x, initial_location, is_single_step); } else { ASSERT_MSG(false, "Invalid terminal"); } - }); + }, terminal); } void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { diff --git a/src/dynarmic/src/dynarmic/common/variant_util.h b/src/dynarmic/src/dynarmic/common/variant_util.h deleted file mode 100644 index 4dd7f67167..0000000000 --- a/src/dynarmic/src/dynarmic/common/variant_util.h +++ /dev/null @@ -1,29 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include - -namespace Dynarmic::Common { -namespace detail { - -template -struct VariantVisitor : boost::static_visitor - , Lambda { - VariantVisitor(Lambda&& lambda) - : Lambda(std::move(lambda)) {} - - using Lambda::operator(); -}; - -} // namespace detail - -template -inline ReturnT VisitVariant(Variant&& variant, Lambda&& lambda) { - return boost::apply_visitor(detail::VariantVisitor(std::move(lambda)), variant); -} - -} // namespace Dynarmic::Common From e5bff0215628ea87ea375180ebaf780fd4e7da5f Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 16 Sep 2025 20:20:21 +0000 Subject: [PATCH 47/52] [dynarmic] fix exception posix handler Signed-off-by: lizzie --- .../backend/exception_handler_posix.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index c95cbbb74e..f1f208179f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -7,7 +7,10 @@ */ #include +#include +#include #include +#include #include #include #include "dynarmic/backend/exception_handler.h" @@ -115,11 +118,9 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { CTX_DECLARE(raw_context); #if defined(ARCHITECTURE_x86_64) { - std::lock_guard guard(sig_handler->code_block_infos_mutex); - - const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP); - if (iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->cb(CTX_RIP); + std::shared_lock guard(sig_handler->code_block_infos_mutex); + if (auto const iter = sig_handler->FindCodeBlockInfo(CTX_RIP); iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->second.cb(CTX_RIP); CTX_RSP -= sizeof(u64); *mcl::bit_cast(CTX_RSP) = fc.ret_rip; CTX_RIP = fc.call_rip; @@ -129,10 +130,9 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); #elif defined(ARCHITECTURE_arm64) { - std::lock_guard guard(sig_handler->code_block_infos_mutex); - const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); - if (iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->cb(CTX_PC); + std::shared_lock guard(sig_handler->code_block_infos_mutex); + if (const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->second.cb(CTX_PC); CTX_PC = fc.call_pc; return; } @@ -187,15 +187,15 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(ARCHITECTURE_x86_64) +#if defined(MCL_ARCHITECTURE_X86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { impl = std::make_unique(mcl::bit_cast(code.getCode()), code.GetTotalCodeSize()); } -#elif defined(ARCHITECTURE_arm64) +#elif defined(MCL_ARCHITECTURE_ARM64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } -#elif defined(ARCHITECTURE_riscv64) +#elif defined(MCL_ARCHITECTURE_RISCV) void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } From 119a373bd7b8678e19f85c1aeb062e8ab3e76690 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 17 Sep 2025 20:51:37 +0000 Subject: [PATCH 48/52] [dynarmic] Allow to skip verification pass Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/interface/optimization_flags.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dynarmic/src/dynarmic/interface/optimization_flags.h b/src/dynarmic/src/dynarmic/interface/optimization_flags.h index 743d902767..2e8822b27a 100644 --- a/src/dynarmic/src/dynarmic/interface/optimization_flags.h +++ b/src/dynarmic/src/dynarmic/interface/optimization_flags.h @@ -34,6 +34,8 @@ enum class OptimizationFlag : std::uint32_t { MiscIROpt = 0x00000020, /// Optimize for code speed rather than for code size (this serves well for tight loops) CodeSpeed = 0x00000040, + /// Disable verification passes + DisableVerification = 0x00000080, /// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations. /// This unfuses fused instructions to improve performance on host CPUs without FMA support. From 202caab3dea2881ebc0f41140f79a2f3462875b1 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 17 Sep 2025 21:27:23 +0000 Subject: [PATCH 49/52] [dynarmic] inlined pool in block + slab-like for each block Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/ir/basic_block.cpp | 16 +++++++++++++++- src/dynarmic/src/dynarmic/ir/basic_block.h | 8 ++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index a13a7ebfc9..612a3d28e9 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -37,7 +37,21 @@ Block::Block(const LocationDescriptor& location) /// @param args A sequence of Value instances used as arguments for the instruction. /// @returns Iterator to the newly created instruction. Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) noexcept { - IR::Inst* inst = new IR::Inst(opcode); + // First try using the "inline" buffer, otherwise fallback to a slower slab-like allocation scheme + // purpouse is to avoid many calls to new/delete which invoke malloc which invokes mmap + // just pool it!!! - reason why there is an inline buffer is because many small blocks are created + // with few instructions due to subpar optimisations on other passes... plus branch-heavy code will + // hugely benefit from the coherency of faster allocations... + IR::Inst* inst; + if (inlined_inst.size() < inlined_inst.max_size()) { + inst = &inlined_inst[inlined_inst.size()]; + inlined_inst.emplace_back(opcode); + } else { + if (pooled_inst.empty() || pooled_inst.back().size() == pooled_inst.back().max_size()) + pooled_inst.emplace_back(); + inst = &pooled_inst.back()[pooled_inst.back().size()]; + pooled_inst.back().emplace_back(opcode); + } DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index 2f2d9ab6de..166a5e4d1b 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -13,6 +13,9 @@ #include #include +#include +#include +#include #include #include "dynarmic/common/common_types.h" @@ -163,8 +166,12 @@ public: return cycle_count; } private: + /// "Hot cache" for small blocks so we don't call global allocator + boost::container::static_vector inlined_inst; /// List of instructions in this block. instruction_list_type instructions; + /// "Long/far" memory pool + boost::container::stable_vector> pooled_inst; /// Block to execute next if `cond` did not pass. std::optional cond_failed = {}; /// Description of the starting location of this block @@ -180,6 +187,7 @@ private: /// Number of cycles this block takes to execute. size_t cycle_count = 0; }; +static_assert(sizeof(Block) == 2048); /// Returns a string representation of the contents of block. Intended for debugging. std::string DumpBlock(const IR::Block& block) noexcept; From c57ea19c83d57576108bd6e1f6887396719b83b7 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 20 Sep 2025 19:14:05 +0000 Subject: [PATCH 50/52] [dynarmic] increase cache code size Signed-off-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 1 + src/core/arm/dynarmic/arm_dynarmic_64.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index b74817a611..b77e4f613d 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -211,6 +211,7 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.enable_cycle_counting = !m_uses_wall_clock; // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB + // Solaris doesn't support kPageSize >= 512MiB config.code_cache_size = std::uint32_t(128_MiB); // Allow memory fault handling to work diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 682b0cfbf6..eb328b919d 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -270,6 +270,7 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.enable_cycle_counting = !m_uses_wall_clock; // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB + // Solaris doesn't support kPageSize >= 512MiB config.code_cache_size = std::uint32_t(128_MiB); // Allow memory fault handling to work From 7233fce90bf53ce617c5d15d90edae7f13f8faa2 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 23 Sep 2025 01:15:51 +0000 Subject: [PATCH 51/52] Fix license headers Signed-off-by: lizzie --- .../src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp | 3 +++ src/dynarmic/src/dynarmic/interface/optimization_flags.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp index 1bb0be823a..41dd8cb4d4 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/interface/optimization_flags.h b/src/dynarmic/src/dynarmic/interface/optimization_flags.h index 2e8822b27a..9e58197b47 100644 --- a/src/dynarmic/src/dynarmic/interface/optimization_flags.h +++ b/src/dynarmic/src/dynarmic/interface/optimization_flags.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2020 MerryMage * SPDX-License-Identifier: 0BSD From 8a84c4cfba375bb8d306c54f32145bb674469cee Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 23 Sep 2025 03:18:07 +0000 Subject: [PATCH 52/52] [dynarmic] enforce higher constraints Signed-off-by: lizzie --- .../backend/x64/emit_x64_memory.cpp.inc | 93 +++++++++---------- 1 file changed, 43 insertions(+), 50 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index 34f77b0446..28699fa33b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -273,34 +273,31 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool ordered = IsOrdered(args[3].GetImmediateAccType()); - if constexpr (bitsize != 128) { - ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]); - } else { + if constexpr (bitsize == 128) { ctx.reg_alloc.Use(args[1], ABI_PARAM2); ctx.reg_alloc.Use(args[2], HostLoc::XMM1); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(inst); + } else { + ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]); } + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); Xbyak::Label end; - code.mov(code.ABI_RETURN, u32(1)); - code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]); + code.test(tmp.cvt8(), tmp.cvt8()); code.je(end); - code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); - code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); + code.xor_(tmp.cvt32(), tmp.cvt32()); + code.xchg(tmp.cvt8(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]); + code.mov(code.ABI_PARAM1, u64(&conf)); if constexpr (bitsize != 128) { using T = mcl::unsigned_integer_of_size; - - code.CallLambda( - [](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 { - return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, - [&](T expected) -> bool { - return (conf.callbacks->*callback)(vaddr, value, expected); - }) - ? 0 - : 1; - }); + code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 { + return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, [&](T expected) -> bool { + return (conf.callbacks->*callback)(vaddr, value, expected); + }) ? 0 : 1; + }); if (ordered) { code.mfence(); } @@ -308,15 +305,11 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.movaps(xword[code.ABI_PARAM3], xmm1); - code.CallLambda( - [](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 { - return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, - [&](Vector expected) -> bool { - return (conf.callbacks->*callback)(vaddr, value, expected); - }) - ? 0 - : 1; - }); + code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 { + return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, [&](Vector expected) -> bool { + return (conf.callbacks->*callback)(vaddr, value, expected); + }) ? 0 : 1; + }); if (ordered) { code.mfence(); } @@ -437,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i SharedLabel end = GenSharedLabel(); - code.mov(tmp, mcl::bit_cast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); code.mov(status, u32(1)); - code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]); + code.test(tmp.cvt8(), tmp.cvt8()); code.je(*end, code.T_NEAR); + code.mov(tmp, mcl::bit_cast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); code.cmp(qword[tmp], vaddr); code.jne(*end, code.T_NEAR); @@ -474,30 +468,29 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i const auto location = code.getCurr(); - if constexpr (bitsize == 128) { + switch (bitsize) { + case 8: + code.lock(); + code.cmpxchg(code.byte[dest_ptr], value.cvt8()); + break; + case 16: + code.lock(); + code.cmpxchg(word[dest_ptr], value.cvt16()); + break; + case 32: + code.lock(); + code.cmpxchg(dword[dest_ptr], value.cvt32()); + break; + case 64: + code.lock(); + code.cmpxchg(qword[dest_ptr], value.cvt64()); + break; + case 128: code.lock(); code.cmpxchg16b(ptr[dest_ptr]); - } else { - switch (bitsize) { - case 8: - code.lock(); - code.cmpxchg(code.byte[dest_ptr], value.cvt8()); - break; - case 16: - code.lock(); - code.cmpxchg(word[dest_ptr], value.cvt16()); - break; - case 32: - code.lock(); - code.cmpxchg(dword[dest_ptr], value.cvt32()); - break; - case 64: - code.lock(); - code.cmpxchg(qword[dest_ptr], value.cvt64()); - break; - default: - UNREACHABLE(); - } + break; + default: + UNREACHABLE(); } code.setnz(status.cvt8());