From 211fefde20d4acbc8aafebbfc6b7f2fd434b88ff Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:54:53 +0000 Subject: [PATCH 01/32] [dynarmic] reduce opt pass latency Signed-off-by: lizzie --- src/dynarmic/CMakeLists.txt | 3 - src/dynarmic/docs/RegisterAllocator.md | 40 +- .../docs/ReturnStackBufferOptimization.md | 162 +- src/dynarmic/src/dynarmic/CMakeLists.txt | 199 +-- .../backend/arm64/a32_address_space.cpp | 18 +- .../backend/arm64/a64_address_space.cpp | 19 +- .../backend/riscv64/a32_address_space.cpp | 16 +- .../dynarmic/backend/x64/a32_interface.cpp | 16 +- .../dynarmic/backend/x64/a64_interface.cpp | 18 +- .../src/dynarmic/common/memory_pool.cpp | 13 - .../src/dynarmic/common/memory_pool.h | 61 - src/dynarmic/src/dynarmic/ir/basic_block.cpp | 7 +- src/dynarmic/src/dynarmic/ir/basic_block.h | 3 - src/dynarmic/src/dynarmic/ir/ir_emitter.cpp | 21 - .../ir/opt/a32_constant_memory_reads_pass.cpp | 70 - .../ir/opt/a32_get_set_elimination_pass.cpp | 382 ----- .../ir/opt/a64_callback_config_pass.cpp | 57 - .../ir/opt/a64_get_set_elimination_pass.cpp | 165 -- .../ir/opt/a64_merge_interpret_blocks.cpp | 57 - .../ir/opt/constant_propagation_pass.cpp | 559 ------ .../ir/opt/dead_code_elimination_pass.cpp | 23 - .../dynarmic/ir/opt/identity_removal_pass.cpp | 44 - src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h | 127 -- .../src/dynarmic/ir/opt/naming_pass.cpp | 18 - src/dynarmic/src/dynarmic/ir/opt/passes.h | 47 - .../src/dynarmic/ir/opt/polyfill_pass.cpp | 218 --- .../src/dynarmic/ir/opt/verification_pass.cpp | 51 - src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 1502 +++++++++++++++++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 34 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 10 +- src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 2 +- src/dynarmic/tests/CMakeLists.txt | 146 +- src/dynarmic/tests/print_info.cpp | 2 +- 33 files changed, 1808 insertions(+), 2302 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.cpp delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.h delete mode 100644 src/dynarmic/src/dynarmic/ir/ir_emitter.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/passes.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.h diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 5c28435f72..471a08c703 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -23,9 +23,6 @@ option(DYNARMIC_USE_PRECOMPILED_HEADERS "Use precompiled headers" ON) option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF) option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. cross-compiling)" OFF) option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT}) -if (NOT DEFINED DYNARMIC_FRONTENDS) - set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable") -endif() # Default to a Release build if (NOT CMAKE_BUILD_TYPE) diff --git a/src/dynarmic/docs/RegisterAllocator.md b/src/dynarmic/docs/RegisterAllocator.md index fea6f19e6a..5a7210c791 100644 --- a/src/dynarmic/docs/RegisterAllocator.md +++ b/src/dynarmic/docs/RegisterAllocator.md @@ -16,19 +16,31 @@ Note that `Use`ing a value decrements its `use_count` by one. When the `use_coun The member functions on `RegAlloc` are just a combination of the above concepts. +The following registers are reserved for internal use and should NOT participate in register allocation: +- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access. +- `%rsp`: Stack pointer. + +The layout convenes `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate. + +Do NEVER modify `%r15`, we must make it clear that this register is "immutable" for the entirety of the JIT block duration. + ### `Scratch` - Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr) - Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm) +```c++ +Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr); +Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm); +``` At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope. ### Pure `Use` - Xbyak::Reg64 UseGpr(Argument& arg); - Xbyak::Xmm UseXmm(Argument& arg); - OpArg UseOpArg(Argument& arg); - void Use(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseGpr(Argument& arg); +Xbyak::Xmm UseXmm(Argument& arg); +OpArg UseOpArg(Argument& arg); +void Use(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it @@ -39,9 +51,11 @@ This register **must not** have it's value changed. ### `UseScratch` - Xbyak::Reg64 UseScratchGpr(Argument& arg); - Xbyak::Xmm UseScratchXmm(Argument& arg); - void UseScratch(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseScratchGpr(Argument& arg); +Xbyak::Xmm UseScratchXmm(Argument& arg); +void UseScratch(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it @@ -55,7 +69,9 @@ You are free to modify the value in the register. The register is discarded at t A `Define` is the defintion of a value. This is the only time when a value may be set. - void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +```c++ +void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +``` By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the value to the specified register `reg`. @@ -64,7 +80,9 @@ value to the specified register `reg`. Adding a `Define` to an existing value. - void DefineValue(IR::Inst* inst, Argument& arg); +```c++ +void DefineValue(IR::Inst* inst, Argument& arg); +``` You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are emitted. diff --git a/src/dynarmic/docs/ReturnStackBufferOptimization.md b/src/dynarmic/docs/ReturnStackBufferOptimization.md index 6ffe41bcc6..0e72c3bce8 100644 --- a/src/dynarmic/docs/ReturnStackBufferOptimization.md +++ b/src/dynarmic/docs/ReturnStackBufferOptimization.md @@ -23,15 +23,17 @@ One complication dynarmic has is that a compiled block is not uniquely identifia the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block. - u64 LocationDescriptor::UniqueHash() const { - // This value MUST BE UNIQUE. - // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint - u64 pc_u64 = u64(arm_pc) << 32; - u64 fpscr_u64 = u64(fpscr.Value()); - u64 t_u64 = cpsr.T() ? 1 : 0; - u64 e_u64 = cpsr.E() ? 2 : 0; - return pc_u64 | fpscr_u64 | t_u64 | e_u64; - } +```c++ +u64 LocationDescriptor::UniqueHash() const { + // This value MUST BE UNIQUE. + // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint + u64 pc_u64 = u64(arm_pc) << 32; + u64 fpscr_u64 = u64(fpscr.Value()); + u64 t_u64 = cpsr.T() ? 1 : 0; + u64 e_u64 = cpsr.E() ? 2 : 0; + return pc_u64 | fpscr_u64 | t_u64 | e_u64; +} +``` ## Our implementation isn't actually a stack @@ -49,97 +51,107 @@ host addresses for the corresponding the compiled blocks. size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8 showed degraded performance. - struct JitState { - // ... +```c++ +struct JitState { + // ... - static constexpr size_t RSBSize = 8; // MUST be a power of 2. - u32 rsb_ptr = 0; - std::array rsb_location_descriptors; - std::array rsb_codeptrs; - void ResetRSB(); + static constexpr size_t RSBSize = 8; // MUST be a power of 2. + u32 rsb_ptr = 0; + std::array rsb_location_descriptors; + std::array rsb_codeptrs; + void ResetRSB(); - // ... - }; + // ... +}; +``` ### RSB Push We insert our prediction at the insertion point iff the RSB doesn't already contain a prediction with the same `UniqueHash`. - void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { + using namespace Xbyak::util; - ASSERT(inst->GetArg(0).IsImmediate()); - u64 imm64 = inst->GetArg(0).GetU64(); + ASSERT(inst->GetArg(0).IsImmediate()); + u64 imm64 = inst->GetArg(0).GetU64(); - Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); - Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); - Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); - u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() - ? u64(unique_hash_to_code_ptr[imm64]) - : u64(code->GetReturnFromRunCodeAddress()); + Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); + Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); + Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); + u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() + ? u64(unique_hash_to_code_ptr[imm64]) + : u64(code->GetReturnFromRunCodeAddress()); - code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); - code->add(index_reg, 1); - code->and_(index_reg, u32(JitState::RSBSize - 1)); + code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); + code->add(index_reg, 1); + code->and_(index_reg, u32(JitState::RSBSize - 1)); - code->mov(loc_desc_reg, u64(imm64)); - CodePtr patch_location = code->getCurr(); - patch_unique_hash_locations[imm64].emplace_back(patch_location); - code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. - code->EnsurePatchLocationSize(patch_location, 10); + code->mov(loc_desc_reg, u64(imm64)); + CodePtr patch_location = code->getCurr(); + patch_unique_hash_locations[imm64].emplace_back(patch_location); + code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. + code->EnsurePatchLocationSize(patch_location, 10); - Xbyak::Label label; - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->je(label, code->T_SHORT); - } - - code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); - code->L(label); + Xbyak::Label label; + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->je(label, code->T_SHORT); } + code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); + code->L(label); +} +``` + In pseudocode: - for (i := 0 .. RSBSize-1) - if (rsb_location_descriptors[i] == imm64) - goto label; - rsb_ptr++; - rsb_ptr %= RSBSize; - rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash - rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; - label: +```c++ + for (i := 0 .. RSBSize-1) + if (rsb_location_descriptors[i] == imm64) + goto label; + rsb_ptr++; + rsb_ptr %= RSBSize; + rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash + rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; +label: +``` ## RSB Pop To check if a predicition is in the RSB, we linearly scan the RSB. - void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { + using namespace Xbyak::util; - // This calculation has to match up with IREmitter::PushRSB - code->mov(ecx, MJitStateReg(Arm::Reg::PC)); - code->shl(rcx, 32); - code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); - code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); - code->or_(rbx, rcx); + // This calculation has to match up with IREmitter::PushRSB + code->mov(ecx, MJitStateReg(Arm::Reg::PC)); + code->shl(rcx, 32); + code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); + code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); + code->or_(rbx, rcx); - code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); - } - - code->jmp(rax); + code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); } + code->jmp(rax); +} +``` + In pseudocode: - rbx := ComputeUniqueHash() - rax := ReturnToDispatch - for (i := 0 .. RSBSize-1) - if (rbx == rsb_location_descriptors[i]) - rax = rsb_codeptrs[i] - goto rax \ No newline at end of file +```c++ +rbx := ComputeUniqueHash() +rax := ReturnToDispatch +for (i := 0 .. RSBSize-1) + if (rbx == rsb_location_descriptors[i]) + rax = rsb_codeptrs[i] +goto rax +``` diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index e060989f82..6948bf39fb 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -56,8 +56,6 @@ add_library(dynarmic common/lut_from_list.h common/math_util.cpp common/math_util.h - common/memory_pool.cpp - common/memory_pool.h common/safe_ops.h common/spin_lock.h common/string_util.h @@ -78,7 +76,6 @@ add_library(dynarmic ir/basic_block.cpp ir/basic_block.h ir/cond.h - ir/ir_emitter.cpp ir/ir_emitter.h ir/location_descriptor.cpp ir/location_descriptor.h @@ -87,78 +84,59 @@ add_library(dynarmic ir/opcodes.cpp ir/opcodes.h ir/opcodes.inc - ir/opt/constant_propagation_pass.cpp - ir/opt/dead_code_elimination_pass.cpp - ir/opt/identity_removal_pass.cpp - ir/opt/ir_matcher.h - ir/opt/naming_pass.cpp - ir/opt/passes.h - ir/opt/polyfill_pass.cpp - ir/opt/verification_pass.cpp + ir/opt_passes.cpp + ir/opt_passes.h ir/terminal.h ir/type.cpp ir/type.h ir/value.cpp ir/value.h + # A32 + frontend/A32/a32_ir_emitter.cpp + frontend/A32/a32_ir_emitter.h + frontend/A32/a32_location_descriptor.cpp + frontend/A32/a32_location_descriptor.h + frontend/A32/decoder/arm.h + frontend/A32/decoder/arm.inc + frontend/A32/decoder/asimd.h + frontend/A32/decoder/asimd.inc + frontend/A32/decoder/thumb16.h + frontend/A32/decoder/thumb16.inc + frontend/A32/decoder/thumb32.h + frontend/A32/decoder/thumb32.inc + frontend/A32/decoder/vfp.h + frontend/A32/decoder/vfp.inc + frontend/A32/disassembler/disassembler.h + frontend/A32/disassembler/disassembler_arm.cpp + frontend/A32/disassembler/disassembler_thumb.cpp + frontend/A32/FPSCR.h + frontend/A32/ITState.h + frontend/A32/PSR.h + frontend/A32/translate/a32_translate.cpp + frontend/A32/translate/a32_translate.h + frontend/A32/translate/conditional_state.cpp + frontend/A32/translate/conditional_state.h + frontend/A32/translate/translate_arm.cpp + frontend/A32/translate/translate_thumb.cpp + interface/A32/a32.h + interface/A32/arch_version.h + interface/A32/config.h + interface/A32/coprocessor.h + interface/A32/coprocessor_util.h + interface/A32/disassembler.h + # A64 + frontend/A64/a64_ir_emitter.cpp + frontend/A64/a64_ir_emitter.h + frontend/A64/a64_location_descriptor.cpp + frontend/A64/a64_location_descriptor.h + frontend/A64/decoder/a64.h + frontend/A64/decoder/a64.inc + frontend/A64/translate/a64_translate.cpp + frontend/A64/translate/a64_translate.h + interface/A64/a64.h + interface/A64/config.h ) -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A32/a32_ir_emitter.cpp - frontend/A32/a32_ir_emitter.h - frontend/A32/a32_location_descriptor.cpp - frontend/A32/a32_location_descriptor.h - frontend/A32/decoder/arm.h - frontend/A32/decoder/arm.inc - frontend/A32/decoder/asimd.h - frontend/A32/decoder/asimd.inc - frontend/A32/decoder/thumb16.h - frontend/A32/decoder/thumb16.inc - frontend/A32/decoder/thumb32.h - frontend/A32/decoder/thumb32.inc - frontend/A32/decoder/vfp.h - frontend/A32/decoder/vfp.inc - frontend/A32/disassembler/disassembler.h - frontend/A32/disassembler/disassembler_arm.cpp - frontend/A32/disassembler/disassembler_thumb.cpp - frontend/A32/FPSCR.h - frontend/A32/ITState.h - frontend/A32/PSR.h - frontend/A32/translate/a32_translate.cpp - frontend/A32/translate/a32_translate.h - frontend/A32/translate/conditional_state.cpp - frontend/A32/translate/conditional_state.h - frontend/A32/translate/translate_arm.cpp - frontend/A32/translate/translate_thumb.cpp - interface/A32/a32.h - interface/A32/arch_version.h - interface/A32/config.h - interface/A32/coprocessor.h - interface/A32/coprocessor_util.h - interface/A32/disassembler.h - ir/opt/a32_constant_memory_reads_pass.cpp - ir/opt/a32_get_set_elimination_pass.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A64/a64_ir_emitter.cpp - frontend/A64/a64_ir_emitter.h - frontend/A64/a64_location_descriptor.cpp - frontend/A64/a64_location_descriptor.h - frontend/A64/decoder/a64.h - frontend/A64/decoder/a64.inc - frontend/A64/translate/a64_translate.cpp - frontend/A64/translate/a64_translate.h - interface/A64/a64.h - interface/A64/config.h - ir/opt/a64_callback_config_pass.cpp - ir/opt/a64_get_set_elimination_pass.cpp - ir/opt/a64_merge_interpret_blocks.cpp - ) -endif() - if ("x86_64" IN_LIST ARCHITECTURE) target_compile_definitions(dynarmic PRIVATE XBYAK_OLD_DISP_CHECK=1) target_link_libraries(dynarmic @@ -211,29 +189,21 @@ if ("x86_64" IN_LIST ARCHITECTURE) common/spin_lock_x64.h common/x64_disassemble.cpp common/x64_disassemble.h + # A32 + backend/x64/a32_emit_x64.cpp + backend/x64/a32_emit_x64.h + backend/x64/a32_emit_x64_memory.cpp + backend/x64/a32_interface.cpp + backend/x64/a32_jitstate.cpp + backend/x64/a32_jitstate.h + # A64 + backend/x64/a64_emit_x64.cpp + backend/x64/a64_emit_x64.h + backend/x64/a64_emit_x64_memory.cpp + backend/x64/a64_interface.cpp + backend/x64/a64_jitstate.cpp + backend/x64/a64_jitstate.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a32_emit_x64.cpp - backend/x64/a32_emit_x64.h - backend/x64/a32_emit_x64_memory.cpp - backend/x64/a32_interface.cpp - backend/x64/a32_jitstate.cpp - backend/x64/a32_jitstate.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a64_emit_x64.cpp - backend/x64/a64_emit_x64.h - backend/x64/a64_emit_x64_memory.cpp - backend/x64/a64_interface.cpp - backend/x64/a64_jitstate.cpp - backend/x64/a64_jitstate.h - ) - endif() endif() if ("arm64" IN_LIST ARCHITECTURE) @@ -277,25 +247,17 @@ if ("arm64" IN_LIST ARCHITECTURE) backend/arm64/verbose_debugging_output.h common/spin_lock_arm64.cpp common/spin_lock_arm64.h + # A32 + backend/arm64/a32_address_space.cpp + backend/arm64/a32_address_space.h + backend/arm64/a32_core.h + backend/arm64/a32_interface.cpp + # A64 + backend/arm64/a64_address_space.cpp + backend/arm64/a64_address_space.h + backend/arm64/a64_core.h + backend/arm64/a64_interface.cpp ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a32_address_space.cpp - backend/arm64/a32_address_space.h - backend/arm64/a32_core.h - backend/arm64/a32_interface.cpp - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a64_address_space.cpp - backend/arm64/a64_address_space.h - backend/arm64/a64_core.h - backend/arm64/a64_interface.cpp - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -324,21 +286,14 @@ if ("riscv" IN_LIST ARCHITECTURE) backend/riscv64/reg_alloc.cpp backend/riscv64/reg_alloc.h backend/riscv64/stack_layout.h + # A32 + backend/riscv64/a32_address_space.cpp + backend/riscv64/a32_address_space.h + backend/riscv64/a32_core.h + backend/riscv64/a32_interface.cpp + backend/riscv64/code_block.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - backend/riscv64/a32_address_space.cpp - backend/riscv64/a32_address_space.h - backend/riscv64/a32_core.h - backend/riscv64/a32_interface.cpp - backend/riscv64/code_block.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") - endif() + message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") endif() if (WIN32) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index bbdf925ff4..8e48fa3687 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -16,7 +16,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -163,21 +163,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index d4fe9a9cb7..2b50ad9ea3 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -331,22 +331,7 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp index efa211618b..0bb9591411 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/backend/riscv64/stack_layout.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::RV64 { @@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp index b116ec180e..382eb70f3f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp @@ -29,7 +29,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/location_descriptor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A32 { @@ -217,19 +217,7 @@ private: block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE); IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index ddd2327395..c65b582982 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -25,7 +25,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/a64.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A64 { @@ -275,21 +275,7 @@ private: const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block).entrypoint; } diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.cpp b/src/dynarmic/src/dynarmic/common/memory_pool.cpp deleted file mode 100644 index f41dd92af5..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.cpp +++ /dev/null @@ -1,13 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/common/memory_pool.h" - -#include - -namespace Dynarmic::Common { - - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.h b/src/dynarmic/src/dynarmic/common/memory_pool.h deleted file mode 100644 index c99316e107..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -namespace Dynarmic::Common { - -/// @tparam object_size Byte-size of objects to construct -/// @tparam slab_size Number of objects to have per slab -template -class Pool { -public: - inline Pool() noexcept { - AllocateNewSlab(); - } - inline ~Pool() noexcept { - std::free(current_slab); - for (char* slab : slabs) { - std::free(slab); - } - } - - Pool(const Pool&) = delete; - Pool(Pool&&) = delete; - - Pool& operator=(const Pool&) = delete; - Pool& operator=(Pool&&) = delete; - - /// @brief Returns a pointer to an `object_size`-bytes block of memory. - [[nodiscard]] void* Alloc() noexcept { - if (remaining == 0) { - slabs.push_back(current_slab); - AllocateNewSlab(); - } - void* ret = static_cast(current_ptr); - current_ptr += object_size; - remaining--; - return ret; - } -private: - /// @brief Allocates a completely new memory slab. - /// Used when an entirely new slab is needed - /// due the current one running out of usable space. - void AllocateNewSlab() noexcept { - current_slab = static_cast(std::malloc(object_size * slab_size)); - current_ptr = current_slab; - remaining = slab_size; - } - - std::vector slabs; - char* current_slab = nullptr; - char* current_ptr = nullptr; - size_t remaining = 0; -}; - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index b00ab3cb20..a13a7ebfc9 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -15,8 +15,6 @@ #include #include "dynarmic/common/assert.h" - -#include "dynarmic/common/memory_pool.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A64/a64_types.h" #include "dynarmic/ir/cond.h" @@ -27,8 +25,7 @@ namespace Dynarmic::IR { Block::Block(const LocationDescriptor& location) : location{location}, end_location{location}, - cond{Cond::AL}, - instruction_alloc_pool{std::make_unique>()} + cond{Cond::AL} { } @@ -40,7 +37,7 @@ Block::Block(const LocationDescriptor& location) /// @param args A sequence of Value instances used as arguments for the instruction. /// @returns Iterator to the newly created instruction. Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) noexcept { - IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode); + IR::Inst* inst = new IR::Inst(opcode); DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index e88dc92fc4..2f2d9ab6de 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -21,7 +21,6 @@ #include "dynarmic/ir/terminal.h" #include "dynarmic/ir/value.h" #include "dynarmic/ir/dense_list.h" -#include "dynarmic/common/memory_pool.h" namespace Dynarmic::IR { @@ -174,8 +173,6 @@ private: LocationDescriptor end_location; /// Conditional to pass in order to execute this block Cond cond; - /// Memory pool for instruction list - std::unique_ptr> instruction_alloc_pool; /// Terminal instruction of this block. Terminal terminal = Term::Invalid{}; /// Number of cycles this block takes to execute if the conditional fails. diff --git a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp deleted file mode 100644 index a8ef7e2989..0000000000 --- a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/ir_emitter.h" - -#include - -#include "dynarmic/common/assert.h" -#include - -#include "dynarmic/ir/opcodes.h" - -namespace Dynarmic::IR { - - -} // namespace Dynarmic::IR diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp deleted file mode 100644 index 9699f18345..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/interface/A32/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { - for (auto& inst : block) { - switch (inst.GetOpcode()) { - case IR::Opcode::A32ReadMemory8: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u8 value_from_memory = cb->MemoryRead8(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory16: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u16 value_from_memory = cb->MemoryRead16(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory32: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u32 value_from_memory = cb->MemoryRead32(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory64: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u64 value_from_memory = cb->MemoryRead64(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp deleted file mode 100644 index 499b38b120..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp +++ /dev/null @@ -1,382 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A32/a32_ir_emitter.h" -#include "dynarmic/frontend/A32/a32_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -namespace { - -void FlagsPass(IR::Block& block) { - using Iterator = std::reverse_iterator; - - struct FlagInfo { - bool set_not_required = false; - bool has_value_request = false; - Iterator value_request = {}; - }; - struct ValuelessFlagInfo { - bool set_not_required = false; - }; - ValuelessFlagInfo nzcvq; - ValuelessFlagInfo nzcv; - ValuelessFlagInfo nz; - FlagInfo c_flag; - FlagInfo ge; - - auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(value); - } - info.has_value_request = false; - - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_get = [](FlagInfo& info, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(IR::Value{&*inst}); - } - info.has_value_request = true; - info.value_request = inst; - }; - - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetCFlag: { - do_get(c_flag, inst); - break; - } - case IR::Opcode::A32SetCpsrNZCV: { - if (c_flag.has_value_request) { - ir.SetInsertionPointBefore(inst.base()); // base is one ahead - IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); - c_flag.value_request->ReplaceUsesWith(c); - c_flag.has_value_request = false; - break; // This case will be executed again because of the above - } - - do_set_valueless(nzcv, inst); - - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVRaw: { - if (c_flag.has_value_request) { - nzcv.set_not_required = false; - } - - do_set_valueless(nzcv, inst); - - nzcvq = {}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVQ: { - if (c_flag.has_value_request) { - nzcvq.set_not_required = false; - } - - do_set_valueless(nzcvq, inst); - - nzcv = {.set_not_required = true}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZ: { - do_set_valueless(nz, inst); - - nzcvq = {}; - nzcv = {}; - break; - } - case IR::Opcode::A32SetCpsrNZC: { - if (c_flag.has_value_request) { - c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); - c_flag.has_value_request = false; - } - - if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { - const auto nz_value = inst->GetArg(0); - - inst->Invalidate(); - - ir.SetInsertionPointBefore(inst.base()); - ir.SetCpsrNZ(IR::NZCV{nz_value}); - - nzcvq = {}; - nzcv = {}; - nz = {.set_not_required = true}; - break; - } - - if (nz.set_not_required && c_flag.set_not_required) { - inst->Invalidate(); - } else if (nz.set_not_required) { - inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); - } - nz.set_not_required = true; - c_flag.set_not_required = true; - - nzcv = {}; - nzcvq = {}; - break; - } - case IR::Opcode::A32SetGEFlags: { - do_set(ge, inst->GetArg(0), inst); - break; - } - case IR::Opcode::A32GetGEFlags: { - do_get(ge, inst); - break; - } - case IR::Opcode::A32SetGEFlagsCompressed: { - ge = {.set_not_required = true}; - break; - } - case IR::Opcode::A32OrQFlag: { - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcvq = {}; - nzcv = {}; - nz = {}; - c_flag = {}; - ge = {}; - } - break; - } - } - } -} - -void RegisterPass(IR::Block& block) { - using Iterator = IR::Block::iterator; - - struct RegInfo { - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array reg_info; - - const auto do_get = [](RegInfo& info, Iterator get_inst) { - if (info.register_value.IsEmpty()) { - info.register_value = IR::Value(&*get_inst); - return; - } - get_inst->ReplaceUsesWith(info.register_value); - }; - - const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { - if (info.last_set_instruction) { - (*info.last_set_instruction)->Invalidate(); - } - info = { - .register_value = value, - .last_set_instruction = set_inst, - }; - }; - - enum class ExtValueType { - Empty, - Single, - Double, - VectorDouble, - VectorQuad, - }; - struct ExtRegInfo { - ExtValueType value_type = {}; - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array ext_reg_info; - - const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { - if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = IR::Value(&*get_inst), - .last_set_instruction = std::nullopt, - }; - } - return; - } - get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); - }; - - const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { - if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - if (std::data(infos)[0].get().last_set_instruction) { - (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); - } - } - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = value, - .last_set_instruction = set_inst, - }; - } - }; - - // Location and version don't matter here. - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - ASSERT(reg != A32::Reg::PC); - const size_t reg_index = static_cast(reg); - do_get(reg_info[reg_index], inst); - break; - } - case IR::Opcode::A32SetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - if (reg == A32::Reg::PC) { - break; - } - const auto reg_index = static_cast(reg); - do_set(reg_info[reg_index], inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); - break; - } - case IR::Opcode::A32SetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - break; - } - case IR::Opcode::A32SetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst->GetArg(1), - inst); - break; - } - case IR::Opcode::A32GetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - do_ext_get(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_get(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst); - } - break; - } - case IR::Opcode::A32SetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - ir.SetInsertionPointAfter(inst); - const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); - do_ext_set(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - stored_value, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_set(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst->GetArg(1), - inst); - } - break; - } - default: { - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - ext_reg_info = {}; - } - break; - } - } - } -} - -} // namespace - -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { - FlagsPass(block); - RegisterPass(block); -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp deleted file mode 100644 index 79d9769520..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/frontend/A64/a64_ir_emitter.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { - if (conf.hook_data_cache_operations) { - return; - } - - for (auto& inst : block) { - if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { - continue; - } - - const auto op = static_cast(inst.GetArg(1).GetU64()); - if (op == A64::DataCacheOperation::ZeroByVA) { - A64::IREmitter ir{block}; - ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; - ir.SetInsertionPointBefore(&inst); - - size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); - IR::U64 addr{inst.GetArg(2)}; - - const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); - while (bytes >= 16) { - ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(16)); - bytes -= 16; - } - - while (bytes >= 8) { - ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(8)); - bytes -= 8; - } - - while (bytes >= 4) { - ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(4)); - bytes -= 4; - } - } - inst.Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp deleted file mode 100644 index 53e3b27176..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -void A64GetSetElimination(IR::Block& block) { - using Iterator = IR::Block::iterator; - - enum class TrackingType { - W, - X, - S, - D, - Q, - SP, - NZCV, - NZCVRaw, - }; - struct RegisterInfo { - IR::Value register_value; - TrackingType tracking_type; - bool set_instruction_present = false; - Iterator last_set_instruction; - }; - std::array reg_info; - std::array vec_info; - RegisterInfo sp_info; - RegisterInfo nzcv_info; - - const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { - if (info.set_instruction_present) { - info.last_set_instruction->Invalidate(); - block.Instructions().erase(info.last_set_instruction); - } - - info.register_value = value; - info.tracking_type = tracking_type; - info.set_instruction_present = true; - info.last_set_instruction = set_inst; - }; - - const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { - const auto do_nothing = [&] { - info = {}; - info.register_value = IR::Value(&*get_inst); - info.tracking_type = tracking_type; - }; - - if (info.register_value.IsEmpty()) { - do_nothing(); - return; - } - - if (info.tracking_type == tracking_type) { - get_inst->ReplaceUsesWith(info.register_value); - return; - } - - do_nothing(); - }; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A64GetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::W); - break; - } - case IR::Opcode::A64GetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::X); - break; - } - case IR::Opcode::A64GetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::S); - break; - } - case IR::Opcode::A64GetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::D); - break; - } - case IR::Opcode::A64GetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64GetSP: { - do_get(sp_info, inst, TrackingType::SP); - break; - } - case IR::Opcode::A64GetNZCVRaw: { - do_get(nzcv_info, inst, TrackingType::NZCVRaw); - break; - } - case IR::Opcode::A64SetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); - break; - } - case IR::Opcode::A64SetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); - break; - } - case IR::Opcode::A64SetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); - break; - } - case IR::Opcode::A64SetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); - break; - } - case IR::Opcode::A64SetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64SetSP: { - do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); - break; - } - case IR::Opcode::A64SetNZCV: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); - break; - } - case IR::Opcode::A64SetNZCVRaw: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcv_info = {}; - } - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - vec_info = {}; - sp_info = {}; - } - break; - } - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp deleted file mode 100644 index 25b7ef0ff1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_location_descriptor.h" -#include "dynarmic/frontend/A64/translate/a64_translate.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { - const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { - const auto instruction = cb->MemoryReadCode(location.PC()); - if (!instruction) - return false; - - IR::Block new_block{location}; - A64::TranslateSingleInstruction(new_block, location, *instruction); - - if (!new_block.Instructions().empty()) - return false; - - const IR::Terminal terminal = new_block.GetTerminal(); - if (auto term = boost::get(&terminal)) { - return term->next == location; - } - - return false; - }; - - IR::Terminal terminal = block.GetTerminal(); - auto term = boost::get(&terminal); - if (!term) - return; - - A64::LocationDescriptor location{term->next}; - size_t num_instructions = 1; - - while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { - num_instructions++; - } - - term->num_instructions = num_instructions; - block.ReplaceTerminal(terminal); - block.CycleCount() += num_instructions - 1; -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp deleted file mode 100644 index 86ebca87d2..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp +++ /dev/null @@ -1,559 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/assert.h" -#include -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/common/safe_ops.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -using Op = Dynarmic::IR::Opcode; - -namespace { - -// Tiny helper to avoid the need to store based off the opcode -// bit size all over the place within folding functions. -void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { - if (is_32_bit) { - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); - } else { - inst.ReplaceUsesWith(IR::Value{value}); - } -} - -IR::Value Value(bool is_32_bit, u64 value) { - return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; -} - -template -bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - - const bool is_lhs_immediate = lhs.IsImmediate(); - const bool is_rhs_immediate = rhs.IsImmediate(); - - if (is_lhs_immediate && is_rhs_immediate) { - const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); - ReplaceUsesWith(inst, is_32_bit, result); - return false; - } - - if (is_lhs_immediate && !is_rhs_immediate) { - const IR::Inst* rhs_inst = rhs.GetInstRecursive(); - if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, rhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } else { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - } - } - - if (!is_lhs_immediate && is_rhs_immediate) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } - } - - return true; -} - -void FoldAdd(IR::Inst& inst, bool is_32_bit) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - if (lhs.IsImmediate() && !rhs.IsImmediate()) { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - FoldAdd(inst, is_32_bit); - return; - } - - if (inst.HasAssociatedPseudoOperation()) { - return; - } - - if (!lhs.IsImmediate() && rhs.IsImmediate()) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { - const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); - if (combined == 0) { - inst.ReplaceUsesWith(lhs_inst->GetArg(0)); - return; - } - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - return; - } - if (rhs.IsZero() && carry.IsZero()) { - inst.ReplaceUsesWith(lhs); - return; - } - } - - if (inst.AreAllArgsImmediates()) { - const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); - return; - } -} - -/// Folds AND operations based on the following: -/// -/// 1. imm_x & imm_y -> result -/// 2. x & 0 -> 0 -/// 3. 0 & y -> 0 -/// 4. x & y -> y (where x has all bits set to 1) -/// 5. x & y -> x (where y has all bits set to 1) -/// -void FoldAND(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.HasAllBitsSet()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -/// Folds byte reversal opcodes based on the following: -/// -/// 1. imm -> swap(imm) -/// -void FoldByteReverse(IR::Inst& inst, Op op) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - if (op == Op::ByteReverseWord) { - const u32 result = mcl::bit::swap_bytes_32(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else if (op == Op::ByteReverseHalf) { - const u16 result = mcl::bit::swap_bytes_16(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else { - const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); - inst.ReplaceUsesWith(IR::Value{result}); - } -} - -/// Folds division operations based on the following: -/// -/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) -/// 2. imm_x / imm_y -> result -/// 3. x / 1 -> x -/// -void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { - const auto rhs = inst.GetArg(1); - - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - return; - } - - const auto lhs = inst.GetArg(0); - if (lhs.IsImmediate() && rhs.IsImmediate()) { - if (is_signed) { - const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); - ReplaceUsesWith(inst, is_32_bit, static_cast(result)); - } else { - const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); - } - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(IR::Value{lhs}); - } -} - -// Folds EOR operations based on the following: -// -// 1. imm_x ^ imm_y -> result -// 2. x ^ 0 -> x -// 3. 0 ^ y -> y -// -void FoldEOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -void FoldLeastSignificantByte(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantHalf(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldMostSignificantBit(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); -} - -void FoldMostSignificantWord(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - if (carry_inst) { - carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); - } - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); -} - -// Folds multiplication operations based on the following: -// -// 1. imm_x * imm_y -> result -// 2. x * 0 -> 0 -// 3. 0 * y -> 0 -// 4. x * 1 -> x -// 5. 1 * y -> y -// -void FoldMultiply(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -// Folds NOT operations if the contained value is an immediate. -void FoldNOT(IR::Inst& inst, bool is_32_bit) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - const u64 result = ~operand.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -// Folds OR operations based on the following: -// -// 1. imm_x | imm_y -> result -// 2. x | 0 -> x -// 3. 0 | y -> y -// -void FoldOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -bool FoldShifts(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - // The 32-bit variants can contain 3 arguments, while the - // 64-bit variants only contain 2. - if (inst.NumArgs() == 3 && !carry_inst) { - inst.SetArg(2, IR::Value(false)); - } - - const auto shift_amount = inst.GetArg(1); - - if (shift_amount.IsZero()) { - if (carry_inst) { - carry_inst->ReplaceUsesWith(inst.GetArg(2)); - } - inst.ReplaceUsesWith(inst.GetArg(0)); - return false; - } - - if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { - inst.SetArg(2, IR::Value(false)); - } - - if (!inst.AreAllArgsImmediates() || carry_inst) { - return false; - } - - return true; -} - -void FoldSignExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSignExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSub(IR::Inst& inst, bool is_32_bit) { - if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { - return; - } - - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -void FoldZeroExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldZeroExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{value}); -} -} // Anonymous namespace - -void ConstantPropagation(IR::Block& block) { - for (auto& inst : block) { - const auto opcode = inst.GetOpcode(); - - switch (opcode) { - case Op::LeastSignificantWord: - FoldLeastSignificantWord(inst); - break; - case Op::MostSignificantWord: - FoldMostSignificantWord(inst); - break; - case Op::LeastSignificantHalf: - FoldLeastSignificantHalf(inst); - break; - case Op::LeastSignificantByte: - FoldLeastSignificantByte(inst); - break; - case Op::MostSignificantBit: - FoldMostSignificantBit(inst); - break; - case Op::IsZero32: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); - } - break; - case Op::IsZero64: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); - } - break; - case Op::LogicalShiftLeft32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeft64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeftMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftLeftMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::LogicalShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::ArithmeticShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::ArithmeticShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::RotateRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); - } - break; - case Op::RotateRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); - } - break; - case Op::Add32: - case Op::Add64: - FoldAdd(inst, opcode == Op::Add32); - break; - case Op::Sub32: - case Op::Sub64: - FoldSub(inst, opcode == Op::Sub32); - break; - case Op::Mul32: - case Op::Mul64: - FoldMultiply(inst, opcode == Op::Mul32); - break; - case Op::SignedDiv32: - case Op::SignedDiv64: - FoldDivide(inst, opcode == Op::SignedDiv32, true); - break; - case Op::UnsignedDiv32: - case Op::UnsignedDiv64: - FoldDivide(inst, opcode == Op::UnsignedDiv32, false); - break; - case Op::And32: - case Op::And64: - FoldAND(inst, opcode == Op::And32); - break; - case Op::Eor32: - case Op::Eor64: - FoldEOR(inst, opcode == Op::Eor32); - break; - case Op::Or32: - case Op::Or64: - FoldOR(inst, opcode == Op::Or32); - break; - case Op::Not32: - case Op::Not64: - FoldNOT(inst, opcode == Op::Not32); - break; - case Op::SignExtendByteToWord: - case Op::SignExtendHalfToWord: - FoldSignExtendXToWord(inst); - break; - case Op::SignExtendByteToLong: - case Op::SignExtendHalfToLong: - case Op::SignExtendWordToLong: - FoldSignExtendXToLong(inst); - break; - case Op::ZeroExtendByteToWord: - case Op::ZeroExtendHalfToWord: - FoldZeroExtendXToWord(inst); - break; - case Op::ZeroExtendByteToLong: - case Op::ZeroExtendHalfToLong: - case Op::ZeroExtendWordToLong: - FoldZeroExtendXToLong(inst); - break; - case Op::ByteReverseWord: - case Op::ByteReverseHalf: - case Op::ByteReverseDual: - FoldByteReverse(inst, opcode); - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp deleted file mode 100644 index bda9f6efd1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void DeadCodeElimination(IR::Block& block) { - // We iterate over the instructions in reverse order. - // This is because removing an instruction reduces the number of uses for earlier instructions. - for (auto& inst : mcl::iterator::reverse(block)) { - if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { - inst.Invalidate(); - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp deleted file mode 100644 index e87fcc335b..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void IdentityRemovalPass(IR::Block& block) { - std::vector to_invalidate; - - auto iter = block.begin(); - while (iter != block.end()) { - IR::Inst& inst = *iter; - - const size_t num_args = inst.NumArgs(); - for (size_t i = 0; i < num_args; i++) { - while (true) { - IR::Value arg = inst.GetArg(i); - if (!arg.IsIdentity()) - break; - inst.SetArg(i, arg.GetInst()->GetArg(0)); - } - } - - if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { - iter = block.Instructions().erase(inst); - to_invalidate.push_back(&inst); - } else { - ++iter; - } - } - - for (IR::Inst* inst : to_invalidate) { - inst->Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h b/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h deleted file mode 100644 index 5eb1a55100..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h +++ /dev/null @@ -1,127 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization::IRMatcher { - -struct CaptureValue { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value); - } -}; - -struct CaptureInst { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return std::tuple(value.GetInstRecursive()); - } -}; - -struct CaptureUImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsU64()); - } -}; - -struct CaptureSImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsS64()); - } -}; - -template -struct UImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsU64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct SImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsS64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct Inst { -public: - using ReturnType = mp::concat, typename Args::ReturnType...>; - - static std::optional Match(const IR::Inst& inst) { - if (inst.GetOpcode() != Opcode) - return std::nullopt; - if (inst.HasAssociatedPseudoOperation()) - return std::nullopt; - return MatchArgs<0>(inst); - } - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return Match(*value.GetInstRecursive()); - } - -private: - template - static auto MatchArgs(const IR::Inst& inst) -> std::optional>, std::tuple<>>>> { - if constexpr (I >= sizeof...(Args)) { - return std::tuple(); - } else { - using Arg = mp::get>; - - if (const auto arg = Arg::Match(inst.GetArg(I))) { - if (const auto rest = MatchArgs(inst)) { - return std::tuple_cat(*arg, *rest); - } - } - - return std::nullopt; - } - } -}; - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t); -} - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t); -} - -} // namespace Dynarmic::Optimization::IRMatcher diff --git a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp deleted file mode 100644 index a766bdc83f..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2023 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" - -namespace Dynarmic::Optimization { - -void NamingPass(IR::Block& block) { - unsigned name = 1; - for (auto& inst : block) { - inst.SetName(name++); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/passes.h b/src/dynarmic/src/dynarmic/ir/opt/passes.h deleted file mode 100644 index 703145b556..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/passes.h +++ /dev/null @@ -1,47 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -namespace Dynarmic::A32 { -struct UserCallbacks; -} - -namespace Dynarmic::A64 { -struct UserCallbacks; -struct UserConfig; -} // namespace Dynarmic::A64 - -namespace Dynarmic::IR { -class Block; -} - -namespace Dynarmic::Optimization { - -struct PolyfillOptions { - bool sha256 = false; - bool vector_multiply_widen = false; - - bool operator==(const PolyfillOptions&) const = default; -}; - -struct A32GetSetEliminationOptions { - bool convert_nzc_to_nz = false; - bool convert_nz_to_nzc = false; -}; - -void PolyfillPass(IR::Block& block, const PolyfillOptions& opt); -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb); -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt); -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf); -void A64GetSetElimination(IR::Block& block); -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb); -void ConstantPropagation(IR::Block& block); -void DeadCodeElimination(IR::Block& block); -void IdentityRemovalPass(IR::Block& block); -void VerificationPass(const IR::Block& block); -void NamingPass(IR::Block& block); - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp deleted file mode 100644 index 1aa3aea91e..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2022 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -namespace { - -void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - - const IR::U128 t = ir.VectorExtract(x, y, 32); - - IR::U128 result = ir.ZeroVector(); - for (size_t i = 0; i < 4; i++) { - const IR::U32 modified_element = [&] { - const IR::U32 element = ir.VectorGetElement(32, t, i); - const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); - const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); - const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); - }(); - - result = ir.VectorSetElement(32, result, i, modified_element); - } - result = ir.VectorAdd(32, result, x); - - inst.ReplaceUsesWith(result); -} - -void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 z = (IR::U128)inst.GetArg(2); - - const IR::U128 T0 = ir.VectorExtract(y, z, 32); - - const IR::U128 lower_half = [&] { - const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); - const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); - return ir.VectorZeroUpper(tmp5); - }(); - - const IR::U64 upper_half = [&] { - const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - - // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] - const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); - const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); - - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); - return ir.VectorGetElement(64, tmp5, 0); - }(); - - const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); - - inst.ReplaceUsesWith(result); -} - -IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Eor(ir.And(ir.Eor(y, z), x), z); -} - -IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); -} - -IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 x = (IR::U128)inst.GetArg(0); - IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 w = (IR::U128)inst.GetArg(2); - const bool part1 = inst.GetArg(3).GetU1(); - - for (size_t i = 0; i < 4; i++) { - const IR::U32 low_x = ir.VectorGetElement(32, x, 0); - const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); - const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); - const IR::U32 high_x = ir.VectorGetElement(32, x, 3); - - const IR::U32 low_y = ir.VectorGetElement(32, y, 0); - const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); - const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); - const IR::U32 high_y = ir.VectorGetElement(32, y, 3); - - const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); - const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); - - const IR::U32 t = [&] { - const IR::U32 w_element = ir.VectorGetElement(32, w, i); - const IR::U32 sig = SHAhashSIGMA1(ir, low_y); - - return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); - }(); - - const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); - const IR::U32 new_low_y = ir.Add(t, high_x); - - // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] - const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); - const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); - - x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); - y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); - } - - inst.ReplaceUsesWith(part1 ? x : y); -} - -template -void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 n = (IR::U128)inst.GetArg(0); - IR::U128 m = (IR::U128)inst.GetArg(1); - - const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); - const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); - - const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); - - inst.ReplaceUsesWith(result); -} - -} // namespace - -void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { - if (polyfill == PolyfillOptions{}) { - return; - } - - IR::IREmitter ir{block}; - - for (auto& inst : block) { - ir.SetInsertionPointBefore(&inst); - - switch (inst.GetOpcode()) { - case IR::Opcode::SHA256MessageSchedule0: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule0(ir, inst); - } - break; - case IR::Opcode::SHA256MessageSchedule1: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule1(ir, inst); - } - break; - case IR::Opcode::SHA256Hash: - if (polyfill.sha256) { - PolyfillSHA256Hash(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, false>(ir, inst); - } - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp deleted file mode 100644 index c6c2cff231..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/type.h" - -namespace Dynarmic::Optimization { - -void VerificationPass(const IR::Block& block) { - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const IR::Type t1 = inst.GetArg(i).GetType(); - const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); - if (!IR::AreTypesCompatible(t1, t2)) { - std::puts(IR::DumpBlock(block).c_str()); - ASSERT_FALSE("above block failed validation"); - } - } - } - - ankerl::unordered_dense::map actual_uses; - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const auto arg = inst.GetArg(i); - if (!arg.IsImmediate()) { - actual_uses[arg.GetInst()]++; - } - } - } - - for (const auto& pair : actual_uses) { - ASSERT(pair.first->UseCount() == pair.second); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp new file mode 100644 index 0000000000..383b915839 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -0,0 +1,1502 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include +#include + +#include +#include "boost/container/small_vector.hpp" +#include "dynarmic/frontend/A32/a32_ir_emitter.h" +#include "dynarmic/frontend/A32/a32_location_descriptor.h" +#include "dynarmic/frontend/A32/a32_types.h" +#include "dynarmic/frontend/A64/a64_ir_emitter.h" +#include "dynarmic/frontend/A64/a64_location_descriptor.h" +#include "dynarmic/frontend/A64/translate/a64_translate.h" +#include "dynarmic/interface/A32/config.h" +#include "dynarmic/interface/A64/config.h" +#include "dynarmic/interface/optimization_flags.h" +#include "dynarmic/common/safe_ops.h" +#include "dynarmic/ir/basic_block.h" +#include "dynarmic/ir/microinstruction.h" +#include "dynarmic/ir/opcodes.h" +#include "dynarmic/ir/opt_passes.h" +#include "dynarmic/ir/type.h" +#include "mcl/bit/swap.hpp" +#include "mcl/bit/rotate.hpp" +#include "mcl/iterator/reverse.hpp" + +namespace Dynarmic::Optimization { + +static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { + for (auto& inst : block) { + switch (inst.GetOpcode()) { + case IR::Opcode::A32ReadMemory8: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u8 value_from_memory = cb->MemoryRead8(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory16: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u16 value_from_memory = cb->MemoryRead16(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory32: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u32 value_from_memory = cb->MemoryRead32(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory64: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u64 value_from_memory = cb->MemoryRead64(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + default: + break; + } + } +} + +static void FlagsPass(IR::Block& block) { + using Iterator = std::reverse_iterator; + + struct FlagInfo { + bool set_not_required = false; + bool has_value_request = false; + Iterator value_request = {}; + }; + struct ValuelessFlagInfo { + bool set_not_required = false; + }; + ValuelessFlagInfo nzcvq; + ValuelessFlagInfo nzcv; + ValuelessFlagInfo nz; + FlagInfo c_flag; + FlagInfo ge; + + auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(value); + } + info.has_value_request = false; + + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_get = [](FlagInfo& info, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(IR::Value{&*inst}); + } + info.has_value_request = true; + info.value_request = inst; + }; + + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetCFlag: { + do_get(c_flag, inst); + break; + } + case IR::Opcode::A32SetCpsrNZCV: { + if (c_flag.has_value_request) { + ir.SetInsertionPointBefore(inst.base()); // base is one ahead + IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); + c_flag.value_request->ReplaceUsesWith(c); + c_flag.has_value_request = false; + break; // This case will be executed again because of the above + } + + do_set_valueless(nzcv, inst); + + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVRaw: { + if (c_flag.has_value_request) { + nzcv.set_not_required = false; + } + + do_set_valueless(nzcv, inst); + + nzcvq = {}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVQ: { + if (c_flag.has_value_request) { + nzcvq.set_not_required = false; + } + + do_set_valueless(nzcvq, inst); + + nzcv = {.set_not_required = true}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZ: { + do_set_valueless(nz, inst); + + nzcvq = {}; + nzcv = {}; + break; + } + case IR::Opcode::A32SetCpsrNZC: { + if (c_flag.has_value_request) { + c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); + c_flag.has_value_request = false; + } + + if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { + const auto nz_value = inst->GetArg(0); + + inst->Invalidate(); + + ir.SetInsertionPointBefore(inst.base()); + ir.SetCpsrNZ(IR::NZCV{nz_value}); + + nzcvq = {}; + nzcv = {}; + nz = {.set_not_required = true}; + break; + } + + if (nz.set_not_required && c_flag.set_not_required) { + inst->Invalidate(); + } else if (nz.set_not_required) { + inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); + } + nz.set_not_required = true; + c_flag.set_not_required = true; + + nzcv = {}; + nzcvq = {}; + break; + } + case IR::Opcode::A32SetGEFlags: { + do_set(ge, inst->GetArg(0), inst); + break; + } + case IR::Opcode::A32GetGEFlags: { + do_get(ge, inst); + break; + } + case IR::Opcode::A32SetGEFlagsCompressed: { + ge = {.set_not_required = true}; + break; + } + case IR::Opcode::A32OrQFlag: { + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcvq = {}; + nzcv = {}; + nz = {}; + c_flag = {}; + ge = {}; + } + break; + } + } + } +} + +static void RegisterPass(IR::Block& block) { + using Iterator = IR::Block::iterator; + + struct RegInfo { + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array reg_info; + + const auto do_get = [](RegInfo& info, Iterator get_inst) { + if (info.register_value.IsEmpty()) { + info.register_value = IR::Value(&*get_inst); + return; + } + get_inst->ReplaceUsesWith(info.register_value); + }; + + const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { + if (info.last_set_instruction) { + (*info.last_set_instruction)->Invalidate(); + } + info = { + .register_value = value, + .last_set_instruction = set_inst, + }; + }; + + enum class ExtValueType { + Empty, + Single, + Double, + VectorDouble, + VectorQuad, + }; + struct ExtRegInfo { + ExtValueType value_type = {}; + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array ext_reg_info; + + const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { + if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = IR::Value(&*get_inst), + .last_set_instruction = std::nullopt, + }; + } + return; + } + get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); + }; + + const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { + if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + if (std::data(infos)[0].get().last_set_instruction) { + (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); + } + } + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = value, + .last_set_instruction = set_inst, + }; + } + }; + + // Location and version don't matter here. + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + ASSERT(reg != A32::Reg::PC); + const size_t reg_index = size_t(reg); + do_get(reg_info[reg_index], inst); + break; + } + case IR::Opcode::A32SetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + if (reg == A32::Reg::PC) { + break; + } + const auto reg_index = size_t(reg); + do_set(reg_info[reg_index], inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); + break; + } + case IR::Opcode::A32SetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + break; + } + case IR::Opcode::A32SetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst->GetArg(1), + inst); + break; + } + case IR::Opcode::A32GetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + do_ext_get(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_get(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst); + } + break; + } + case IR::Opcode::A32SetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + ir.SetInsertionPointAfter(inst); + const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); + do_ext_set(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + stored_value, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_set(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst->GetArg(1), + inst); + } + break; + } + default: { + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + ext_reg_info = {}; + } + break; + } + } + } +} + +struct A32GetSetEliminationOptions { + bool convert_nzc_to_nz = false; + bool convert_nz_to_nzc = false; +}; + +static void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { + FlagsPass(block); + RegisterPass(block); +} + +static void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { + if (conf.hook_data_cache_operations) { + return; + } + + for (auto& inst : block) { + if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { + continue; + } + + const auto op = static_cast(inst.GetArg(1).GetU64()); + if (op == A64::DataCacheOperation::ZeroByVA) { + A64::IREmitter ir{block}; + ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; + ir.SetInsertionPointBefore(&inst); + + size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); + IR::U64 addr{inst.GetArg(2)}; + + const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); + while (bytes >= 16) { + ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(16)); + bytes -= 16; + } + + while (bytes >= 8) { + ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(8)); + bytes -= 8; + } + + while (bytes >= 4) { + ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(4)); + bytes -= 4; + } + } + inst.Invalidate(); + } +} + +static void A64GetSetElimination(IR::Block& block) { + using Iterator = IR::Block::iterator; + + enum class TrackingType { + W, + X, + S, + D, + Q, + SP, + NZCV, + NZCVRaw, + }; + struct RegisterInfo { + IR::Value register_value; + TrackingType tracking_type; + bool set_instruction_present = false; + Iterator last_set_instruction; + }; + std::array reg_info; + std::array vec_info; + RegisterInfo sp_info; + RegisterInfo nzcv_info; + + const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { + if (info.set_instruction_present) { + info.last_set_instruction->Invalidate(); + block.Instructions().erase(info.last_set_instruction); + } + + info.register_value = value; + info.tracking_type = tracking_type; + info.set_instruction_present = true; + info.last_set_instruction = set_inst; + }; + + const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { + const auto do_nothing = [&] { + info = {}; + info.register_value = IR::Value(&*get_inst); + info.tracking_type = tracking_type; + }; + + if (info.register_value.IsEmpty()) { + do_nothing(); + return; + } + + if (info.tracking_type == tracking_type) { + get_inst->ReplaceUsesWith(info.register_value); + return; + } + + do_nothing(); + }; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A64GetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::W); + break; + } + case IR::Opcode::A64GetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::X); + break; + } + case IR::Opcode::A64GetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::S); + break; + } + case IR::Opcode::A64GetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::D); + break; + } + case IR::Opcode::A64GetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64GetSP: { + do_get(sp_info, inst, TrackingType::SP); + break; + } + case IR::Opcode::A64GetNZCVRaw: { + do_get(nzcv_info, inst, TrackingType::NZCVRaw); + break; + } + case IR::Opcode::A64SetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); + break; + } + case IR::Opcode::A64SetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); + break; + } + case IR::Opcode::A64SetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); + break; + } + case IR::Opcode::A64SetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); + break; + } + case IR::Opcode::A64SetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64SetSP: { + do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); + break; + } + case IR::Opcode::A64SetNZCV: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); + break; + } + case IR::Opcode::A64SetNZCVRaw: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcv_info = {}; + } + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + vec_info = {}; + sp_info = {}; + } + break; + } + } + } +} + +static void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { + const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { + const auto instruction = cb->MemoryReadCode(location.PC()); + if (!instruction) + return false; + + IR::Block new_block{location}; + A64::TranslateSingleInstruction(new_block, location, *instruction); + + if (!new_block.Instructions().empty()) + return false; + + const IR::Terminal terminal = new_block.GetTerminal(); + if (auto term = boost::get(&terminal)) { + return term->next == location; + } + + return false; + }; + + IR::Terminal terminal = block.GetTerminal(); + auto term = boost::get(&terminal); + if (!term) + return; + + A64::LocationDescriptor location{term->next}; + size_t num_instructions = 1; + + while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { + num_instructions++; + } + + term->num_instructions = num_instructions; + block.ReplaceTerminal(terminal); + block.CycleCount() += num_instructions - 1; +} + +using Op = Dynarmic::IR::Opcode; + +// Tiny helper to avoid the need to store based off the opcode +// bit size all over the place within folding functions. +static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { + if (is_32_bit) { + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); + } else { + inst.ReplaceUsesWith(IR::Value{value}); + } +} + +static IR::Value Value(bool is_32_bit, u64 value) { + return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; +} + +template +static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + + const bool is_lhs_immediate = lhs.IsImmediate(); + const bool is_rhs_immediate = rhs.IsImmediate(); + + if (is_lhs_immediate && is_rhs_immediate) { + const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); + ReplaceUsesWith(inst, is_32_bit, result); + return false; + } + + if (is_lhs_immediate && !is_rhs_immediate) { + const IR::Inst* rhs_inst = rhs.GetInstRecursive(); + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, rhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } else { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + } + } + + if (!is_lhs_immediate && is_rhs_immediate) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } + } + + return true; +} + +static void FoldAdd(IR::Inst& inst, bool is_32_bit) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + if (lhs.IsImmediate() && !rhs.IsImmediate()) { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + FoldAdd(inst, is_32_bit); + return; + } + + if (inst.HasAssociatedPseudoOperation()) { + return; + } + + if (!lhs.IsImmediate() && rhs.IsImmediate()) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { + const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); + if (combined == 0) { + inst.ReplaceUsesWith(lhs_inst->GetArg(0)); + return; + } + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + return; + } + if (rhs.IsZero() && carry.IsZero()) { + inst.ReplaceUsesWith(lhs); + return; + } + } + + if (inst.AreAllArgsImmediates()) { + const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); + return; + } +} + +/// Folds AND operations based on the following: +/// +/// 1. imm_x & imm_y -> result +/// 2. x & 0 -> 0 +/// 3. 0 & y -> 0 +/// 4. x & y -> y (where x has all bits set to 1) +/// 5. x & y -> x (where y has all bits set to 1) +/// +static void FoldAND(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.HasAllBitsSet()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +/// Folds byte reversal opcodes based on the following: +/// +/// 1. imm -> swap(imm) +/// +static void FoldByteReverse(IR::Inst& inst, Op op) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + if (op == Op::ByteReverseWord) { + const u32 result = mcl::bit::swap_bytes_32(u32(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else if (op == Op::ByteReverseHalf) { + const u16 result = mcl::bit::swap_bytes_16(u16(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else { + const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); + inst.ReplaceUsesWith(IR::Value{result}); + } +} + +/// Folds division operations based on the following: +/// +/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) +/// 2. imm_x / imm_y -> result +/// 3. x / 1 -> x +/// +static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { + const auto rhs = inst.GetArg(1); + + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + return; + } + + const auto lhs = inst.GetArg(0); + if (lhs.IsImmediate() && rhs.IsImmediate()) { + if (is_signed) { + const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); + ReplaceUsesWith(inst, is_32_bit, static_cast(result)); + } else { + const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); + } + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(IR::Value{lhs}); + } +} + +// Folds EOR operations based on the following: +// +// 1. imm_x ^ imm_y -> result +// 2. x ^ 0 -> x +// 3. 0 ^ y -> y +// +static void FoldEOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static void FoldLeastSignificantByte(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantHalf(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldMostSignificantBit(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); +} + +static void FoldMostSignificantWord(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + if (carry_inst) { + carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); + } + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); +} + +// Folds multiplication operations based on the following: +// +// 1. imm_x * imm_y -> result +// 2. x * 0 -> 0 +// 3. 0 * y -> 0 +// 4. x * 1 -> x +// 5. 1 * y -> y +// +static void FoldMultiply(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +// Folds NOT operations if the contained value is an immediate. +static void FoldNOT(IR::Inst& inst, bool is_32_bit) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + const u64 result = ~operand.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +// Folds OR operations based on the following: +// +// 1. imm_x | imm_y -> result +// 2. x | 0 -> x +// 3. 0 | y -> y +// +static void FoldOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static bool FoldShifts(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + // The 32-bit variants can contain 3 arguments, while the + // 64-bit variants only contain 2. + if (inst.NumArgs() == 3 && !carry_inst) { + inst.SetArg(2, IR::Value(false)); + } + + const auto shift_amount = inst.GetArg(1); + + if (shift_amount.IsZero()) { + if (carry_inst) { + carry_inst->ReplaceUsesWith(inst.GetArg(2)); + } + inst.ReplaceUsesWith(inst.GetArg(0)); + return false; + } + + if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { + inst.SetArg(2, IR::Value(false)); + } + + if (!inst.AreAllArgsImmediates() || carry_inst) { + return false; + } + + return true; +} + +static void FoldSignExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSignExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSub(IR::Inst& inst, bool is_32_bit) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return; + } + + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +static void FoldZeroExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldZeroExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{value}); +} + +static void ConstantPropagation(IR::Block& block) { + for (auto& inst : block) { + const auto opcode = inst.GetOpcode(); + + switch (opcode) { + case Op::LeastSignificantWord: + FoldLeastSignificantWord(inst); + break; + case Op::MostSignificantWord: + FoldMostSignificantWord(inst); + break; + case Op::LeastSignificantHalf: + FoldLeastSignificantHalf(inst); + break; + case Op::LeastSignificantByte: + FoldLeastSignificantByte(inst); + break; + case Op::MostSignificantBit: + FoldMostSignificantBit(inst); + break; + case Op::IsZero32: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); + } + break; + case Op::IsZero64: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); + } + break; + case Op::LogicalShiftLeft32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeft64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeftMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftLeftMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::LogicalShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::ArithmeticShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::ArithmeticShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::RotateRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); + } + break; + case Op::RotateRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); + } + break; + case Op::Add32: + case Op::Add64: + FoldAdd(inst, opcode == Op::Add32); + break; + case Op::Sub32: + case Op::Sub64: + FoldSub(inst, opcode == Op::Sub32); + break; + case Op::Mul32: + case Op::Mul64: + FoldMultiply(inst, opcode == Op::Mul32); + break; + case Op::SignedDiv32: + case Op::SignedDiv64: + FoldDivide(inst, opcode == Op::SignedDiv32, true); + break; + case Op::UnsignedDiv32: + case Op::UnsignedDiv64: + FoldDivide(inst, opcode == Op::UnsignedDiv32, false); + break; + case Op::And32: + case Op::And64: + FoldAND(inst, opcode == Op::And32); + break; + case Op::Eor32: + case Op::Eor64: + FoldEOR(inst, opcode == Op::Eor32); + break; + case Op::Or32: + case Op::Or64: + FoldOR(inst, opcode == Op::Or32); + break; + case Op::Not32: + case Op::Not64: + FoldNOT(inst, opcode == Op::Not32); + break; + case Op::SignExtendByteToWord: + case Op::SignExtendHalfToWord: + FoldSignExtendXToWord(inst); + break; + case Op::SignExtendByteToLong: + case Op::SignExtendHalfToLong: + case Op::SignExtendWordToLong: + FoldSignExtendXToLong(inst); + break; + case Op::ZeroExtendByteToWord: + case Op::ZeroExtendHalfToWord: + FoldZeroExtendXToWord(inst); + break; + case Op::ZeroExtendByteToLong: + case Op::ZeroExtendHalfToLong: + case Op::ZeroExtendWordToLong: + FoldZeroExtendXToLong(inst); + break; + case Op::ByteReverseWord: + case Op::ByteReverseHalf: + case Op::ByteReverseDual: + FoldByteReverse(inst, opcode); + break; + default: + break; + } + } +} + +static void DeadCodeElimination(IR::Block& block) { + // We iterate over the instructions in reverse order. + // This is because removing an instruction reduces the number of uses for earlier instructions. + for (auto& inst : mcl::iterator::reverse(block)) { + if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { + inst.Invalidate(); + } + } +} + +static void IdentityRemovalPass(IR::Block& block) { + boost::container::small_vector to_invalidate; + + auto iter = block.begin(); + while (iter != block.end()) { + IR::Inst& inst = *iter; + + const size_t num_args = inst.NumArgs(); + for (size_t i = 0; i < num_args; i++) { + while (true) { + IR::Value arg = inst.GetArg(i); + if (!arg.IsIdentity()) + break; + inst.SetArg(i, arg.GetInst()->GetArg(0)); + } + } + + if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { + iter = block.Instructions().erase(inst); + to_invalidate.push_back(&inst); + } else { + ++iter; + } + } + for (IR::Inst* inst : to_invalidate) { + inst->Invalidate(); + } +} + +static void NamingPass(IR::Block& block) { + u32 name = 1; + for (auto& inst : block) + inst.SetName(name++); +} + +static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + + const IR::U128 t = ir.VectorExtract(x, y, 32); + + IR::U128 result = ir.ZeroVector(); + for (size_t i = 0; i < 4; i++) { + const IR::U32 modified_element = [&] { + const IR::U32 element = ir.VectorGetElement(32, t, i); + const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); + const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); + const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); + }(); + + result = ir.VectorSetElement(32, result, i, modified_element); + } + result = ir.VectorAdd(32, result, x); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 z = (IR::U128)inst.GetArg(2); + + const IR::U128 T0 = ir.VectorExtract(y, z, 32); + + const IR::U128 lower_half = [&] { + const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); + const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); + return ir.VectorZeroUpper(tmp5); + }(); + + const IR::U64 upper_half = [&] { + const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + + // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] + const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); + const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); + + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); + return ir.VectorGetElement(64, tmp5, 0); + }(); + + const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); + + inst.ReplaceUsesWith(result); +} + +static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Eor(ir.And(ir.Eor(y, z), x), z); +} + +static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); +} + +static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 x = (IR::U128)inst.GetArg(0); + IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 w = (IR::U128)inst.GetArg(2); + const bool part1 = inst.GetArg(3).GetU1(); + + for (size_t i = 0; i < 4; i++) { + const IR::U32 low_x = ir.VectorGetElement(32, x, 0); + const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); + const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); + const IR::U32 high_x = ir.VectorGetElement(32, x, 3); + + const IR::U32 low_y = ir.VectorGetElement(32, y, 0); + const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); + const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); + const IR::U32 high_y = ir.VectorGetElement(32, y, 3); + + const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); + const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); + + const IR::U32 t = [&] { + const IR::U32 w_element = ir.VectorGetElement(32, w, i); + const IR::U32 sig = SHAhashSIGMA1(ir, low_y); + + return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); + }(); + + const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); + const IR::U32 new_low_y = ir.Add(t, high_x); + + // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] + const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); + const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); + + x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); + y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); + } + + inst.ReplaceUsesWith(part1 ? x : y); +} + +template +static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 n = (IR::U128)inst.GetArg(0); + IR::U128 m = (IR::U128)inst.GetArg(1); + + const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); + const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); + + const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { + if (polyfill == PolyfillOptions{}) { + return; + } + + IR::IREmitter ir{block}; + + for (auto& inst : block) { + ir.SetInsertionPointBefore(&inst); + + switch (inst.GetOpcode()) { + case IR::Opcode::SHA256MessageSchedule0: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule0(ir, inst); + } + break; + case IR::Opcode::SHA256MessageSchedule1: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule1(ir, inst); + } + break; + case IR::Opcode::SHA256Hash: + if (polyfill.sha256) { + PolyfillSHA256Hash(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, false>(ir, inst); + } + break; + default: + break; + } + } +} + +static void VerificationPass(const IR::Block& block) { + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) { + const IR::Type t1 = inst.GetArg(i).GetType(); + const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); + ASSERT_MSG(IR::AreTypesCompatible(t1, t2), "Block failed:\n{}", IR::DumpBlock(block)); + } + } + ankerl::unordered_dense::map actual_uses; + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) + if (IR::Value const arg = inst.GetArg(i); !arg.IsImmediate()) + actual_uses[arg.GetInst()]++; + } + for (auto const& pair : actual_uses) + ASSERT(pair.first->UseCount() == pair.second); +} + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) [[likely]] { + Optimization::A32GetSetElimination(block, {.convert_nzc_to_nz = true}); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::A32ConstantMemoryReads(block, conf.callbacks); + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + Optimization::IdentityRemovalPass(block); + Optimization::VerificationPass(block); +} + +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::A64CallbackConfigPass(block, conf); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) [[likely]] { + Optimization::A64GetSetElimination(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] { + Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks); + } + Optimization::VerificationPass(block); +} + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h new file mode 100644 index 0000000000..1963fad0a0 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -0,0 +1,34 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +namespace Dynarmic::A32 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::A64 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::IR { +class Block; +} + +namespace Dynarmic::Optimization { + +struct PolyfillOptions { + bool sha256 = false; + bool vector_multiply_widen = false; + + bool operator==(const PolyfillOptions&) const = default; +}; + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index 4d14141bbf..ad01e5718b 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -29,7 +29,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; @@ -179,13 +179,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 85d86c7966..29a8f23478 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -6,33 +6,24 @@ add_executable(dynarmic_tests fp/mantissa_util_tests.cpp fp/unpacked_tests.cpp rand_int.h + # A32 + A32/test_arm_disassembler.cpp + A32/test_arm_instructions.cpp + A32/test_coprocessor.cpp + A32/test_svc.cpp + A32/test_thumb_instructions.cpp + A32/testenv.h + decoder_tests.cpp + # A64 + A64/a64.cpp + A64/fibonacci.cpp + A64/fp_min_max.cpp + A64/misaligned_page_table.cpp + A64/test_invalidation.cpp + A64/real_world.cpp + A64/testenv.h ) - -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/test_arm_disassembler.cpp - A32/test_arm_instructions.cpp - A32/test_coprocessor.cpp - A32/test_svc.cpp - A32/test_thumb_instructions.cpp - A32/testenv.h - decoder_tests.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) - - target_sources(dynarmic_tests PRIVATE - A64/a64.cpp - A64/fibonacci.cpp - A64/fp_min_max.cpp - A64/misaligned_page_table.cpp - A64/test_invalidation.cpp - A64/real_world.cpp - A64/testenv.h - ) -endif() +target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) if (DYNARMIC_TESTS_USE_UNICORN) target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn) @@ -40,25 +31,17 @@ if (DYNARMIC_TESTS_USE_UNICORN) target_sources(dynarmic_tests PRIVATE fuzz_util.cpp fuzz_util.h + # A32 + A32/fuzz_arm.cpp + A32/fuzz_thumb.cpp + unicorn_emu/a32_unicorn.cpp + unicorn_emu/a32_unicorn.h + # A64 + A64/fuzz_with_unicorn.cpp + A64/verify_unicorn.cpp + unicorn_emu/a64_unicorn.cpp + unicorn_emu/a64_unicorn.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/fuzz_arm.cpp - A32/fuzz_thumb.cpp - unicorn_emu/a32_unicorn.cpp - unicorn_emu/a32_unicorn.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A64/fuzz_with_unicorn.cpp - A64/verify_unicorn.cpp - unicorn_emu/a64_unicorn.cpp - unicorn_emu/a64_unicorn.h - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -69,9 +52,6 @@ if ("x86_64" IN_LIST ARCHITECTURE) target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak) target_architecture_specific_sources(dynarmic_tests "x86_64" x64_cpu_info.cpp - ) - - target_architecture_specific_sources(dynarmic_tests "x86_64" native/preserve_xmm.cpp ) @@ -85,47 +65,47 @@ endif() include(CreateDirectoryGroups) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_print_info - print_info.cpp - ) +# +# dynarmic_print_info +# +add_executable(dynarmic_print_info + print_info.cpp +) +create_target_directory_groups(dynarmic_print_info) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_print_info PRIVATE . ../src) +target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_print_info) +# +# dynarmic_test_generator +# +add_executable(dynarmic_test_generator + fuzz_util.cpp + fuzz_util.h + test_generator.cpp +) - target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_print_info PRIVATE . ../src) - target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +create_target_directory_groups(dynarmic_test_generator) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_generator - fuzz_util.cpp - fuzz_util.h - test_generator.cpp - ) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_generator PRIVATE . ../src) +target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_test_generator) - - target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_generator PRIVATE . ../src) - target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() - -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_reader - test_reader.cpp - ) - - create_target_directory_groups(dynarmic_test_reader) - - target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_reader PRIVATE . ../src) - target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +# +# dynarmic_test_reader +# +add_executable(dynarmic_test_reader + test_reader.cpp +) +create_target_directory_groups(dynarmic_test_reader) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_reader PRIVATE . ../src) +target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) +# create_target_directory_groups(dynarmic_tests) target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index ef8b87bbd1..3263ca729a 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -34,7 +34,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; From 565f7f6532f8cf684e2c2d5f1af70e23f2131879 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:57:48 +0000 Subject: [PATCH 02/32] Fix license headers --- src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index 8e48fa3687..01af361b27 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index 2b50ad9ea3..c4c1c42792 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h index 1963fad0a0..88b8020031 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.h +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD From e31acb645e713f5f7517929bf4ee912a0d87e881 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 00:11:40 +0000 Subject: [PATCH 03/32] [dynarmic] use small vector experiment Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index f70329f471..df1619d552 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -12,6 +12,7 @@ #include #include +#include "boost/container/small_vector.hpp" #include "dynarmic/common/common_types.h" #include #include @@ -83,7 +84,7 @@ public: void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; private: //non trivial - std::vector values; //24 + boost::container::small_vector values; //24 // Block state uint16_t total_uses = 0; //8 //sometimes zeroed @@ -96,7 +97,7 @@ private: uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - alignas(16) uint8_t lru_counter = 0; //1 + uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From 8b1f2a151c0d2abd926e434bf439862c90f8f010 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:07 +0000 Subject: [PATCH 04/32] [dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index fa6006ed2a..014c121be5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -118,7 +118,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept { values.push_back(inst); ASSERT(size_t(total_uses) + inst->UseCount() < std::numeric_limits::max()); total_uses += inst->UseCount(); - max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType())); + max_bit_width = std::max(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType()))); } void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index df1619d552..160c2ef098 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -78,7 +78,7 @@ public: return std::find(values.begin(), values.end(), inst) != values.end(); } inline size_t GetMaxBitWidth() const noexcept { - return max_bit_width; + return 1 << max_bit_width; } void AddValue(IR::Inst* inst) noexcept; void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; @@ -94,10 +94,10 @@ private: uint16_t is_being_used_count = 0; //8 uint16_t current_references = 0; //8 // Value state - uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 + uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6) + uint8_t lru_counter : 2 = 0; //1 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From f1df72f196d755da394dae589745fd06f173761c Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:22 +0000 Subject: [PATCH 05/32] [dynarmic] fix tests_reader and tests_generator Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 2 +- src/dynarmic/tests/A32/testenv.h | 1 - src/dynarmic/tests/A64/testenv.h | 1 - src/dynarmic/tests/CMakeLists.txt | 28 ++++++++++++++++++++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index 6948bf39fb..dbd5926ee9 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -371,7 +371,7 @@ target_link_libraries(dynarmic ) if (BOOST_NO_HEADERS) -target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) + target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) else() target_link_libraries(dynarmic PRIVATE Boost::headers) endif() diff --git a/src/dynarmic/tests/A32/testenv.h b/src/dynarmic/tests/A32/testenv.h index a6df2017ce..72eaafce14 100644 --- a/src/dynarmic/tests/A32/testenv.h +++ b/src/dynarmic/tests/A32/testenv.h @@ -17,7 +17,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A32/a32.h" -#include "../native/testenv.h" template class A32TestEnv : public Dynarmic::A32::UserCallbacks { diff --git a/src/dynarmic/tests/A64/testenv.h b/src/dynarmic/tests/A64/testenv.h index 31e338b138..fcdadb23e6 100644 --- a/src/dynarmic/tests/A64/testenv.h +++ b/src/dynarmic/tests/A64/testenv.h @@ -12,7 +12,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A64/a64.h" -#include "../native/testenv.h" using Vector = Dynarmic::A64::Vector; diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 29a8f23478..f8f420e2d6 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -72,7 +72,12 @@ add_executable(dynarmic_print_info print_info.cpp ) create_target_directory_groups(dynarmic_print_info) -target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_print_info PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_print_info PRIVATE . ../src) target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -88,7 +93,12 @@ add_executable(dynarmic_test_generator create_target_directory_groups(dynarmic_test_generator) -target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_generator PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_generator PRIVATE . ../src) target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -100,7 +110,12 @@ add_executable(dynarmic_test_reader test_reader.cpp ) create_target_directory_groups(dynarmic_test_reader) -target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_reader PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_reader PRIVATE . ../src) target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -108,7 +123,12 @@ target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LIT # create_target_directory_groups(dynarmic_tests) -target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) +target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_tests PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_tests PRIVATE . ../src) target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) From 779ee80a95028bbacdc8d5ba2419281ce229bd36 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 09:10:18 +0000 Subject: [PATCH 06/32] [docs] fastmem draft Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 5 +++++ src/dynarmic/docs/Fastmem.svg | 4 ++++ src/dynarmic/docs/HostToGuest.svg | 4 ++++ 3 files changed, 13 insertions(+) create mode 100644 src/dynarmic/docs/FastMemory.md create mode 100644 src/dynarmic/docs/Fastmem.svg create mode 100644 src/dynarmic/docs/HostToGuest.svg diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md new file mode 100644 index 0000000000..2a7c1a2583 --- /dev/null +++ b/src/dynarmic/docs/FastMemory.md @@ -0,0 +1,5 @@ +# Fast memory (Fastmem) + +![Host to guest translation](./HostToGuest.svg) + +![Fastmem translation](./Fastmem.svg) diff --git a/src/dynarmic/docs/Fastmem.svg b/src/dynarmic/docs/Fastmem.svg new file mode 100644 index 0000000000..a3ed0bb68b --- /dev/null +++ b/src/dynarmic/docs/Fastmem.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
SIGSEGV Trap
Fastmem
Only needs to linearly offset from fastmem arena
Less codegen (SIGSEGV traps)
Is fast only if SIGSEGV handlers are sufficiently fast
\ No newline at end of file diff --git a/src/dynarmic/docs/HostToGuest.svg b/src/dynarmic/docs/HostToGuest.svg new file mode 100644 index 0000000000..6a15a44b46 --- /dev/null +++ b/src/dynarmic/docs/HostToGuest.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
Resolver
Host to Guest translation
Looks up correct PTE
Translates each address 
Is slow
\ No newline at end of file From d68e62c9194f0ecf0ef9ce164c48d3eeb24035a8 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 10:16:22 +0000 Subject: [PATCH 07/32] [dynarmic] (prolly makes MSVC crash) - use 128MiB code cache Signed-off-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 6 +- .../frontend/decoder/decoder_detail.h | 65 ++++++++----------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 2c2c54a1ad..3f7bf7f967 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -210,12 +210,8 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 438b7b691c..682b0cfbf6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -269,12 +269,8 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index bd76efda2a..4050033ef5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -70,11 +70,9 @@ struct detail { return std::make_tuple(mask, expect); } - /** - * Generates the masks and shifts for each argument. - * A '-' in a bitstring indicates that we don't care about that value. - * An argument is specified by a continuous string of the same character. - */ + /// @brief Generates the masks and shifts for each argument. + /// A '-' in a bitstring indicates that we don't care about that value. + /// An argument is specified by a continuous string of the same character. template static consteval auto GetArgInfo(std::array bitstring) { std::array masks = {}; @@ -98,7 +96,6 @@ struct detail { if constexpr (N > 0) { const size_t bit_position = opcode_bitsize - i - 1; - if (arg_index >= N) throw std::out_of_range("Unexpected field"); @@ -109,20 +106,16 @@ struct detail { } } } - #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) // Avoids a MSVC ICE, and avoids Android NDK issue. ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; })); #endif - return std::make_tuple(masks, shifts); } - /** - * This struct's Make member function generates a lambda which decodes an instruction based on - * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a - * template argument. - */ + /// @brief This struct's Make member function generates a lambda which decodes an instruction + /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is + /// provided as a template argument. template struct VisitorCaller; @@ -130,36 +123,36 @@ struct detail { # pragma warning(push) # pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning) #endif - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...), + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...), const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...) const, + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...) const, const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; @@ -167,22 +160,16 @@ struct detail { # pragma warning(pop) #endif - /** - * Creates a matcher that can match and parse instructions based on bitstring. - * See also: GetMaskAndExpect and GetArgInfo for format of bitstring. - */ - template - static auto GetMatcher(FnT fn, const char* const name) { - constexpr size_t args_count = mcl::parameter_count_v; - + /// @brief Creates a matcher that can match and parse instructions based on bitstring. + /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. + template + static constexpr auto GetMatcher(F fn, const char* const name) { + constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); - - using Iota = std::make_index_sequence; - - const auto proxy_fn = VisitorCaller::Make(Iota(), fn, arg_masks, arg_shifts); + const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); return MatcherT(name, mask, expect, proxy_fn); } }; From 889f7b750fc83129893bbf06d1b078422b025d7e Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 4 Sep 2025 07:24:49 +0000 Subject: [PATCH 08/32] [dynarmic] reduce matcher table noise and cache misses Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/a32_types.h | 21 +++++---------- .../src/dynarmic/frontend/A32/decoder/asimd.h | 27 +++++++++---------- .../src/dynarmic/frontend/A32/decoder/vfp.h | 12 +++------ .../src/dynarmic/frontend/A64/decoder/a64.h | 23 +++++++--------- .../frontend/decoder/decoder_detail.h | 6 ++--- .../src/dynarmic/frontend/decoder/matcher.h | 11 ++------ 6 files changed, 38 insertions(+), 62 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h index fdadef8257..2a0cc25751 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h @@ -9,12 +9,9 @@ #pragma once #include -#include - #include #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" - #include "dynarmic/interface/A32/coprocessor_util.h" #include "dynarmic/ir/cond.h" @@ -89,23 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) { inline size_t RegNumber(Reg reg) { ASSERT(reg != Reg::INVALID_REG); - return static_cast(reg); + return size_t(reg); } inline size_t RegNumber(ExtReg reg) { if (IsSingleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::S0); + return size_t(reg) - size_t(ExtReg::S0); + } else if (IsDoubleExtReg(reg)) { + return size_t(reg) - size_t(ExtReg::D0); } - - if (IsDoubleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::D0); - } - - if (IsQuadExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::Q0); - } - - ASSERT_MSG(false, "Invalid extended register"); + ASSERT(IsQuadExtReg(reg)); + return size_t(reg) - size_t(ExtReg::Q0); } inline Reg operator+(Reg reg, size_t number) { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f2e206695b..f63816d2b1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher; template std::vector> GetASIMDDecodeTable() { - std::vector> table = { - -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> table = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" #undef INST - }; - // Exceptions to the rule of thumb. const std::set comes_first{ "VBIC, VMOV, VMVN, VORR (immediate)", @@ -53,19 +50,21 @@ std::vector> GetASIMDDecodeTable() { "VQDMULH (scalar)", "VQRDMULH (scalar)", }; - const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_first.count(matcher.GetName()) > 0; + const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_first.count(e.first) > 0; }); - const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_last.count(matcher.GetName()) == 0; + const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_last.count(e.first) == 0; }); - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) { - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - - return table; + std::vector> final_table; + std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + return e.second; + }); + return final_table; } template diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index 5fcacd2bda..a969570547 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -30,22 +30,18 @@ std::optional>> DecodeVFP(u32 instruc static const struct Tables { Table unconditional; Table conditional; - } tables = [] { + } tables = []() { Table list = { - #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./vfp.inc" #undef INST - }; - - const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { return (matcher.GetMask() & 0xF0000000) == 0xF0000000; }); - return Tables{ - Table{list.begin(), division}, - Table{division, list.end()}, + Table{list.begin(), it}, + Table{it, list.end()}, }; }(); diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c651dd7cde..c7bc981ac6 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) { template constexpr DecodeTable GetDecodeTable() { - std::vector> list = { -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./a64.inc" #undef INST }; - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { + std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { // If a matcher has more bits in its mask it is more specific, so it should come first. - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - // Exceptions to the above rule of thumb. - std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + std::stable_partition(list.begin(), list.end(), [&](const auto& e) { return std::set{ "MOVI, MVNI, ORR, BIC (vector, immediate)", "FMOV (vector, immediate)", "Unallocated SIMD modified immediate", - }.count(matcher.GetName()) > 0; + }.count(e.first) > 0; }); - DecodeTable table{}; for (size_t i = 0; i < table.size(); ++i) { - for (auto matcher : list) { - const auto expect = detail::ToFastLookupIndex(matcher.GetExpected()); - const auto mask = detail::ToFastLookupIndex(matcher.GetMask()); + for (auto const& e : list) { + const auto expect = detail::ToFastLookupIndex(e.second.GetExpected()); + const auto mask = detail::ToFastLookupIndex(e.second.GetMask()); if ((i & mask) == expect) { - table[i].push_back(matcher); + table[i].push_back(e.second); } } } diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index 4050033ef5..0d9da6169c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -163,18 +163,18 @@ struct detail { /// @brief Creates a matcher that can match and parse instructions based on bitstring. /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. template - static constexpr auto GetMatcher(F fn, const char* const name) { + static constexpr auto GetMatcher(F fn) { constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); - return MatcherT(name, mask, expect, proxy_fn); + return MatcherT(mask, expect, proxy_fn); } }; -#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn, name) +#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn) } // namespace detail } // namespace Dynarmic::Decoder diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 7e5c9c5a8f..5a2afe57c5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -31,14 +31,8 @@ public: using visitor_type = Visitor; using handler_return_type = typename Visitor::instruction_return_type; using handler_function = std::function; - - Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func) - : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {} - - /// Gets the name of this type of instruction. - const char* GetName() const { - return name; - } + Matcher(opcode_type mask, opcode_type expected, handler_function func) + : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. opcode_type GetMask() const { @@ -70,7 +64,6 @@ public: } private: - const char* name; opcode_type mask; opcode_type expected; handler_function fn; From c9b73a34bd9ab575372feab9341947c2404bb3d2 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 20:59:23 +0000 Subject: [PATCH 09/32] [dynarmic] fix tests Signed-off-by: lizzie --- .../src/dynarmic/frontend/decoder/matcher.h | 40 +++++++----------- src/dynarmic/tests/A32/fuzz_arm.cpp | 1 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 1 + .../tests/A32/test_arm_instructions.cpp | 1 + src/dynarmic/tests/A32/test_coprocessor.cpp | 1 + src/dynarmic/tests/A32/test_svc.cpp | 1 + .../tests/A32/test_thumb_instructions.cpp | 1 + src/dynarmic/tests/A64/a64.cpp | 1 + src/dynarmic/tests/A64/fp_min_max.cpp | 1 + src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 1 + .../tests/A64/misaligned_page_table.cpp | 1 + src/dynarmic/tests/A64/real_world.cpp | 1 + src/dynarmic/tests/A64/test_invalidation.cpp | 1 + src/dynarmic/tests/decoder_tests.cpp | 2 + src/dynarmic/tests/native/preserve_xmm.cpp | 1 + src/dynarmic/tests/print_info.cpp | 42 ++++--------------- 16 files changed, 39 insertions(+), 58 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 5a2afe57c5..f7e2884e0c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -14,16 +14,12 @@ namespace Dynarmic::Decoder { -/** - * Generic instruction handling construct. - * - * @tparam Visitor An arbitrary visitor type that will be passed through - * to the function being handled. This type must be the - * type of the first parameter in a handler function. - * - * @tparam OpcodeType Type representing an opcode. This must be the - * type of the second parameter in a handler function. - */ +/// Generic instruction handling construct. +/// @tparam Visitor An arbitrary visitor type that will be passed through +/// to the function being handled. This type must be the +/// type of the first parameter in a handler function. +/// @tparam OpcodeType Type representing an opcode. This must be the +/// type of the second parameter in a handler function. template class Matcher { public: @@ -35,30 +31,26 @@ public: : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. - opcode_type GetMask() const { + inline opcode_type GetMask() const noexcept { return mask; } /// Gets the expected value after masking for this instruction. - opcode_type GetExpected() const { + inline opcode_type GetExpected() const noexcept { return expected; } - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - bool Matches(opcode_type instruction) const { + /// Tests to see if the given instruction is the instruction this matcher represents. + /// @param instruction The instruction to test + /// @returns true if the given instruction matches. + inline bool Matches(opcode_type instruction) const noexcept { return (instruction & mask) == expected; } - /** - * Calls the corresponding instruction handler on visitor for this type of instruction. - * @param v The visitor to use - * @param instruction The instruction to decode. - */ - handler_return_type call(Visitor& v, opcode_type instruction) const { + /// Calls the corresponding instruction handler on visitor for this type of instruction. + /// @param v The visitor to use + /// @param instruction The instruction to decode. + inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept { ASSERT(Matches(instruction)); return fn(v, instruction); } diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp index 087ce54813..5ee6d2bf02 100644 --- a/src/dynarmic/tests/A32/fuzz_arm.cpp +++ b/src/dynarmic/tests/A32/fuzz_arm.cpp @@ -24,6 +24,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a32_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index ad01e5718b..7f64cb0ccb 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -22,6 +22,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a32_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/FPSCR.h" #include "dynarmic/frontend/A32/PSR.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" diff --git a/src/dynarmic/tests/A32/test_arm_instructions.cpp b/src/dynarmic/tests/A32/test_arm_instructions.cpp index 0411877823..c007a18299 100644 --- a/src/dynarmic/tests/A32/test_arm_instructions.cpp +++ b/src/dynarmic/tests/A32/test_arm_instructions.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" diff --git a/src/dynarmic/tests/A32/test_coprocessor.cpp b/src/dynarmic/tests/A32/test_coprocessor.cpp index 98da0e5d34..49cb42bdf3 100644 --- a/src/dynarmic/tests/A32/test_coprocessor.cpp +++ b/src/dynarmic/tests/A32/test_coprocessor.cpp @@ -8,6 +8,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/tests/A32/test_svc.cpp b/src/dynarmic/tests/A32/test_svc.cpp index 8b55d6537c..998566130e 100644 --- a/src/dynarmic/tests/A32/test_svc.cpp +++ b/src/dynarmic/tests/A32/test_svc.cpp @@ -8,6 +8,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A32/test_thumb_instructions.cpp b/src/dynarmic/tests/A32/test_thumb_instructions.cpp index 3501d5419f..d509acdd8d 100644 --- a/src/dynarmic/tests/A32/test_thumb_instructions.cpp +++ b/src/dynarmic/tests/A32/test_thumb_instructions.cpp @@ -10,6 +10,7 @@ #include "dynarmic/common/common_types.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A32/a32.h" static Dynarmic::A32::UserConfig GetUserConfig(ThumbTestEnv* testenv) { diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index 40eff1f071..24e92d1210 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -7,6 +7,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/A64/fp_min_max.cpp b/src/dynarmic/tests/A64/fp_min_max.cpp index d8b45db807..1669b63071 100644 --- a/src/dynarmic/tests/A64/fp_min_max.cpp +++ b/src/dynarmic/tests/A64/fp_min_max.cpp @@ -12,6 +12,7 @@ #include "dynarmic/common/common_types.h" #include "./testenv.h" +#include "../native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp index 3322af06e7..45ba728917 100644 --- a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp +++ b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp @@ -19,6 +19,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a64_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/A64/misaligned_page_table.cpp b/src/dynarmic/tests/A64/misaligned_page_table.cpp index 8235e14a67..ecba5a3efa 100644 --- a/src/dynarmic/tests/A64/misaligned_page_table.cpp +++ b/src/dynarmic/tests/A64/misaligned_page_table.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" TEST_CASE("misaligned load/store do not use page_table when detect_misaligned_access_via_page_table is set", "[a64]") { diff --git a/src/dynarmic/tests/A64/real_world.cpp b/src/dynarmic/tests/A64/real_world.cpp index 07532d95af..a083f16d61 100644 --- a/src/dynarmic/tests/A64/real_world.cpp +++ b/src/dynarmic/tests/A64/real_world.cpp @@ -5,6 +5,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/test_invalidation.cpp b/src/dynarmic/tests/A64/test_invalidation.cpp index 168043c1cb..8a63776fa4 100644 --- a/src/dynarmic/tests/A64/test_invalidation.cpp +++ b/src/dynarmic/tests/A64/test_invalidation.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index e545309960..777ea8b510 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,6 +20,7 @@ using namespace Dynarmic; +/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -76,3 +77,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { } while (x != 0); } } +*/ diff --git a/src/dynarmic/tests/native/preserve_xmm.cpp b/src/dynarmic/tests/native/preserve_xmm.cpp index 0f69697b7a..7421252063 100644 --- a/src/dynarmic/tests/native/preserve_xmm.cpp +++ b/src/dynarmic/tests/native/preserve_xmm.cpp @@ -6,6 +6,7 @@ #include #include "../A64/testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 3263ca729a..0c89e780ad 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -32,6 +32,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/frontend/A64/translate/impl/impl.h" #include "dynarmic/interface/A32/a32.h" +#include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/opt_passes.h" @@ -39,20 +40,20 @@ using namespace Dynarmic; const char* GetNameOfA32Instruction(u32 instruction) { - if (auto vfp_decoder = A32::DecodeVFP(instruction)) { + /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { return vfp_decoder->get().GetName(); } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { return asimd_decoder->get().GetName(); } else if (auto decoder = A32::DecodeArm(instruction)) { return decoder->get().GetName(); - } + }*/ return ""; } const char* GetNameOfA64Instruction(u32 instruction) { - if (auto decoder = A64::Decode(instruction)) { + /*if (auto decoder = A64::Decode(instruction)) { return decoder->get().GetName(); - } + }*/ return ""; } @@ -64,18 +65,9 @@ void PrintA32Instruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A32GetSetElimination(ir_block, {}); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A32::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } @@ -88,18 +80,9 @@ void PrintA64Instruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A64::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A64::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } @@ -115,18 +98,9 @@ void PrintThumbInstruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A32GetSetElimination(ir_block, {}); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A32::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } From dba64c05c96afeb82ab01b73962140bec00ddbb9 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:07:18 +0000 Subject: [PATCH 10/32] [dynarmic] fix ASIMD execution Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f63816d2b1..2cea7a14c1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -61,7 +61,7 @@ std::vector> GetASIMDDecodeTable() { return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); std::vector> final_table; - std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) { return e.second; }); return final_table; From f4d16ffc58c1886406e60508bc3a5fef537932d2 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:40:13 +0000 Subject: [PATCH 11/32] [dynarmic] add back encoding names (for print_info) Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/decoder/arm.h | 20 ++++++++++--- .../src/dynarmic/frontend/A32/decoder/asimd.h | 26 ++++++++++++----- .../dynarmic/frontend/A32/decoder/thumb16.h | 24 +++++++++++----- .../dynarmic/frontend/A32/decoder/thumb32.h | 24 +++++++++++----- .../src/dynarmic/frontend/A32/decoder/vfp.h | 24 +++++++++++----- .../A32/translate/translate_thumb.cpp | 2 +- .../src/dynarmic/frontend/A64/decoder/a64.h | 20 ++++++++++--- src/dynarmic/tests/decoder_tests.cpp | 28 ++++++------------- src/dynarmic/tests/print_info.cpp | 22 +++++++-------- 9 files changed, 122 insertions(+), 68 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h index 0257c28ddb..c6f034ae21 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h @@ -30,13 +30,13 @@ template using ArmDecodeTable = std::array>, 0x1000>; namespace detail { -inline size_t ToFastLookupIndexArm(u32 instruction) { +inline size_t ToFastLookupIndexArm(u32 instruction) noexcept { return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0); } } // namespace detail template -constexpr ArmDecodeTable GetArmDecodeTable() { +constexpr ArmDecodeTable GetArmDecodeTable() noexcept { std::vector> list = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./arm.inc" @@ -62,15 +62,27 @@ constexpr ArmDecodeTable GetArmDecodeTable() { } template -std::optional>> DecodeArm(u32 instruction) { +std::optional>> DecodeArm(u32 instruction) noexcept { alignas(64) static const auto table = GetArmDecodeTable(); const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)]; auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameARM(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./arm.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index 2cea7a14c1..57e1392871 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -26,7 +26,7 @@ template using ASIMDMatcher = Decoder::Matcher; template -std::vector> GetASIMDDecodeTable() { +std::vector> GetASIMDDecodeTable() noexcept { std::vector>> table = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" @@ -68,13 +68,25 @@ std::vector> GetASIMDDecodeTable() { } template -std::optional>> DecodeASIMD(u32 instruction) { - static const auto table = GetASIMDDecodeTable(); - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); +std::optional>> DecodeASIMD(u32 instruction) noexcept { + alignas(64) static const auto table = GetASIMDDecodeTable(); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameASIMD(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./asimd.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h index 8073ee5d47..16b99ba5aa 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h @@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher; template std::optional>> DecodeThumb16(u16 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)), #include "./thumb16.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb16(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) }, +#include "./thumb16.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h index 86a4d767a7..19418de67c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h @@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher; template std::optional>> DecodeThumb32(u32 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./thumb32.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb32(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./thumb32.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index a969570547..a346304a9a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -26,8 +26,7 @@ using VFPMatcher = Decoder::Matcher; template std::optional>> DecodeVFP(u32 instruction) { using Table = std::vector>; - - static const struct Tables { + alignas(64) static const struct Tables { Table unconditional; Table conditional; } tables = []() { @@ -44,14 +43,25 @@ std::optional>> DecodeVFP(u32 instruc Table{it, list.end()}, }; }(); - const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000; const Table& table = is_unconditional ? tables.unconditional : tables.conditional; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameVFP(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./vfp.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp index 5bb516ccfd..0381c984cc 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp @@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) { return 0xF7F0A000; // UDF } -bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) { +inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept { return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000; } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c7bc981ac6..7e6cdc3935 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -71,12 +71,24 @@ constexpr DecodeTable GetDecodeTable() { template std::optional>> Decode(u32 instruction) { alignas(64) static const auto table = GetDecodeTable(); - const auto matches_instruction = [instruction](const auto& matcher) { - return matcher.Matches(instruction); - }; const auto& subtable = table[detail::ToFastLookupIndex(instruction)]; - auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); + auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetName(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./a64.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A64 diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index 777ea8b510..4ad9d90833 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,7 +20,6 @@ using namespace Dynarmic; -/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -37,22 +36,12 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto is_decode_error = [&get_ir](const A32::ASIMDMatcher& matcher, u32 instruction) { const auto block = get_ir(matcher, instruction); - - for (const auto& ir_inst : block) { - if (ir_inst.GetOpcode() == IR::Opcode::A32ExceptionRaised) { - if (static_cast(ir_inst.GetArg(1).GetU64()) == A32::Exception::DecodeError) { - return true; - } - } - } - return false; + return std::find_if(block.cbegin(), block.cend(), [](auto const& e) { + return e.GetOpcode() == IR::Opcode::A32ExceptionRaised && A32::Exception(e.GetArg(1).GetU64()) == A32::Exception::DecodeError; + }) != block.cend(); }; for (auto iter = table.cbegin(); iter != table.cend(); ++iter) { - if (std::strncmp(iter->GetName(), "UNALLOCATED", 11) == 0) { - continue; - } - const u32 expect = iter->GetExpected(); const u32 mask = iter->GetMask(); u32 x = 0; @@ -60,15 +49,17 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const u32 instruction = expect | x; const bool iserr = is_decode_error(*iter, instruction); - const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { return m.Matches(instruction); }); + const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { + return m.Matches(instruction); + }); const bool altiserr = is_decode_error(*alternative, instruction); INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction); INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect); INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x); - INFO("Name: " << iter->GetName()); + INFO("Name: " << *A32::GetNameASIMD(instruction)); INFO("iserr: " << iserr); - INFO("alternative: " << alternative->GetName()); + //INFO("alternative: " << alternative->GetName()); INFO("altiserr: " << altiserr); REQUIRE(((!iserr && alternative == iter) || (iserr && alternative != iter && !altiserr))); @@ -76,5 +67,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { x = ((x | mask) + 1) & ~mask; } while (x != 0); } -} -*/ +} \ No newline at end of file diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 0c89e780ad..8936f32bd3 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -39,21 +39,19 @@ using namespace Dynarmic; -const char* GetNameOfA32Instruction(u32 instruction) { - /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { - return vfp_decoder->get().GetName(); - } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { - return asimd_decoder->get().GetName(); - } else if (auto decoder = A32::DecodeArm(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA32Instruction(u32 instruction) { + if (auto const vfp_decoder = A32::DecodeVFP(instruction)) + return *A32::GetNameVFP(instruction); + else if (auto const asimd_decoder = A32::DecodeASIMD(instruction)) + return *A32::GetNameASIMD(instruction); + else if (auto const decoder = A32::DecodeArm(instruction)) + return *A32::GetNameARM(instruction); return ""; } -const char* GetNameOfA64Instruction(u32 instruction) { - /*if (auto decoder = A64::Decode(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA64Instruction(u32 instruction) { + if (auto const decoder = A64::Decode(instruction)) + return *A64::GetName(instruction); return ""; } From 14efe861823f2849647d3cc40404e5ad66488226 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 14:08:17 +0000 Subject: [PATCH 12/32] [dynarmic] use ARCHITECTURE_ macros instead of MCL ones Signed-off-by: lizzie --- .../src/dynarmic/backend/exception_handler.h | 18 +-- .../backend/exception_handler_generic.cpp | 6 +- .../backend/exception_handler_macos.cpp | 12 +- .../backend/exception_handler_macos_mig.c | 4 +- .../backend/exception_handler_posix.cpp | 130 ++---------------- .../backend/exception_handler_windows.cpp | 4 +- src/dynarmic/src/dynarmic/common/context.h | 120 ++++++++++++++++ 7 files changed, 157 insertions(+), 137 deletions(-) create mode 100644 src/dynarmic/src/dynarmic/common/context.h diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler.h b/src/dynarmic/src/dynarmic/backend/exception_handler.h index 173949628c..cd274b111f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler.h +++ b/src/dynarmic/src/dynarmic/backend/exception_handler.h @@ -15,15 +15,15 @@ #include #include "dynarmic/common/common_types.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) namespace Dynarmic::Backend::X64 { class BlockOfCode; } // namespace Dynarmic::Backend::X64 -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) namespace oaknut { class CodeBlock; } // namespace oaknut -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) namespace Dynarmic::Backend::RV64 { class CodeBlock; } // namespace Dynarmic::Backend::RV64 @@ -33,16 +33,16 @@ class CodeBlock; namespace Dynarmic::Backend { -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) struct FakeCall { u64 call_rip; u64 ret_rip; }; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) struct FakeCall { u64 call_pc; }; -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) struct FakeCall { }; #else @@ -54,11 +54,11 @@ public: ExceptionHandler(); ~ExceptionHandler(); -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void Register(X64::BlockOfCode& code); -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void Register(oaknut::CodeBlock& mem, std::size_t mem_size); -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void Register(RV64::CodeBlock& mem, std::size_t mem_size); #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp index ad7df25ca6..985536d9f0 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp @@ -13,15 +13,15 @@ struct ExceptionHandler::Impl final { ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode&) { // Do nothing } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock&, std::size_t) { // Do nothing } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock&, std::size_t) { // Do nothing } diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp index 52bcf5972f..76e517f05b 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp @@ -25,7 +25,7 @@ #include "dynarmic/backend/exception_handler.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/block_of_code.h" # define mig_external extern "C" @@ -36,7 +36,7 @@ using dynarmic_thread_state_t = x86_thread_state64_t; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include # define mig_external extern "C" @@ -133,7 +133,7 @@ void MachHandler::MessagePump() { } } -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { std::lock_guard guard(code_block_infos_mutex); @@ -151,7 +151,7 @@ kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { return KERN_SUCCESS; } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) kern_return_t MachHandler::HandleRequest(arm_thread_state64_t* ts) { std::lock_guard guard(code_block_infos_mutex); @@ -269,13 +269,13 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { const u64 code_begin = mcl::bit_cast(code.getCode()); const u64 code_end = code_begin + code.GetTotalCodeSize(); impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c index 762a80ca42..25678ab115 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c @@ -5,9 +5,9 @@ #include -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/mig/mach_exc_server.c" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include "dynarmic/backend/arm64/mig/mach_exc_server.c" #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index 7695df57d2..ef28d99279 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -8,19 +8,6 @@ #include "dynarmic/backend/exception_handler.h" -#ifdef __APPLE__ -# include -# include -#else -# include -# ifndef __OpenBSD__ -# include -# endif -# ifdef __sun__ -# include -# endif -#endif - #include #include #include @@ -29,16 +16,17 @@ #include #include "dynarmic/common/assert.h" +#include "dynarmic/common/context.h" #include #include "dynarmic/common/common_types.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/block_of_code.h" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include # include "dynarmic/backend/arm64/abi.h" -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) # include "dynarmic/backend/riscv64/code_block.h" #else # error "Invalid architecture" @@ -131,139 +119,51 @@ SigHandler::~SigHandler() { void SigHandler::AddCodeBlock(CodeBlockInfo cbi) { std::lock_guard guard(code_block_infos_mutex); - if (auto iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) { + if (auto const iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) code_block_infos.erase(iter); - } code_block_infos.push_back(cbi); } void SigHandler::RemoveCodeBlock(u64 host_pc) { std::lock_guard guard(code_block_infos_mutex); const auto iter = FindCodeBlockInfo(host_pc); - if (iter == code_block_infos.end()) { - return; - } - code_block_infos.erase(iter); + if (iter != code_block_infos.end()) + code_block_infos.erase(iter); } void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { ASSERT(sig == SIGSEGV || sig == SIGBUS); - -#ifndef MCL_ARCHITECTURE_RISCV - ucontext_t* ucontext = reinterpret_cast(raw_context); -#ifndef __OpenBSD__ - auto& mctx = ucontext->uc_mcontext; -#endif -#endif - -#if defined(MCL_ARCHITECTURE_X86_64) - -# if defined(__APPLE__) -# define CTX_RIP (mctx->__ss.__rip) -# define CTX_RSP (mctx->__ss.__rsp) -# elif defined(__linux__) -# define CTX_RIP (mctx.gregs[REG_RIP]) -# define CTX_RSP (mctx.gregs[REG_RSP]) -# elif defined(__FreeBSD__) -# define CTX_RIP (mctx.mc_rip) -# define CTX_RSP (mctx.mc_rsp) -# elif defined(__NetBSD__) -# define CTX_RIP (mctx.__gregs[_REG_RIP]) -# define CTX_RSP (mctx.__gregs[_REG_RSP]) -# elif defined(__OpenBSD__) -# define CTX_RIP (ucontext->sc_rip) -# define CTX_RSP (ucontext->sc_rsp) -# elif defined(__sun__) -# define CTX_RIP (mctx.gregs[REG_RIP]) -# define CTX_RSP (mctx.gregs[REG_RSP]) -# else -# error "Unknown platform" -# endif - + CTX_DECLARE(raw_context); +#if defined(ARCHITECTURE_x86_64) { std::lock_guard guard(sig_handler->code_block_infos_mutex); const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP); if (iter != sig_handler->code_block_infos.end()) { FakeCall fc = iter->cb(CTX_RIP); - CTX_RSP -= sizeof(u64); *mcl::bit_cast(CTX_RSP) = fc.ret_rip; CTX_RIP = fc.call_rip; - return; } } - fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); - -#elif defined(MCL_ARCHITECTURE_ARM64) - -# if defined(__APPLE__) -# define CTX_PC (mctx->__ss.__pc) -# define CTX_SP (mctx->__ss.__sp) -# define CTX_LR (mctx->__ss.__lr) -# define CTX_X(i) (mctx->__ss.__x[i]) -# define CTX_Q(i) (mctx->__ns.__v[i]) -# elif defined(__linux__) -# define CTX_PC (mctx.pc) -# define CTX_SP (mctx.sp) -# define CTX_LR (mctx.regs[30]) -# define CTX_X(i) (mctx.regs[i]) -# define CTX_Q(i) (fpctx->vregs[i]) - [[maybe_unused]] const auto fpctx = [&mctx] { - _aarch64_ctx* header = (_aarch64_ctx*)&mctx.__reserved; - while (header->magic != FPSIMD_MAGIC) { - ASSERT(header->magic && header->size); - header = (_aarch64_ctx*)((char*)header + header->size); - } - return (fpsimd_context*)header; - }(); -# elif defined(__FreeBSD__) -# define CTX_PC (mctx.mc_gpregs.gp_elr) -# define CTX_SP (mctx.mc_gpregs.gp_sp) -# define CTX_LR (mctx.mc_gpregs.gp_lr) -# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) -# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) -# elif defined(__NetBSD__) -# define CTX_PC (mctx.mc_gpregs.gp_elr) -# define CTX_SP (mctx.mc_gpregs.gp_sp) -# define CTX_LR (mctx.mc_gpregs.gp_lr) -# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) -# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) -# elif defined(__OpenBSD__) -# define CTX_PC (ucontext->sc_elr) -# define CTX_SP (ucontext->sc_sp) -# define CTX_LR (ucontext->sc_lr) -# define CTX_X(i) (ucontext->sc_x[i]) -# define CTX_Q(i) (ucontext->sc_q[i]) -# else -# error "Unknown platform" -# endif - +#elif defined(ARCHITECTURE_arm64) + CTX_DECLARE(raw_context); { std::lock_guard guard(sig_handler->code_block_infos_mutex); - const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); if (iter != sig_handler->code_block_infos.end()) { FakeCall fc = iter->cb(CTX_PC); - CTX_PC = fc.call_pc; - return; } } - fmt::print(stderr, "Unhandled {} at pc {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_PC); - -#elif defined(MCL_ARCHITECTURE_RISCV) - +#elif defined(ARCHITECTURE_riscv64) ASSERT_FALSE("Unimplemented"); - #else - # error "Invalid architecture" - #endif struct sigaction* retry_sa = sig == SIGSEGV ? &sig_handler->old_sa_segv : &sig_handler->old_sa_bus; @@ -309,19 +209,19 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { const u64 code_begin = mcl::bit_cast(code.getCode()); const u64 code_end = code_begin + code.GetTotalCodeSize(); impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp index 719c007594..9e76cae912 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp @@ -5,9 +5,9 @@ #include -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/exception_handler_windows.cpp" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include "dynarmic/backend/exception_handler_generic.cpp" #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/common/context.h b/src/dynarmic/src/dynarmic/common/context.h new file mode 100644 index 0000000000..0eb128449c --- /dev/null +++ b/src/dynarmic/src/dynarmic/common/context.h @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#ifdef __APPLE__ +# include +# include +#else +# include +# ifndef __OpenBSD__ +# include +# endif +# ifdef __sun__ +# include +# endif +# ifdef __linux__ +# include +# endif +#endif + +#ifdef ARCHITECTURE_x86_64 +# ifdef __OpenBSD__ +# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast(raw_context); +# else +# define CTX_DECLARE(raw_context) \ + ucontext_t* ucontext = reinterpret_cast(raw_context); \ + [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; +# endif +#elif defined(ARCHITECTURE_arm64) +# ifdef __OpenBSD__ +# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast(raw_context); +# else +# define CTX_DECLARE(raw_context) \ + ucontext_t* ucontext = reinterpret_cast(raw_context); \ + [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; \ + [[maybe_unused]] const auto fpctx = GetFloatingPointState(mctx); +# endif +#endif + +#if defined(ARCHITECTURE_x86_64) +# if defined(__APPLE__) +# define CTX_RIP (mctx->__ss.__rip) +# define CTX_RSP (mctx->__ss.__rsp) +# elif defined(__linux__) +# define CTX_RIP (mctx.gregs[REG_RIP]) +# define CTX_RSP (mctx.gregs[REG_RSP]) +# elif defined(__FreeBSD__) +# define CTX_RIP (mctx.mc_rip) +# define CTX_RSP (mctx.mc_rsp) +# elif defined(__NetBSD__) +# define CTX_RIP (mctx.__gregs[_REG_RIP]) +# define CTX_RSP (mctx.__gregs[_REG_RSP]) +# elif defined(__OpenBSD__) +# define CTX_RIP (ucontext->sc_rip) +# define CTX_RSP (ucontext->sc_rsp) +# elif defined(__sun__) +# define CTX_RIP (mctx.gregs[REG_RIP]) +# define CTX_RSP (mctx.gregs[REG_RSP]) +# else +# error "Unknown platform" +# endif +#elif defined(ARCHITECTURE_arm64) +# if defined(__APPLE__) +# define CTX_PC (mctx->__ss.__pc) +# define CTX_SP (mctx->__ss.__sp) +# define CTX_LR (mctx->__ss.__lr) +# define CTX_PSTATE (mctx->__ss.__cpsr) +# define CTX_X(i) (mctx->__ss.__x[i]) +# define CTX_Q(i) (mctx->__ns.__v[i]) +# define CTX_FPSR (mctx->__ns.__fpsr) +# define CTX_FPCR (mctx->__ns.__fpcr) +# elif defined(__linux__) +# define CTX_PC (mctx.pc) +# define CTX_SP (mctx.sp) +# define CTX_LR (mctx.regs[30]) +# define CTX_PSTATE (mctx.pstate) +# define CTX_X(i) (mctx.regs[i]) +# define CTX_Q(i) (fpctx->vregs[i]) +# define CTX_FPSR (fpctx->fpsr) +# define CTX_FPCR (fpctx->fpcr) +# elif defined(__FreeBSD__) +# define CTX_PC (mctx.mc_gpregs.gp_elr) +# define CTX_SP (mctx.mc_gpregs.gp_sp) +# define CTX_LR (mctx.mc_gpregs.gp_lr) +# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) +# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) +# elif defined(__NetBSD__) +# define CTX_PC (mctx.mc_gpregs.gp_elr) +# define CTX_SP (mctx.mc_gpregs.gp_sp) +# define CTX_LR (mctx.mc_gpregs.gp_lr) +# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) +# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) +# elif defined(__OpenBSD__) +# define CTX_PC (ucontext->sc_elr) +# define CTX_SP (ucontext->sc_sp) +# define CTX_LR (ucontext->sc_lr) +# define CTX_X(i) (ucontext->sc_x[i]) +# define CTX_Q(i) (ucontext->sc_q[i]) +# else +# error "Unknown platform" +# endif +#else +# error "unimplemented" +#endif + +#ifdef ARCHITECTURE_arm64 +#ifdef __APPLE__ +inline _STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) { + return &(host_ctx->__ns); +} +#elif defined(__linux__) +inline fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) { + _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved); + while (header->magic != FPSIMD_MAGIC) + header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast(header) + header->size); + return reinterpret_cast(header); +} +#endif +#endif From 6362cab83e21b27a9fd9ef07352af947da72103b Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 17:03:36 +0000 Subject: [PATCH 13/32] [dynarmic] fix android Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index ef28d99279..b22befaff9 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -149,7 +149,6 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { } fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); #elif defined(ARCHITECTURE_arm64) - CTX_DECLARE(raw_context); { std::lock_guard guard(sig_handler->code_block_infos_mutex); const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); From 37e0b80766f03ae3eb052f2eaa95506b34419bbe Mon Sep 17 00:00:00 2001 From: Gamer64 Date: Sun, 7 Sep 2025 19:20:45 +0200 Subject: [PATCH 14/32] [hle] Added missing error codes and increased audio renderer revision (#390) Co-authored-by: Jarrod Norwell Fixes Animal Well Co-authored-by: Gamer64 <76565986+Gamer64ytb@users.noreply.github.com> Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/390 Reviewed-by: MaranBr Reviewed-by: crueter Co-authored-by: Gamer64 Co-committed-by: Gamer64 --- src/audio_core/common/feature_support.h | 6 +++++- src/core/hle/result.h | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/audio_core/common/feature_support.h b/src/audio_core/common/feature_support.h index cd83df3832..39d50746b8 100644 --- a/src/audio_core/common/feature_support.h +++ b/src/audio_core/common/feature_support.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -13,7 +16,7 @@ #include "common/polyfill_ranges.h" namespace AudioCore { -constexpr u32 CurrentRevision = 15; +constexpr u32 CurrentRevision = 16; enum class SupportTags { CommandProcessingTimeEstimatorVersion4, @@ -54,6 +57,7 @@ constexpr u32 GetRevisionNum(u32 user_revision) { user_revision -= Common::MakeMagic('R', 'E', 'V', '0'); user_revision >>= 24; } + return user_revision; }; diff --git a/src/core/hle/result.h b/src/core/hle/result.h index 316370266d..495e6e32c2 100644 --- a/src/core/hle/result.h +++ b/src/core/hle/result.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -24,6 +27,7 @@ enum class ErrorModule : u32 { HTCS = 4, NCM = 5, DD = 6, + OSDBG = 7, LR = 8, Loader = 9, CMIF = 10, @@ -51,6 +55,7 @@ enum class ErrorModule : u32 { Util = 33, TIPC = 35, ANIF = 37, + CRT = 39, ETHC = 100, I2C = 101, GPIO = 102, @@ -106,6 +111,7 @@ enum class ErrorModule : u32 { Audio = 153, NPNS = 154, NPNSHTTPSTREAM = 155, + IDLE = 156, ARP = 157, SWKBD = 158, BOOT = 159, @@ -115,6 +121,7 @@ enum class ErrorModule : u32 { Fatal = 163, NIMShop = 164, SPSM = 165, + AOC = 166, BGTC = 167, UserlandCrash = 168, SASBUS = 169, @@ -176,13 +183,22 @@ enum class ErrorModule : u32 { DP2HDMI = 244, Cradle = 245, SProfile = 246, + Icm42607p = 248, NDRM = 250, + Fst2 = 251, + Nex = 306, + NPLN = 321, TSPM = 499, DevMenu = 500, + Nverpt = 520, + Am_StuckMonitor = 521, + Pia = 618, + Eagle = 623, GeneralWebApplet = 800, WifiWebAuthApplet = 809, WhitelistedApplet = 810, ShopN = 811, + Coral = 815 }; /// Encapsulates a Horizon OS error code, allowing it to be separated into its constituent fields. From 10dd003d0fe69d8ce015113a30327d96f164b9fa Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 8 Sep 2025 00:49:46 +0200 Subject: [PATCH 15/32] [dynarmic, cmake] allow LTO build for dynarmic (#252) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/252 Reviewed-by: crueter Co-authored-by: lizzie Co-committed-by: lizzie --- .ci/linux/build.sh | 1 + .ci/windows/build.sh | 1 + src/android/app/build.gradle.kts | 3 ++- src/dynarmic/CMakeLists.txt | 1 + src/dynarmic/src/dynarmic/CMakeLists.txt | 4 ++++ 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.ci/linux/build.sh b/.ci/linux/build.sh index 7c8bed1279..8e3a452809 100755 --- a/.ci/linux/build.sh +++ b/.ci/linux/build.sh @@ -104,6 +104,7 @@ cmake .. -G Ninja \ -DYUZU_USE_QT_WEB_ENGINE=$WEBENGINE \ -DYUZU_USE_FASTER_LD=ON \ -DYUZU_ENABLE_LTO=ON \ + -DDYNARMIC_ENABLE_LTO=ON \ "${EXTRA_CMAKE_FLAGS[@]}" ninja -j${NPROC} diff --git a/.ci/windows/build.sh b/.ci/windows/build.sh index 7504630a57..681f327793 100644 --- a/.ci/windows/build.sh +++ b/.ci/windows/build.sh @@ -52,6 +52,7 @@ cmake .. -G Ninja \ -DYUZU_USE_QT_MULTIMEDIA=$MULTIMEDIA \ -DYUZU_USE_QT_WEB_ENGINE=$WEBENGINE \ -DYUZU_ENABLE_LTO=ON \ + -DDYNARMIC_ENABLE_LTO=ON \ "${EXTRA_CMAKE_FLAGS[@]}" ninja diff --git a/src/android/app/build.gradle.kts b/src/android/app/build.gradle.kts index 3f1a7c102b..c76b5e7162 100644 --- a/src/android/app/build.gradle.kts +++ b/src/android/app/build.gradle.kts @@ -179,7 +179,8 @@ android { "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", "-DBUILD_TESTING=OFF", "-DYUZU_TESTS=OFF", - "-DDYNARMIC_TESTS=OFF" + "-DDYNARMIC_TESTS=OFF", + "-DDYNARMIC_ENABLE_LTO=ON" ) abiFilters("arm64-v8a") diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 5c28435f72..d505d16553 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -23,6 +23,7 @@ option(DYNARMIC_USE_PRECOMPILED_HEADERS "Use precompiled headers" ON) option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF) option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. cross-compiling)" OFF) option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT}) +option(DYNARMIC_ENABLE_LTO "Enable LTO" OFF) if (NOT DEFINED DYNARMIC_FRONTENDS) set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable") endif() diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index e060989f82..7ec92206f9 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -440,6 +440,10 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Windows") endif() target_compile_definitions(dynarmic PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) +if (DYNARMIC_ENABLE_LTO) + set_property(TARGET dynarmic PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) +endif() + if (DYNARMIC_USE_PRECOMPILED_HEADERS) set(PRECOMPILED_HEADERS "$<$:${CMAKE_CURRENT_SOURCE_DIR}/ir/ir_emitter.h>") if ("x86_64" IN_LIST ARCHITECTURE) From 43c41e4db5596e12ce63b002c8d1543c3e26ccca Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 8 Sep 2025 00:54:48 +0200 Subject: [PATCH 16/32] [compat] openbsd port fixes (#273) Signed-off-by: lizzie Co-authored-by: crueter Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/273 Reviewed-by: crueter Co-authored-by: lizzie Co-committed-by: lizzie --- CMakeLists.txt | 4 +++- README.md | 1 + docs/Development.md | 1 + docs/build/OpenBSD.md | 10 ++++++++++ externals/CMakeLists.txt | 2 +- src/core/debugger/debugger.cpp | 2 +- src/dynarmic/CMakeLists.txt | 16 ++++++++++++++-- src/dynarmic/externals/CMakeLists.txt | 7 ------- .../src/dynarmic/backend/x64/block_of_code.cpp | 8 +++++++- .../src/dynarmic/common/spin_lock_x64.cpp | 2 +- 10 files changed, 39 insertions(+), 14 deletions(-) create mode 100644 docs/build/OpenBSD.md diff --git a/CMakeLists.txt b/CMakeLists.txt index 9abca561f3..03f97eb7e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,9 @@ CMAKE_DEPENDENT_OPTION(ENABLE_SDL2 "Enable the SDL2 frontend" ON "NOT ANDROID" O set(EXT_DEFAULT ON) -if (PLATFORM_FREEBSD) +# See https://github.com/llvm/llvm-project/issues/123946 +# OpenBSD va_list doesn't play nice with precompiled headers +if (PLATFORM_FREEBSD OR PLATFORM_OPENBSD) set(EXT_DEFAULT OFF) endif() diff --git a/README.md b/README.md index 70f2c81296..e1f0b50b37 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ If you would like to contribute, we are open to new developers and pull requests * **Solaris**: [Solaris Building Guide](./docs/build/Solaris.md) * **FreeBSD**: [FreeBSD Building Guide](./docs/build/FreeBSD.md) * **macOS**: [macOS Building Guide](./docs/build/macOS.md) +* **OpenBSD**: [OpenBSD Building Guide](./docs/build/OpenBSD.md) ## Download diff --git a/docs/Development.md b/docs/Development.md index e60384e8ab..e4816cd1ec 100644 --- a/docs/Development.md +++ b/docs/Development.md @@ -6,6 +6,7 @@ * **Solaris**: [Solaris Building Guide](./build/Solaris.md) * **FreeBSD**: [FreeBSD Building Guide](./build/FreeBSD.md) * **macOS**: [macOS Building Guide](./build/macOS.md) +* **OpenBSD**: [OpenBSD Building Guide](./build/OpenBSD.md) # CPM diff --git a/docs/build/OpenBSD.md b/docs/build/OpenBSD.md new file mode 100644 index 0000000000..6a55fd269d --- /dev/null +++ b/docs/build/OpenBSD.md @@ -0,0 +1,10 @@ +# Building for OpenBSD + +```sh +pkg_add -u +pkg_add cmake nasm git boost unzip--iconv autoconf-2.72p0 bash ffmpeg glslang gmake llvm-19.1.7p3 qt6 jq +git --recursive https://git.eden-emu.dev/eden-emu/eden +cmake -DCMAKE_C_COMPILER=clang-19 -DCMAKE_CXX_COMPILER=clang++-19 -DDYNARMIC_USE_PRECOMPILED_HEADERS=OFF -DCMAKE_BUILD_TYPE=Debug -DENABLE_QT=OFF -DENABLE_OPENSSL=OFF -DENABLE_WEB_SERVICE=OFF -B /usr/obj/eden +``` + +- Modify `externals/ffmpeg/CMakeFiles/ffmpeg-build/build.make` to use `-j$(nproc)` instead of just `-j`. diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index e917e4e7d8..25886021e2 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -33,7 +33,7 @@ endif() # Xbyak (also used by Dynarmic, so needs to be added first) if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) - if (PLATFORM_SUN) + if (PLATFORM_SUN OR PLATFORM_OPENBSD) AddJsonPackage(xbyak_sun) else() AddJsonPackage(xbyak) diff --git a/src/core/debugger/debugger.cpp b/src/core/debugger/debugger.cpp index 7fe22fdce2..460e0d19b4 100644 --- a/src/core/debugger/debugger.cpp +++ b/src/core/debugger/debugger.cpp @@ -7,7 +7,7 @@ #include #include -#if BOOST_VERSION > 108300 && (!defined(_WINDOWS) && !defined(ANDROID)) || defined(YUZU_BOOST_v1) +#if BOOST_VERSION > 108400 && (!defined(_WINDOWS) && !defined(ANDROID)) || defined(YUZU_BOOST_v1) #define USE_BOOST_v1 #endif diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index d505d16553..842eb91a88 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -14,12 +14,24 @@ endif() # Dynarmic project options option(DYNARMIC_ENABLE_CPU_FEATURE_DETECTION "Turning this off causes dynarmic to assume the host CPU doesn't support anything later than SSE3" ON) -option(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT "Enables support for systems that require W^X" OFF) + +if (PLATFORM_OPENBSD) + option(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT "Enables support for systems that require W^X" ON) +else() + option(DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT "Enables support for systems that require W^X" OFF) +endif() + option(DYNARMIC_FATAL_ERRORS "Errors are fatal" OFF) option(DYNARMIC_IGNORE_ASSERTS "Ignore asserts" OFF) option(DYNARMIC_TESTS_USE_UNICORN "Enable fuzzing tests against unicorn" OFF) option(DYNARMIC_USE_LLVM "Support disassembly of jitted x86_64 code using LLVM" OFF) -option(DYNARMIC_USE_PRECOMPILED_HEADERS "Use precompiled headers" ON) + +if (PLATFORM_OPENBSD) + option(DYNARMIC_USE_PRECOMPILED_HEADERS "Use precompiled headers" OFF) +else() + option(DYNARMIC_USE_PRECOMPILED_HEADERS "Use precompiled headers" ON) +endif() + option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF) option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. cross-compiling)" OFF) option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT}) diff --git a/src/dynarmic/externals/CMakeLists.txt b/src/dynarmic/externals/CMakeLists.txt index ba70797a84..26f9290ed8 100644 --- a/src/dynarmic/externals/CMakeLists.txt +++ b/src/dynarmic/externals/CMakeLists.txt @@ -60,13 +60,6 @@ AddJsonPackage( # endif() # endif() -# unordered_dense - -# AddJsonPackage( -# NAME unordered-dense -# BUNDLED_PACKAGE ${DYNARMIC_USE_BUNDLED_EXTERNALS} -# ) - # xbyak # uncomment if in an independent repo diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index 5a33ac7727..d5d5f089ff 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -225,8 +225,14 @@ bool IsUnderRosetta() { } // anonymous namespace +#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT +static const auto default_cg_mode = Xbyak::DontSetProtectRWE; +#else +static const auto default_cg_mode = nullptr; //Allow RWE +#endif + BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, std::function rcp) - : Xbyak::CodeGenerator(total_code_size, nullptr, &s_allocator) + : Xbyak::CodeGenerator(total_code_size, default_cg_mode, &s_allocator) , cb(std::move(cb)) , jsi(jsi) , constant_pool(*this, CONSTANT_POOL_SIZE) diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp index 474c2f8404..7c0ba8a890 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp @@ -37,7 +37,7 @@ namespace { struct SpinLockImpl { void Initialize(); - Xbyak::CodeGenerator code; + Xbyak::CodeGenerator code = Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE); void (*lock)(volatile int*); void (*unlock)(volatile int*); From 2f82b63e6ad4f26544e74f86224f4e240933d602 Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 8 Sep 2025 00:57:08 +0200 Subject: [PATCH 17/32] [user] prioritize 'user' directory if it exists (without needing a portable build) + docs (#338) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/338 Reviewed-by: crueter Co-authored-by: lizzie Co-committed-by: lizzie --- CMakeLists.txt | 2 -- docs/User.md | 11 +++++++++++ src/common/CMakeLists.txt | 4 ---- src/common/fs/fs_paths.h | 4 ++-- src/common/fs/path_util.cpp | 30 +++++++++++------------------- 5 files changed, 24 insertions(+), 27 deletions(-) create mode 100644 docs/User.md diff --git a/CMakeLists.txt b/CMakeLists.txt index 03f97eb7e5..5e3a45d8c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,8 +117,6 @@ option(YUZU_ENABLE_LTO "Enable link-time optimization" OFF) option(YUZU_DOWNLOAD_TIME_ZONE_DATA "Always download time zone binaries" ON) -option(YUZU_ENABLE_PORTABLE "Allow yuzu to enable portable mode if a user folder is found in the CWD" ON) - CMAKE_DEPENDENT_OPTION(YUZU_USE_FASTER_LD "Check if a faster linker is available" ON "NOT WIN32" OFF) CMAKE_DEPENDENT_OPTION(USE_SYSTEM_MOLTENVK "Use the system MoltenVK lib (instead of the bundled one)" OFF "APPLE" OFF) diff --git a/docs/User.md b/docs/User.md new file mode 100644 index 0000000000..cfc81063f8 --- /dev/null +++ b/docs/User.md @@ -0,0 +1,11 @@ +# User configuration + +## Configuration directories + +Eden will store configuration in the following directories: + +- **Windows**: `%AppData%\Roaming`. +- **Android**: Data is stored internally. +- **Linux, macOS, FreeBSD, Solaris, OpenBSD**: `$XDG_DATA_HOME`, `$XDG_CACHE_HOME`, `$XDG_CONFIG_HOME`. + +If a `user` directory is present in the current working directory, that will override all global configuration directories and the emulator will use that instead. diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 1aa433db32..9b898837bc 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -162,10 +162,6 @@ add_library( zstd_compression.h ) -if(YUZU_ENABLE_PORTABLE) - add_compile_definitions(YUZU_ENABLE_PORTABLE) -endif() - if(WIN32) target_sources(common PRIVATE windows/timer_resolution.cpp windows/timer_resolution.h) diff --git a/src/common/fs/fs_paths.h b/src/common/fs/fs_paths.h index 40891380c9..5cdf9be39d 100644 --- a/src/common/fs/fs_paths.h +++ b/src/common/fs/fs_paths.h @@ -12,7 +12,6 @@ #define PORTABLE_DIR "user" // Sub-directories contained within a yuzu data directory - #define AMIIBO_DIR "amiibo" #define CACHE_DIR "cache" #define CONFIG_DIR "config" @@ -28,11 +27,12 @@ #define SHADER_DIR "shader" #define TAS_DIR "tas" #define ICONS_DIR "icons" + +// Compatibility with other emulators #define CITRON_DIR "citron" #define SUDACHI_DIR "sudachi" #define YUZU_DIR "yuzu" #define SUYU_DIR "suyu" // yuzu-specific files - #define LOG_FILE "eden_log.txt" diff --git a/src/common/fs/path_util.cpp b/src/common/fs/path_util.cpp index fa1403225e..a2f5cb92ff 100644 --- a/src/common/fs/path_util.cpp +++ b/src/common/fs/path_util.cpp @@ -101,61 +101,53 @@ public: legacy_paths.insert_or_assign(legacy_path, new_path); } + /// In non-android devices, the current directory will first search for "user" + /// if such directory (and it must be a directory) is found, that takes priority + /// over the global configuration directory (in other words, portable directories + /// take priority over the global ones, always) + /// On Android, the behaviour is to look for the current directory only. void Reinitialize(fs::path eden_path = {}) { fs::path eden_path_cache; fs::path eden_path_config; - #ifdef _WIN32 -#ifdef YUZU_ENABLE_PORTABLE + // User directory takes priority over global %AppData% directory eden_path = GetExeDirectory() / PORTABLE_DIR; -#endif - if (!IsDir(eden_path)) { + if (!Exists(eden_path) || !IsDir(eden_path)) { eden_path = GetAppDataRoamingDirectory() / EDEN_DIR; } - eden_path_cache = eden_path / CACHE_DIR; eden_path_config = eden_path / CONFIG_DIR; - #define LEGACY_PATH(titleName, upperName) GenerateLegacyPath(LegacyPath::titleName##Dir, GetAppDataRoamingDirectory() / upperName##_DIR); \ GenerateLegacyPath(LegacyPath::titleName##ConfigDir, GetAppDataRoamingDirectory() / upperName##_DIR / CONFIG_DIR); \ GenerateLegacyPath(LegacyPath::titleName##CacheDir, GetAppDataRoamingDirectory() / upperName##_DIR / CACHE_DIR); - LEGACY_PATH(Citron, CITRON) LEGACY_PATH(Sudachi, SUDACHI) LEGACY_PATH(Yuzu, YUZU) LEGACY_PATH(Suyu, SUYU) #undef LEGACY_PATH - #elif ANDROID ASSERT(!eden_path.empty()); eden_path_cache = eden_path / CACHE_DIR; eden_path_config = eden_path / CONFIG_DIR; #else -#ifdef YUZU_ENABLE_PORTABLE eden_path = GetCurrentDir() / PORTABLE_DIR; -#endif - if (Exists(eden_path) && IsDir(eden_path)) { - eden_path_cache = eden_path / CACHE_DIR; - eden_path_config = eden_path / CONFIG_DIR; - } else { + if (!Exists(eden_path) || !IsDir(eden_path)) { eden_path = GetDataDirectory("XDG_DATA_HOME") / EDEN_DIR; eden_path_cache = GetDataDirectory("XDG_CACHE_HOME") / EDEN_DIR; eden_path_config = GetDataDirectory("XDG_CONFIG_HOME") / EDEN_DIR; + } else { + eden_path_cache = eden_path / CACHE_DIR; + eden_path_config = eden_path / CONFIG_DIR; } - #define LEGACY_PATH(titleName, upperName) GenerateLegacyPath(LegacyPath::titleName##Dir, GetDataDirectory("XDG_DATA_HOME") / upperName##_DIR); \ GenerateLegacyPath(LegacyPath::titleName##ConfigDir, GetDataDirectory("XDG_CONFIG_HOME") / upperName##_DIR); \ GenerateLegacyPath(LegacyPath::titleName##CacheDir, GetDataDirectory("XDG_CACHE_HOME") / upperName##_DIR); - LEGACY_PATH(Citron, CITRON) LEGACY_PATH(Sudachi, SUDACHI) LEGACY_PATH(Yuzu, YUZU) LEGACY_PATH(Suyu, SUYU) - #undef LEGACY_PATH - #endif - GenerateEdenPath(EdenPath::EdenDir, eden_path); GenerateEdenPath(EdenPath::AmiiboDir, eden_path / AMIIBO_DIR); GenerateEdenPath(EdenPath::CacheDir, eden_path_cache); From ecc99ce9ab54e4a14dc5f67aac932316a332b6a1 Mon Sep 17 00:00:00 2001 From: crueter Date: Mon, 8 Sep 2025 02:37:55 +0200 Subject: [PATCH 18/32] [dynarmic] spinlock: (re-)allow RWE on execute-supported targets (#393) regr. #273 Signed-off-by: crueter Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/393 --- src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp index 7c0ba8a890..c949ed7de8 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp @@ -11,6 +11,12 @@ #include "dynarmic/backend/x64/hostloc.h" #include "dynarmic/common/spin_lock.h" +#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT +static const auto default_cg_mode = Xbyak::DontSetProtectRWE; +#else +static const auto default_cg_mode = nullptr; //Allow RWE +#endif + namespace Dynarmic { void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { @@ -37,7 +43,7 @@ namespace { struct SpinLockImpl { void Initialize(); - Xbyak::CodeGenerator code = Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE); + Xbyak::CodeGenerator code = Xbyak::CodeGenerator(4096, default_cg_mode); void (*lock)(volatile int*); void (*unlock)(volatile int*); From 95fd9ecdd7f1028391e7557dc54957abe78a3bb0 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:54:53 +0000 Subject: [PATCH 19/32] [dynarmic] reduce opt pass latency Signed-off-by: lizzie --- src/dynarmic/docs/RegisterAllocator.md | 40 +- .../docs/ReturnStackBufferOptimization.md | 162 +- src/dynarmic/src/dynarmic/CMakeLists.txt | 199 +-- .../backend/arm64/a32_address_space.cpp | 18 +- .../backend/arm64/a64_address_space.cpp | 19 +- .../backend/riscv64/a32_address_space.cpp | 16 +- .../dynarmic/backend/x64/a32_interface.cpp | 16 +- .../dynarmic/backend/x64/a64_interface.cpp | 18 +- .../src/dynarmic/common/memory_pool.cpp | 13 - .../src/dynarmic/common/memory_pool.h | 61 - src/dynarmic/src/dynarmic/ir/basic_block.cpp | 7 +- src/dynarmic/src/dynarmic/ir/basic_block.h | 3 - src/dynarmic/src/dynarmic/ir/ir_emitter.cpp | 21 - .../ir/opt/a32_constant_memory_reads_pass.cpp | 70 - .../ir/opt/a32_get_set_elimination_pass.cpp | 382 ----- .../ir/opt/a64_callback_config_pass.cpp | 57 - .../ir/opt/a64_get_set_elimination_pass.cpp | 165 -- .../ir/opt/a64_merge_interpret_blocks.cpp | 57 - .../ir/opt/constant_propagation_pass.cpp | 559 ------ .../ir/opt/dead_code_elimination_pass.cpp | 23 - .../dynarmic/ir/opt/identity_removal_pass.cpp | 44 - src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h | 127 -- .../src/dynarmic/ir/opt/naming_pass.cpp | 18 - src/dynarmic/src/dynarmic/ir/opt/passes.h | 47 - .../src/dynarmic/ir/opt/polyfill_pass.cpp | 218 --- .../src/dynarmic/ir/opt/verification_pass.cpp | 51 - src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 1502 +++++++++++++++++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 34 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 10 +- src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 2 +- src/dynarmic/tests/CMakeLists.txt | 146 +- src/dynarmic/tests/print_info.cpp | 2 +- 32 files changed, 1808 insertions(+), 2299 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.cpp delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.h delete mode 100644 src/dynarmic/src/dynarmic/ir/ir_emitter.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/passes.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.h diff --git a/src/dynarmic/docs/RegisterAllocator.md b/src/dynarmic/docs/RegisterAllocator.md index fea6f19e6a..5a7210c791 100644 --- a/src/dynarmic/docs/RegisterAllocator.md +++ b/src/dynarmic/docs/RegisterAllocator.md @@ -16,19 +16,31 @@ Note that `Use`ing a value decrements its `use_count` by one. When the `use_coun The member functions on `RegAlloc` are just a combination of the above concepts. +The following registers are reserved for internal use and should NOT participate in register allocation: +- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access. +- `%rsp`: Stack pointer. + +The layout convenes `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate. + +Do NEVER modify `%r15`, we must make it clear that this register is "immutable" for the entirety of the JIT block duration. + ### `Scratch` - Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr) - Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm) +```c++ +Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr); +Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm); +``` At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope. ### Pure `Use` - Xbyak::Reg64 UseGpr(Argument& arg); - Xbyak::Xmm UseXmm(Argument& arg); - OpArg UseOpArg(Argument& arg); - void Use(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseGpr(Argument& arg); +Xbyak::Xmm UseXmm(Argument& arg); +OpArg UseOpArg(Argument& arg); +void Use(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it @@ -39,9 +51,11 @@ This register **must not** have it's value changed. ### `UseScratch` - Xbyak::Reg64 UseScratchGpr(Argument& arg); - Xbyak::Xmm UseScratchXmm(Argument& arg); - void UseScratch(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseScratchGpr(Argument& arg); +Xbyak::Xmm UseScratchXmm(Argument& arg); +void UseScratch(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it @@ -55,7 +69,9 @@ You are free to modify the value in the register. The register is discarded at t A `Define` is the defintion of a value. This is the only time when a value may be set. - void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +```c++ +void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +``` By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the value to the specified register `reg`. @@ -64,7 +80,9 @@ value to the specified register `reg`. Adding a `Define` to an existing value. - void DefineValue(IR::Inst* inst, Argument& arg); +```c++ +void DefineValue(IR::Inst* inst, Argument& arg); +``` You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are emitted. diff --git a/src/dynarmic/docs/ReturnStackBufferOptimization.md b/src/dynarmic/docs/ReturnStackBufferOptimization.md index 6ffe41bcc6..0e72c3bce8 100644 --- a/src/dynarmic/docs/ReturnStackBufferOptimization.md +++ b/src/dynarmic/docs/ReturnStackBufferOptimization.md @@ -23,15 +23,17 @@ One complication dynarmic has is that a compiled block is not uniquely identifia the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block. - u64 LocationDescriptor::UniqueHash() const { - // This value MUST BE UNIQUE. - // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint - u64 pc_u64 = u64(arm_pc) << 32; - u64 fpscr_u64 = u64(fpscr.Value()); - u64 t_u64 = cpsr.T() ? 1 : 0; - u64 e_u64 = cpsr.E() ? 2 : 0; - return pc_u64 | fpscr_u64 | t_u64 | e_u64; - } +```c++ +u64 LocationDescriptor::UniqueHash() const { + // This value MUST BE UNIQUE. + // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint + u64 pc_u64 = u64(arm_pc) << 32; + u64 fpscr_u64 = u64(fpscr.Value()); + u64 t_u64 = cpsr.T() ? 1 : 0; + u64 e_u64 = cpsr.E() ? 2 : 0; + return pc_u64 | fpscr_u64 | t_u64 | e_u64; +} +``` ## Our implementation isn't actually a stack @@ -49,97 +51,107 @@ host addresses for the corresponding the compiled blocks. size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8 showed degraded performance. - struct JitState { - // ... +```c++ +struct JitState { + // ... - static constexpr size_t RSBSize = 8; // MUST be a power of 2. - u32 rsb_ptr = 0; - std::array rsb_location_descriptors; - std::array rsb_codeptrs; - void ResetRSB(); + static constexpr size_t RSBSize = 8; // MUST be a power of 2. + u32 rsb_ptr = 0; + std::array rsb_location_descriptors; + std::array rsb_codeptrs; + void ResetRSB(); - // ... - }; + // ... +}; +``` ### RSB Push We insert our prediction at the insertion point iff the RSB doesn't already contain a prediction with the same `UniqueHash`. - void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { + using namespace Xbyak::util; - ASSERT(inst->GetArg(0).IsImmediate()); - u64 imm64 = inst->GetArg(0).GetU64(); + ASSERT(inst->GetArg(0).IsImmediate()); + u64 imm64 = inst->GetArg(0).GetU64(); - Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); - Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); - Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); - u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() - ? u64(unique_hash_to_code_ptr[imm64]) - : u64(code->GetReturnFromRunCodeAddress()); + Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); + Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); + Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); + u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() + ? u64(unique_hash_to_code_ptr[imm64]) + : u64(code->GetReturnFromRunCodeAddress()); - code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); - code->add(index_reg, 1); - code->and_(index_reg, u32(JitState::RSBSize - 1)); + code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); + code->add(index_reg, 1); + code->and_(index_reg, u32(JitState::RSBSize - 1)); - code->mov(loc_desc_reg, u64(imm64)); - CodePtr patch_location = code->getCurr(); - patch_unique_hash_locations[imm64].emplace_back(patch_location); - code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. - code->EnsurePatchLocationSize(patch_location, 10); + code->mov(loc_desc_reg, u64(imm64)); + CodePtr patch_location = code->getCurr(); + patch_unique_hash_locations[imm64].emplace_back(patch_location); + code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. + code->EnsurePatchLocationSize(patch_location, 10); - Xbyak::Label label; - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->je(label, code->T_SHORT); - } - - code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); - code->L(label); + Xbyak::Label label; + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->je(label, code->T_SHORT); } + code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); + code->L(label); +} +``` + In pseudocode: - for (i := 0 .. RSBSize-1) - if (rsb_location_descriptors[i] == imm64) - goto label; - rsb_ptr++; - rsb_ptr %= RSBSize; - rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash - rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; - label: +```c++ + for (i := 0 .. RSBSize-1) + if (rsb_location_descriptors[i] == imm64) + goto label; + rsb_ptr++; + rsb_ptr %= RSBSize; + rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash + rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; +label: +``` ## RSB Pop To check if a predicition is in the RSB, we linearly scan the RSB. - void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { + using namespace Xbyak::util; - // This calculation has to match up with IREmitter::PushRSB - code->mov(ecx, MJitStateReg(Arm::Reg::PC)); - code->shl(rcx, 32); - code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); - code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); - code->or_(rbx, rcx); + // This calculation has to match up with IREmitter::PushRSB + code->mov(ecx, MJitStateReg(Arm::Reg::PC)); + code->shl(rcx, 32); + code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); + code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); + code->or_(rbx, rcx); - code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); - } - - code->jmp(rax); + code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); } + code->jmp(rax); +} +``` + In pseudocode: - rbx := ComputeUniqueHash() - rax := ReturnToDispatch - for (i := 0 .. RSBSize-1) - if (rbx == rsb_location_descriptors[i]) - rax = rsb_codeptrs[i] - goto rax \ No newline at end of file +```c++ +rbx := ComputeUniqueHash() +rax := ReturnToDispatch +for (i := 0 .. RSBSize-1) + if (rbx == rsb_location_descriptors[i]) + rax = rsb_codeptrs[i] +goto rax +``` diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index 7ec92206f9..f35f22f15c 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -56,8 +56,6 @@ add_library(dynarmic common/lut_from_list.h common/math_util.cpp common/math_util.h - common/memory_pool.cpp - common/memory_pool.h common/safe_ops.h common/spin_lock.h common/string_util.h @@ -78,7 +76,6 @@ add_library(dynarmic ir/basic_block.cpp ir/basic_block.h ir/cond.h - ir/ir_emitter.cpp ir/ir_emitter.h ir/location_descriptor.cpp ir/location_descriptor.h @@ -87,78 +84,59 @@ add_library(dynarmic ir/opcodes.cpp ir/opcodes.h ir/opcodes.inc - ir/opt/constant_propagation_pass.cpp - ir/opt/dead_code_elimination_pass.cpp - ir/opt/identity_removal_pass.cpp - ir/opt/ir_matcher.h - ir/opt/naming_pass.cpp - ir/opt/passes.h - ir/opt/polyfill_pass.cpp - ir/opt/verification_pass.cpp + ir/opt_passes.cpp + ir/opt_passes.h ir/terminal.h ir/type.cpp ir/type.h ir/value.cpp ir/value.h + # A32 + frontend/A32/a32_ir_emitter.cpp + frontend/A32/a32_ir_emitter.h + frontend/A32/a32_location_descriptor.cpp + frontend/A32/a32_location_descriptor.h + frontend/A32/decoder/arm.h + frontend/A32/decoder/arm.inc + frontend/A32/decoder/asimd.h + frontend/A32/decoder/asimd.inc + frontend/A32/decoder/thumb16.h + frontend/A32/decoder/thumb16.inc + frontend/A32/decoder/thumb32.h + frontend/A32/decoder/thumb32.inc + frontend/A32/decoder/vfp.h + frontend/A32/decoder/vfp.inc + frontend/A32/disassembler/disassembler.h + frontend/A32/disassembler/disassembler_arm.cpp + frontend/A32/disassembler/disassembler_thumb.cpp + frontend/A32/FPSCR.h + frontend/A32/ITState.h + frontend/A32/PSR.h + frontend/A32/translate/a32_translate.cpp + frontend/A32/translate/a32_translate.h + frontend/A32/translate/conditional_state.cpp + frontend/A32/translate/conditional_state.h + frontend/A32/translate/translate_arm.cpp + frontend/A32/translate/translate_thumb.cpp + interface/A32/a32.h + interface/A32/arch_version.h + interface/A32/config.h + interface/A32/coprocessor.h + interface/A32/coprocessor_util.h + interface/A32/disassembler.h + # A64 + frontend/A64/a64_ir_emitter.cpp + frontend/A64/a64_ir_emitter.h + frontend/A64/a64_location_descriptor.cpp + frontend/A64/a64_location_descriptor.h + frontend/A64/decoder/a64.h + frontend/A64/decoder/a64.inc + frontend/A64/translate/a64_translate.cpp + frontend/A64/translate/a64_translate.h + interface/A64/a64.h + interface/A64/config.h ) -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A32/a32_ir_emitter.cpp - frontend/A32/a32_ir_emitter.h - frontend/A32/a32_location_descriptor.cpp - frontend/A32/a32_location_descriptor.h - frontend/A32/decoder/arm.h - frontend/A32/decoder/arm.inc - frontend/A32/decoder/asimd.h - frontend/A32/decoder/asimd.inc - frontend/A32/decoder/thumb16.h - frontend/A32/decoder/thumb16.inc - frontend/A32/decoder/thumb32.h - frontend/A32/decoder/thumb32.inc - frontend/A32/decoder/vfp.h - frontend/A32/decoder/vfp.inc - frontend/A32/disassembler/disassembler.h - frontend/A32/disassembler/disassembler_arm.cpp - frontend/A32/disassembler/disassembler_thumb.cpp - frontend/A32/FPSCR.h - frontend/A32/ITState.h - frontend/A32/PSR.h - frontend/A32/translate/a32_translate.cpp - frontend/A32/translate/a32_translate.h - frontend/A32/translate/conditional_state.cpp - frontend/A32/translate/conditional_state.h - frontend/A32/translate/translate_arm.cpp - frontend/A32/translate/translate_thumb.cpp - interface/A32/a32.h - interface/A32/arch_version.h - interface/A32/config.h - interface/A32/coprocessor.h - interface/A32/coprocessor_util.h - interface/A32/disassembler.h - ir/opt/a32_constant_memory_reads_pass.cpp - ir/opt/a32_get_set_elimination_pass.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A64/a64_ir_emitter.cpp - frontend/A64/a64_ir_emitter.h - frontend/A64/a64_location_descriptor.cpp - frontend/A64/a64_location_descriptor.h - frontend/A64/decoder/a64.h - frontend/A64/decoder/a64.inc - frontend/A64/translate/a64_translate.cpp - frontend/A64/translate/a64_translate.h - interface/A64/a64.h - interface/A64/config.h - ir/opt/a64_callback_config_pass.cpp - ir/opt/a64_get_set_elimination_pass.cpp - ir/opt/a64_merge_interpret_blocks.cpp - ) -endif() - if ("x86_64" IN_LIST ARCHITECTURE) target_compile_definitions(dynarmic PRIVATE XBYAK_OLD_DISP_CHECK=1) target_link_libraries(dynarmic @@ -211,29 +189,21 @@ if ("x86_64" IN_LIST ARCHITECTURE) common/spin_lock_x64.h common/x64_disassemble.cpp common/x64_disassemble.h + # A32 + backend/x64/a32_emit_x64.cpp + backend/x64/a32_emit_x64.h + backend/x64/a32_emit_x64_memory.cpp + backend/x64/a32_interface.cpp + backend/x64/a32_jitstate.cpp + backend/x64/a32_jitstate.h + # A64 + backend/x64/a64_emit_x64.cpp + backend/x64/a64_emit_x64.h + backend/x64/a64_emit_x64_memory.cpp + backend/x64/a64_interface.cpp + backend/x64/a64_jitstate.cpp + backend/x64/a64_jitstate.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a32_emit_x64.cpp - backend/x64/a32_emit_x64.h - backend/x64/a32_emit_x64_memory.cpp - backend/x64/a32_interface.cpp - backend/x64/a32_jitstate.cpp - backend/x64/a32_jitstate.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a64_emit_x64.cpp - backend/x64/a64_emit_x64.h - backend/x64/a64_emit_x64_memory.cpp - backend/x64/a64_interface.cpp - backend/x64/a64_jitstate.cpp - backend/x64/a64_jitstate.h - ) - endif() endif() if ("arm64" IN_LIST ARCHITECTURE) @@ -277,25 +247,17 @@ if ("arm64" IN_LIST ARCHITECTURE) backend/arm64/verbose_debugging_output.h common/spin_lock_arm64.cpp common/spin_lock_arm64.h + # A32 + backend/arm64/a32_address_space.cpp + backend/arm64/a32_address_space.h + backend/arm64/a32_core.h + backend/arm64/a32_interface.cpp + # A64 + backend/arm64/a64_address_space.cpp + backend/arm64/a64_address_space.h + backend/arm64/a64_core.h + backend/arm64/a64_interface.cpp ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a32_address_space.cpp - backend/arm64/a32_address_space.h - backend/arm64/a32_core.h - backend/arm64/a32_interface.cpp - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a64_address_space.cpp - backend/arm64/a64_address_space.h - backend/arm64/a64_core.h - backend/arm64/a64_interface.cpp - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -324,21 +286,14 @@ if ("riscv" IN_LIST ARCHITECTURE) backend/riscv64/reg_alloc.cpp backend/riscv64/reg_alloc.h backend/riscv64/stack_layout.h + # A32 + backend/riscv64/a32_address_space.cpp + backend/riscv64/a32_address_space.h + backend/riscv64/a32_core.h + backend/riscv64/a32_interface.cpp + backend/riscv64/code_block.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - backend/riscv64/a32_address_space.cpp - backend/riscv64/a32_address_space.h - backend/riscv64/a32_core.h - backend/riscv64/a32_interface.cpp - backend/riscv64/code_block.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") - endif() + message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") endif() if (WIN32) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index bbdf925ff4..8e48fa3687 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -16,7 +16,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -163,21 +163,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index d4fe9a9cb7..2b50ad9ea3 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -331,22 +331,7 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp index efa211618b..0bb9591411 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/backend/riscv64/stack_layout.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::RV64 { @@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp index b116ec180e..382eb70f3f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp @@ -29,7 +29,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/location_descriptor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A32 { @@ -217,19 +217,7 @@ private: block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE); IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index ddd2327395..c65b582982 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -25,7 +25,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/a64.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A64 { @@ -275,21 +275,7 @@ private: const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block).entrypoint; } diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.cpp b/src/dynarmic/src/dynarmic/common/memory_pool.cpp deleted file mode 100644 index f41dd92af5..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.cpp +++ /dev/null @@ -1,13 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/common/memory_pool.h" - -#include - -namespace Dynarmic::Common { - - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.h b/src/dynarmic/src/dynarmic/common/memory_pool.h deleted file mode 100644 index c99316e107..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -namespace Dynarmic::Common { - -/// @tparam object_size Byte-size of objects to construct -/// @tparam slab_size Number of objects to have per slab -template -class Pool { -public: - inline Pool() noexcept { - AllocateNewSlab(); - } - inline ~Pool() noexcept { - std::free(current_slab); - for (char* slab : slabs) { - std::free(slab); - } - } - - Pool(const Pool&) = delete; - Pool(Pool&&) = delete; - - Pool& operator=(const Pool&) = delete; - Pool& operator=(Pool&&) = delete; - - /// @brief Returns a pointer to an `object_size`-bytes block of memory. - [[nodiscard]] void* Alloc() noexcept { - if (remaining == 0) { - slabs.push_back(current_slab); - AllocateNewSlab(); - } - void* ret = static_cast(current_ptr); - current_ptr += object_size; - remaining--; - return ret; - } -private: - /// @brief Allocates a completely new memory slab. - /// Used when an entirely new slab is needed - /// due the current one running out of usable space. - void AllocateNewSlab() noexcept { - current_slab = static_cast(std::malloc(object_size * slab_size)); - current_ptr = current_slab; - remaining = slab_size; - } - - std::vector slabs; - char* current_slab = nullptr; - char* current_ptr = nullptr; - size_t remaining = 0; -}; - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index b00ab3cb20..a13a7ebfc9 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -15,8 +15,6 @@ #include #include "dynarmic/common/assert.h" - -#include "dynarmic/common/memory_pool.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A64/a64_types.h" #include "dynarmic/ir/cond.h" @@ -27,8 +25,7 @@ namespace Dynarmic::IR { Block::Block(const LocationDescriptor& location) : location{location}, end_location{location}, - cond{Cond::AL}, - instruction_alloc_pool{std::make_unique>()} + cond{Cond::AL} { } @@ -40,7 +37,7 @@ Block::Block(const LocationDescriptor& location) /// @param args A sequence of Value instances used as arguments for the instruction. /// @returns Iterator to the newly created instruction. Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) noexcept { - IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode); + IR::Inst* inst = new IR::Inst(opcode); DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index e88dc92fc4..2f2d9ab6de 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -21,7 +21,6 @@ #include "dynarmic/ir/terminal.h" #include "dynarmic/ir/value.h" #include "dynarmic/ir/dense_list.h" -#include "dynarmic/common/memory_pool.h" namespace Dynarmic::IR { @@ -174,8 +173,6 @@ private: LocationDescriptor end_location; /// Conditional to pass in order to execute this block Cond cond; - /// Memory pool for instruction list - std::unique_ptr> instruction_alloc_pool; /// Terminal instruction of this block. Terminal terminal = Term::Invalid{}; /// Number of cycles this block takes to execute if the conditional fails. diff --git a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp deleted file mode 100644 index a8ef7e2989..0000000000 --- a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/ir_emitter.h" - -#include - -#include "dynarmic/common/assert.h" -#include - -#include "dynarmic/ir/opcodes.h" - -namespace Dynarmic::IR { - - -} // namespace Dynarmic::IR diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp deleted file mode 100644 index 9699f18345..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/interface/A32/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { - for (auto& inst : block) { - switch (inst.GetOpcode()) { - case IR::Opcode::A32ReadMemory8: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u8 value_from_memory = cb->MemoryRead8(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory16: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u16 value_from_memory = cb->MemoryRead16(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory32: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u32 value_from_memory = cb->MemoryRead32(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory64: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u64 value_from_memory = cb->MemoryRead64(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp deleted file mode 100644 index 499b38b120..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp +++ /dev/null @@ -1,382 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A32/a32_ir_emitter.h" -#include "dynarmic/frontend/A32/a32_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -namespace { - -void FlagsPass(IR::Block& block) { - using Iterator = std::reverse_iterator; - - struct FlagInfo { - bool set_not_required = false; - bool has_value_request = false; - Iterator value_request = {}; - }; - struct ValuelessFlagInfo { - bool set_not_required = false; - }; - ValuelessFlagInfo nzcvq; - ValuelessFlagInfo nzcv; - ValuelessFlagInfo nz; - FlagInfo c_flag; - FlagInfo ge; - - auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(value); - } - info.has_value_request = false; - - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_get = [](FlagInfo& info, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(IR::Value{&*inst}); - } - info.has_value_request = true; - info.value_request = inst; - }; - - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetCFlag: { - do_get(c_flag, inst); - break; - } - case IR::Opcode::A32SetCpsrNZCV: { - if (c_flag.has_value_request) { - ir.SetInsertionPointBefore(inst.base()); // base is one ahead - IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); - c_flag.value_request->ReplaceUsesWith(c); - c_flag.has_value_request = false; - break; // This case will be executed again because of the above - } - - do_set_valueless(nzcv, inst); - - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVRaw: { - if (c_flag.has_value_request) { - nzcv.set_not_required = false; - } - - do_set_valueless(nzcv, inst); - - nzcvq = {}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVQ: { - if (c_flag.has_value_request) { - nzcvq.set_not_required = false; - } - - do_set_valueless(nzcvq, inst); - - nzcv = {.set_not_required = true}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZ: { - do_set_valueless(nz, inst); - - nzcvq = {}; - nzcv = {}; - break; - } - case IR::Opcode::A32SetCpsrNZC: { - if (c_flag.has_value_request) { - c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); - c_flag.has_value_request = false; - } - - if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { - const auto nz_value = inst->GetArg(0); - - inst->Invalidate(); - - ir.SetInsertionPointBefore(inst.base()); - ir.SetCpsrNZ(IR::NZCV{nz_value}); - - nzcvq = {}; - nzcv = {}; - nz = {.set_not_required = true}; - break; - } - - if (nz.set_not_required && c_flag.set_not_required) { - inst->Invalidate(); - } else if (nz.set_not_required) { - inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); - } - nz.set_not_required = true; - c_flag.set_not_required = true; - - nzcv = {}; - nzcvq = {}; - break; - } - case IR::Opcode::A32SetGEFlags: { - do_set(ge, inst->GetArg(0), inst); - break; - } - case IR::Opcode::A32GetGEFlags: { - do_get(ge, inst); - break; - } - case IR::Opcode::A32SetGEFlagsCompressed: { - ge = {.set_not_required = true}; - break; - } - case IR::Opcode::A32OrQFlag: { - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcvq = {}; - nzcv = {}; - nz = {}; - c_flag = {}; - ge = {}; - } - break; - } - } - } -} - -void RegisterPass(IR::Block& block) { - using Iterator = IR::Block::iterator; - - struct RegInfo { - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array reg_info; - - const auto do_get = [](RegInfo& info, Iterator get_inst) { - if (info.register_value.IsEmpty()) { - info.register_value = IR::Value(&*get_inst); - return; - } - get_inst->ReplaceUsesWith(info.register_value); - }; - - const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { - if (info.last_set_instruction) { - (*info.last_set_instruction)->Invalidate(); - } - info = { - .register_value = value, - .last_set_instruction = set_inst, - }; - }; - - enum class ExtValueType { - Empty, - Single, - Double, - VectorDouble, - VectorQuad, - }; - struct ExtRegInfo { - ExtValueType value_type = {}; - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array ext_reg_info; - - const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { - if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = IR::Value(&*get_inst), - .last_set_instruction = std::nullopt, - }; - } - return; - } - get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); - }; - - const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { - if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - if (std::data(infos)[0].get().last_set_instruction) { - (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); - } - } - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = value, - .last_set_instruction = set_inst, - }; - } - }; - - // Location and version don't matter here. - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - ASSERT(reg != A32::Reg::PC); - const size_t reg_index = static_cast(reg); - do_get(reg_info[reg_index], inst); - break; - } - case IR::Opcode::A32SetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - if (reg == A32::Reg::PC) { - break; - } - const auto reg_index = static_cast(reg); - do_set(reg_info[reg_index], inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); - break; - } - case IR::Opcode::A32SetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - break; - } - case IR::Opcode::A32SetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst->GetArg(1), - inst); - break; - } - case IR::Opcode::A32GetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - do_ext_get(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_get(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst); - } - break; - } - case IR::Opcode::A32SetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - ir.SetInsertionPointAfter(inst); - const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); - do_ext_set(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - stored_value, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_set(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst->GetArg(1), - inst); - } - break; - } - default: { - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - ext_reg_info = {}; - } - break; - } - } - } -} - -} // namespace - -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { - FlagsPass(block); - RegisterPass(block); -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp deleted file mode 100644 index 79d9769520..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/frontend/A64/a64_ir_emitter.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { - if (conf.hook_data_cache_operations) { - return; - } - - for (auto& inst : block) { - if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { - continue; - } - - const auto op = static_cast(inst.GetArg(1).GetU64()); - if (op == A64::DataCacheOperation::ZeroByVA) { - A64::IREmitter ir{block}; - ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; - ir.SetInsertionPointBefore(&inst); - - size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); - IR::U64 addr{inst.GetArg(2)}; - - const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); - while (bytes >= 16) { - ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(16)); - bytes -= 16; - } - - while (bytes >= 8) { - ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(8)); - bytes -= 8; - } - - while (bytes >= 4) { - ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(4)); - bytes -= 4; - } - } - inst.Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp deleted file mode 100644 index 53e3b27176..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -void A64GetSetElimination(IR::Block& block) { - using Iterator = IR::Block::iterator; - - enum class TrackingType { - W, - X, - S, - D, - Q, - SP, - NZCV, - NZCVRaw, - }; - struct RegisterInfo { - IR::Value register_value; - TrackingType tracking_type; - bool set_instruction_present = false; - Iterator last_set_instruction; - }; - std::array reg_info; - std::array vec_info; - RegisterInfo sp_info; - RegisterInfo nzcv_info; - - const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { - if (info.set_instruction_present) { - info.last_set_instruction->Invalidate(); - block.Instructions().erase(info.last_set_instruction); - } - - info.register_value = value; - info.tracking_type = tracking_type; - info.set_instruction_present = true; - info.last_set_instruction = set_inst; - }; - - const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { - const auto do_nothing = [&] { - info = {}; - info.register_value = IR::Value(&*get_inst); - info.tracking_type = tracking_type; - }; - - if (info.register_value.IsEmpty()) { - do_nothing(); - return; - } - - if (info.tracking_type == tracking_type) { - get_inst->ReplaceUsesWith(info.register_value); - return; - } - - do_nothing(); - }; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A64GetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::W); - break; - } - case IR::Opcode::A64GetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::X); - break; - } - case IR::Opcode::A64GetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::S); - break; - } - case IR::Opcode::A64GetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::D); - break; - } - case IR::Opcode::A64GetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64GetSP: { - do_get(sp_info, inst, TrackingType::SP); - break; - } - case IR::Opcode::A64GetNZCVRaw: { - do_get(nzcv_info, inst, TrackingType::NZCVRaw); - break; - } - case IR::Opcode::A64SetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); - break; - } - case IR::Opcode::A64SetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); - break; - } - case IR::Opcode::A64SetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); - break; - } - case IR::Opcode::A64SetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); - break; - } - case IR::Opcode::A64SetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64SetSP: { - do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); - break; - } - case IR::Opcode::A64SetNZCV: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); - break; - } - case IR::Opcode::A64SetNZCVRaw: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcv_info = {}; - } - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - vec_info = {}; - sp_info = {}; - } - break; - } - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp deleted file mode 100644 index 25b7ef0ff1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_location_descriptor.h" -#include "dynarmic/frontend/A64/translate/a64_translate.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { - const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { - const auto instruction = cb->MemoryReadCode(location.PC()); - if (!instruction) - return false; - - IR::Block new_block{location}; - A64::TranslateSingleInstruction(new_block, location, *instruction); - - if (!new_block.Instructions().empty()) - return false; - - const IR::Terminal terminal = new_block.GetTerminal(); - if (auto term = boost::get(&terminal)) { - return term->next == location; - } - - return false; - }; - - IR::Terminal terminal = block.GetTerminal(); - auto term = boost::get(&terminal); - if (!term) - return; - - A64::LocationDescriptor location{term->next}; - size_t num_instructions = 1; - - while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { - num_instructions++; - } - - term->num_instructions = num_instructions; - block.ReplaceTerminal(terminal); - block.CycleCount() += num_instructions - 1; -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp deleted file mode 100644 index 86ebca87d2..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp +++ /dev/null @@ -1,559 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/assert.h" -#include -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/common/safe_ops.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -using Op = Dynarmic::IR::Opcode; - -namespace { - -// Tiny helper to avoid the need to store based off the opcode -// bit size all over the place within folding functions. -void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { - if (is_32_bit) { - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); - } else { - inst.ReplaceUsesWith(IR::Value{value}); - } -} - -IR::Value Value(bool is_32_bit, u64 value) { - return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; -} - -template -bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - - const bool is_lhs_immediate = lhs.IsImmediate(); - const bool is_rhs_immediate = rhs.IsImmediate(); - - if (is_lhs_immediate && is_rhs_immediate) { - const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); - ReplaceUsesWith(inst, is_32_bit, result); - return false; - } - - if (is_lhs_immediate && !is_rhs_immediate) { - const IR::Inst* rhs_inst = rhs.GetInstRecursive(); - if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, rhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } else { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - } - } - - if (!is_lhs_immediate && is_rhs_immediate) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } - } - - return true; -} - -void FoldAdd(IR::Inst& inst, bool is_32_bit) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - if (lhs.IsImmediate() && !rhs.IsImmediate()) { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - FoldAdd(inst, is_32_bit); - return; - } - - if (inst.HasAssociatedPseudoOperation()) { - return; - } - - if (!lhs.IsImmediate() && rhs.IsImmediate()) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { - const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); - if (combined == 0) { - inst.ReplaceUsesWith(lhs_inst->GetArg(0)); - return; - } - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - return; - } - if (rhs.IsZero() && carry.IsZero()) { - inst.ReplaceUsesWith(lhs); - return; - } - } - - if (inst.AreAllArgsImmediates()) { - const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); - return; - } -} - -/// Folds AND operations based on the following: -/// -/// 1. imm_x & imm_y -> result -/// 2. x & 0 -> 0 -/// 3. 0 & y -> 0 -/// 4. x & y -> y (where x has all bits set to 1) -/// 5. x & y -> x (where y has all bits set to 1) -/// -void FoldAND(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.HasAllBitsSet()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -/// Folds byte reversal opcodes based on the following: -/// -/// 1. imm -> swap(imm) -/// -void FoldByteReverse(IR::Inst& inst, Op op) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - if (op == Op::ByteReverseWord) { - const u32 result = mcl::bit::swap_bytes_32(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else if (op == Op::ByteReverseHalf) { - const u16 result = mcl::bit::swap_bytes_16(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else { - const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); - inst.ReplaceUsesWith(IR::Value{result}); - } -} - -/// Folds division operations based on the following: -/// -/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) -/// 2. imm_x / imm_y -> result -/// 3. x / 1 -> x -/// -void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { - const auto rhs = inst.GetArg(1); - - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - return; - } - - const auto lhs = inst.GetArg(0); - if (lhs.IsImmediate() && rhs.IsImmediate()) { - if (is_signed) { - const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); - ReplaceUsesWith(inst, is_32_bit, static_cast(result)); - } else { - const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); - } - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(IR::Value{lhs}); - } -} - -// Folds EOR operations based on the following: -// -// 1. imm_x ^ imm_y -> result -// 2. x ^ 0 -> x -// 3. 0 ^ y -> y -// -void FoldEOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -void FoldLeastSignificantByte(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantHalf(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldMostSignificantBit(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); -} - -void FoldMostSignificantWord(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - if (carry_inst) { - carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); - } - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); -} - -// Folds multiplication operations based on the following: -// -// 1. imm_x * imm_y -> result -// 2. x * 0 -> 0 -// 3. 0 * y -> 0 -// 4. x * 1 -> x -// 5. 1 * y -> y -// -void FoldMultiply(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -// Folds NOT operations if the contained value is an immediate. -void FoldNOT(IR::Inst& inst, bool is_32_bit) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - const u64 result = ~operand.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -// Folds OR operations based on the following: -// -// 1. imm_x | imm_y -> result -// 2. x | 0 -> x -// 3. 0 | y -> y -// -void FoldOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -bool FoldShifts(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - // The 32-bit variants can contain 3 arguments, while the - // 64-bit variants only contain 2. - if (inst.NumArgs() == 3 && !carry_inst) { - inst.SetArg(2, IR::Value(false)); - } - - const auto shift_amount = inst.GetArg(1); - - if (shift_amount.IsZero()) { - if (carry_inst) { - carry_inst->ReplaceUsesWith(inst.GetArg(2)); - } - inst.ReplaceUsesWith(inst.GetArg(0)); - return false; - } - - if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { - inst.SetArg(2, IR::Value(false)); - } - - if (!inst.AreAllArgsImmediates() || carry_inst) { - return false; - } - - return true; -} - -void FoldSignExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSignExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSub(IR::Inst& inst, bool is_32_bit) { - if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { - return; - } - - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -void FoldZeroExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldZeroExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{value}); -} -} // Anonymous namespace - -void ConstantPropagation(IR::Block& block) { - for (auto& inst : block) { - const auto opcode = inst.GetOpcode(); - - switch (opcode) { - case Op::LeastSignificantWord: - FoldLeastSignificantWord(inst); - break; - case Op::MostSignificantWord: - FoldMostSignificantWord(inst); - break; - case Op::LeastSignificantHalf: - FoldLeastSignificantHalf(inst); - break; - case Op::LeastSignificantByte: - FoldLeastSignificantByte(inst); - break; - case Op::MostSignificantBit: - FoldMostSignificantBit(inst); - break; - case Op::IsZero32: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); - } - break; - case Op::IsZero64: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); - } - break; - case Op::LogicalShiftLeft32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeft64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeftMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftLeftMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::LogicalShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::ArithmeticShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::ArithmeticShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::RotateRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); - } - break; - case Op::RotateRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); - } - break; - case Op::Add32: - case Op::Add64: - FoldAdd(inst, opcode == Op::Add32); - break; - case Op::Sub32: - case Op::Sub64: - FoldSub(inst, opcode == Op::Sub32); - break; - case Op::Mul32: - case Op::Mul64: - FoldMultiply(inst, opcode == Op::Mul32); - break; - case Op::SignedDiv32: - case Op::SignedDiv64: - FoldDivide(inst, opcode == Op::SignedDiv32, true); - break; - case Op::UnsignedDiv32: - case Op::UnsignedDiv64: - FoldDivide(inst, opcode == Op::UnsignedDiv32, false); - break; - case Op::And32: - case Op::And64: - FoldAND(inst, opcode == Op::And32); - break; - case Op::Eor32: - case Op::Eor64: - FoldEOR(inst, opcode == Op::Eor32); - break; - case Op::Or32: - case Op::Or64: - FoldOR(inst, opcode == Op::Or32); - break; - case Op::Not32: - case Op::Not64: - FoldNOT(inst, opcode == Op::Not32); - break; - case Op::SignExtendByteToWord: - case Op::SignExtendHalfToWord: - FoldSignExtendXToWord(inst); - break; - case Op::SignExtendByteToLong: - case Op::SignExtendHalfToLong: - case Op::SignExtendWordToLong: - FoldSignExtendXToLong(inst); - break; - case Op::ZeroExtendByteToWord: - case Op::ZeroExtendHalfToWord: - FoldZeroExtendXToWord(inst); - break; - case Op::ZeroExtendByteToLong: - case Op::ZeroExtendHalfToLong: - case Op::ZeroExtendWordToLong: - FoldZeroExtendXToLong(inst); - break; - case Op::ByteReverseWord: - case Op::ByteReverseHalf: - case Op::ByteReverseDual: - FoldByteReverse(inst, opcode); - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp deleted file mode 100644 index bda9f6efd1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void DeadCodeElimination(IR::Block& block) { - // We iterate over the instructions in reverse order. - // This is because removing an instruction reduces the number of uses for earlier instructions. - for (auto& inst : mcl::iterator::reverse(block)) { - if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { - inst.Invalidate(); - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp deleted file mode 100644 index e87fcc335b..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void IdentityRemovalPass(IR::Block& block) { - std::vector to_invalidate; - - auto iter = block.begin(); - while (iter != block.end()) { - IR::Inst& inst = *iter; - - const size_t num_args = inst.NumArgs(); - for (size_t i = 0; i < num_args; i++) { - while (true) { - IR::Value arg = inst.GetArg(i); - if (!arg.IsIdentity()) - break; - inst.SetArg(i, arg.GetInst()->GetArg(0)); - } - } - - if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { - iter = block.Instructions().erase(inst); - to_invalidate.push_back(&inst); - } else { - ++iter; - } - } - - for (IR::Inst* inst : to_invalidate) { - inst->Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h b/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h deleted file mode 100644 index 5eb1a55100..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h +++ /dev/null @@ -1,127 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization::IRMatcher { - -struct CaptureValue { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value); - } -}; - -struct CaptureInst { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return std::tuple(value.GetInstRecursive()); - } -}; - -struct CaptureUImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsU64()); - } -}; - -struct CaptureSImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsS64()); - } -}; - -template -struct UImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsU64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct SImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsS64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct Inst { -public: - using ReturnType = mp::concat, typename Args::ReturnType...>; - - static std::optional Match(const IR::Inst& inst) { - if (inst.GetOpcode() != Opcode) - return std::nullopt; - if (inst.HasAssociatedPseudoOperation()) - return std::nullopt; - return MatchArgs<0>(inst); - } - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return Match(*value.GetInstRecursive()); - } - -private: - template - static auto MatchArgs(const IR::Inst& inst) -> std::optional>, std::tuple<>>>> { - if constexpr (I >= sizeof...(Args)) { - return std::tuple(); - } else { - using Arg = mp::get>; - - if (const auto arg = Arg::Match(inst.GetArg(I))) { - if (const auto rest = MatchArgs(inst)) { - return std::tuple_cat(*arg, *rest); - } - } - - return std::nullopt; - } - } -}; - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t); -} - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t); -} - -} // namespace Dynarmic::Optimization::IRMatcher diff --git a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp deleted file mode 100644 index a766bdc83f..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2023 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" - -namespace Dynarmic::Optimization { - -void NamingPass(IR::Block& block) { - unsigned name = 1; - for (auto& inst : block) { - inst.SetName(name++); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/passes.h b/src/dynarmic/src/dynarmic/ir/opt/passes.h deleted file mode 100644 index 703145b556..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/passes.h +++ /dev/null @@ -1,47 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -namespace Dynarmic::A32 { -struct UserCallbacks; -} - -namespace Dynarmic::A64 { -struct UserCallbacks; -struct UserConfig; -} // namespace Dynarmic::A64 - -namespace Dynarmic::IR { -class Block; -} - -namespace Dynarmic::Optimization { - -struct PolyfillOptions { - bool sha256 = false; - bool vector_multiply_widen = false; - - bool operator==(const PolyfillOptions&) const = default; -}; - -struct A32GetSetEliminationOptions { - bool convert_nzc_to_nz = false; - bool convert_nz_to_nzc = false; -}; - -void PolyfillPass(IR::Block& block, const PolyfillOptions& opt); -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb); -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt); -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf); -void A64GetSetElimination(IR::Block& block); -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb); -void ConstantPropagation(IR::Block& block); -void DeadCodeElimination(IR::Block& block); -void IdentityRemovalPass(IR::Block& block); -void VerificationPass(const IR::Block& block); -void NamingPass(IR::Block& block); - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp deleted file mode 100644 index 1aa3aea91e..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2022 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -namespace { - -void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - - const IR::U128 t = ir.VectorExtract(x, y, 32); - - IR::U128 result = ir.ZeroVector(); - for (size_t i = 0; i < 4; i++) { - const IR::U32 modified_element = [&] { - const IR::U32 element = ir.VectorGetElement(32, t, i); - const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); - const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); - const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); - }(); - - result = ir.VectorSetElement(32, result, i, modified_element); - } - result = ir.VectorAdd(32, result, x); - - inst.ReplaceUsesWith(result); -} - -void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 z = (IR::U128)inst.GetArg(2); - - const IR::U128 T0 = ir.VectorExtract(y, z, 32); - - const IR::U128 lower_half = [&] { - const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); - const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); - return ir.VectorZeroUpper(tmp5); - }(); - - const IR::U64 upper_half = [&] { - const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - - // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] - const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); - const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); - - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); - return ir.VectorGetElement(64, tmp5, 0); - }(); - - const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); - - inst.ReplaceUsesWith(result); -} - -IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Eor(ir.And(ir.Eor(y, z), x), z); -} - -IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); -} - -IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 x = (IR::U128)inst.GetArg(0); - IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 w = (IR::U128)inst.GetArg(2); - const bool part1 = inst.GetArg(3).GetU1(); - - for (size_t i = 0; i < 4; i++) { - const IR::U32 low_x = ir.VectorGetElement(32, x, 0); - const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); - const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); - const IR::U32 high_x = ir.VectorGetElement(32, x, 3); - - const IR::U32 low_y = ir.VectorGetElement(32, y, 0); - const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); - const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); - const IR::U32 high_y = ir.VectorGetElement(32, y, 3); - - const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); - const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); - - const IR::U32 t = [&] { - const IR::U32 w_element = ir.VectorGetElement(32, w, i); - const IR::U32 sig = SHAhashSIGMA1(ir, low_y); - - return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); - }(); - - const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); - const IR::U32 new_low_y = ir.Add(t, high_x); - - // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] - const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); - const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); - - x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); - y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); - } - - inst.ReplaceUsesWith(part1 ? x : y); -} - -template -void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 n = (IR::U128)inst.GetArg(0); - IR::U128 m = (IR::U128)inst.GetArg(1); - - const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); - const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); - - const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); - - inst.ReplaceUsesWith(result); -} - -} // namespace - -void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { - if (polyfill == PolyfillOptions{}) { - return; - } - - IR::IREmitter ir{block}; - - for (auto& inst : block) { - ir.SetInsertionPointBefore(&inst); - - switch (inst.GetOpcode()) { - case IR::Opcode::SHA256MessageSchedule0: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule0(ir, inst); - } - break; - case IR::Opcode::SHA256MessageSchedule1: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule1(ir, inst); - } - break; - case IR::Opcode::SHA256Hash: - if (polyfill.sha256) { - PolyfillSHA256Hash(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, false>(ir, inst); - } - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp deleted file mode 100644 index c6c2cff231..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/type.h" - -namespace Dynarmic::Optimization { - -void VerificationPass(const IR::Block& block) { - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const IR::Type t1 = inst.GetArg(i).GetType(); - const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); - if (!IR::AreTypesCompatible(t1, t2)) { - std::puts(IR::DumpBlock(block).c_str()); - ASSERT_FALSE("above block failed validation"); - } - } - } - - ankerl::unordered_dense::map actual_uses; - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const auto arg = inst.GetArg(i); - if (!arg.IsImmediate()) { - actual_uses[arg.GetInst()]++; - } - } - } - - for (const auto& pair : actual_uses) { - ASSERT(pair.first->UseCount() == pair.second); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp new file mode 100644 index 0000000000..383b915839 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -0,0 +1,1502 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include +#include + +#include +#include "boost/container/small_vector.hpp" +#include "dynarmic/frontend/A32/a32_ir_emitter.h" +#include "dynarmic/frontend/A32/a32_location_descriptor.h" +#include "dynarmic/frontend/A32/a32_types.h" +#include "dynarmic/frontend/A64/a64_ir_emitter.h" +#include "dynarmic/frontend/A64/a64_location_descriptor.h" +#include "dynarmic/frontend/A64/translate/a64_translate.h" +#include "dynarmic/interface/A32/config.h" +#include "dynarmic/interface/A64/config.h" +#include "dynarmic/interface/optimization_flags.h" +#include "dynarmic/common/safe_ops.h" +#include "dynarmic/ir/basic_block.h" +#include "dynarmic/ir/microinstruction.h" +#include "dynarmic/ir/opcodes.h" +#include "dynarmic/ir/opt_passes.h" +#include "dynarmic/ir/type.h" +#include "mcl/bit/swap.hpp" +#include "mcl/bit/rotate.hpp" +#include "mcl/iterator/reverse.hpp" + +namespace Dynarmic::Optimization { + +static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { + for (auto& inst : block) { + switch (inst.GetOpcode()) { + case IR::Opcode::A32ReadMemory8: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u8 value_from_memory = cb->MemoryRead8(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory16: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u16 value_from_memory = cb->MemoryRead16(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory32: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u32 value_from_memory = cb->MemoryRead32(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory64: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u64 value_from_memory = cb->MemoryRead64(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + default: + break; + } + } +} + +static void FlagsPass(IR::Block& block) { + using Iterator = std::reverse_iterator; + + struct FlagInfo { + bool set_not_required = false; + bool has_value_request = false; + Iterator value_request = {}; + }; + struct ValuelessFlagInfo { + bool set_not_required = false; + }; + ValuelessFlagInfo nzcvq; + ValuelessFlagInfo nzcv; + ValuelessFlagInfo nz; + FlagInfo c_flag; + FlagInfo ge; + + auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(value); + } + info.has_value_request = false; + + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_get = [](FlagInfo& info, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(IR::Value{&*inst}); + } + info.has_value_request = true; + info.value_request = inst; + }; + + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetCFlag: { + do_get(c_flag, inst); + break; + } + case IR::Opcode::A32SetCpsrNZCV: { + if (c_flag.has_value_request) { + ir.SetInsertionPointBefore(inst.base()); // base is one ahead + IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); + c_flag.value_request->ReplaceUsesWith(c); + c_flag.has_value_request = false; + break; // This case will be executed again because of the above + } + + do_set_valueless(nzcv, inst); + + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVRaw: { + if (c_flag.has_value_request) { + nzcv.set_not_required = false; + } + + do_set_valueless(nzcv, inst); + + nzcvq = {}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVQ: { + if (c_flag.has_value_request) { + nzcvq.set_not_required = false; + } + + do_set_valueless(nzcvq, inst); + + nzcv = {.set_not_required = true}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZ: { + do_set_valueless(nz, inst); + + nzcvq = {}; + nzcv = {}; + break; + } + case IR::Opcode::A32SetCpsrNZC: { + if (c_flag.has_value_request) { + c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); + c_flag.has_value_request = false; + } + + if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { + const auto nz_value = inst->GetArg(0); + + inst->Invalidate(); + + ir.SetInsertionPointBefore(inst.base()); + ir.SetCpsrNZ(IR::NZCV{nz_value}); + + nzcvq = {}; + nzcv = {}; + nz = {.set_not_required = true}; + break; + } + + if (nz.set_not_required && c_flag.set_not_required) { + inst->Invalidate(); + } else if (nz.set_not_required) { + inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); + } + nz.set_not_required = true; + c_flag.set_not_required = true; + + nzcv = {}; + nzcvq = {}; + break; + } + case IR::Opcode::A32SetGEFlags: { + do_set(ge, inst->GetArg(0), inst); + break; + } + case IR::Opcode::A32GetGEFlags: { + do_get(ge, inst); + break; + } + case IR::Opcode::A32SetGEFlagsCompressed: { + ge = {.set_not_required = true}; + break; + } + case IR::Opcode::A32OrQFlag: { + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcvq = {}; + nzcv = {}; + nz = {}; + c_flag = {}; + ge = {}; + } + break; + } + } + } +} + +static void RegisterPass(IR::Block& block) { + using Iterator = IR::Block::iterator; + + struct RegInfo { + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array reg_info; + + const auto do_get = [](RegInfo& info, Iterator get_inst) { + if (info.register_value.IsEmpty()) { + info.register_value = IR::Value(&*get_inst); + return; + } + get_inst->ReplaceUsesWith(info.register_value); + }; + + const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { + if (info.last_set_instruction) { + (*info.last_set_instruction)->Invalidate(); + } + info = { + .register_value = value, + .last_set_instruction = set_inst, + }; + }; + + enum class ExtValueType { + Empty, + Single, + Double, + VectorDouble, + VectorQuad, + }; + struct ExtRegInfo { + ExtValueType value_type = {}; + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array ext_reg_info; + + const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { + if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = IR::Value(&*get_inst), + .last_set_instruction = std::nullopt, + }; + } + return; + } + get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); + }; + + const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { + if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + if (std::data(infos)[0].get().last_set_instruction) { + (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); + } + } + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = value, + .last_set_instruction = set_inst, + }; + } + }; + + // Location and version don't matter here. + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + ASSERT(reg != A32::Reg::PC); + const size_t reg_index = size_t(reg); + do_get(reg_info[reg_index], inst); + break; + } + case IR::Opcode::A32SetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + if (reg == A32::Reg::PC) { + break; + } + const auto reg_index = size_t(reg); + do_set(reg_info[reg_index], inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); + break; + } + case IR::Opcode::A32SetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + break; + } + case IR::Opcode::A32SetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst->GetArg(1), + inst); + break; + } + case IR::Opcode::A32GetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + do_ext_get(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_get(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst); + } + break; + } + case IR::Opcode::A32SetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + ir.SetInsertionPointAfter(inst); + const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); + do_ext_set(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + stored_value, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_set(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst->GetArg(1), + inst); + } + break; + } + default: { + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + ext_reg_info = {}; + } + break; + } + } + } +} + +struct A32GetSetEliminationOptions { + bool convert_nzc_to_nz = false; + bool convert_nz_to_nzc = false; +}; + +static void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { + FlagsPass(block); + RegisterPass(block); +} + +static void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { + if (conf.hook_data_cache_operations) { + return; + } + + for (auto& inst : block) { + if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { + continue; + } + + const auto op = static_cast(inst.GetArg(1).GetU64()); + if (op == A64::DataCacheOperation::ZeroByVA) { + A64::IREmitter ir{block}; + ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; + ir.SetInsertionPointBefore(&inst); + + size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); + IR::U64 addr{inst.GetArg(2)}; + + const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); + while (bytes >= 16) { + ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(16)); + bytes -= 16; + } + + while (bytes >= 8) { + ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(8)); + bytes -= 8; + } + + while (bytes >= 4) { + ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(4)); + bytes -= 4; + } + } + inst.Invalidate(); + } +} + +static void A64GetSetElimination(IR::Block& block) { + using Iterator = IR::Block::iterator; + + enum class TrackingType { + W, + X, + S, + D, + Q, + SP, + NZCV, + NZCVRaw, + }; + struct RegisterInfo { + IR::Value register_value; + TrackingType tracking_type; + bool set_instruction_present = false; + Iterator last_set_instruction; + }; + std::array reg_info; + std::array vec_info; + RegisterInfo sp_info; + RegisterInfo nzcv_info; + + const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { + if (info.set_instruction_present) { + info.last_set_instruction->Invalidate(); + block.Instructions().erase(info.last_set_instruction); + } + + info.register_value = value; + info.tracking_type = tracking_type; + info.set_instruction_present = true; + info.last_set_instruction = set_inst; + }; + + const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { + const auto do_nothing = [&] { + info = {}; + info.register_value = IR::Value(&*get_inst); + info.tracking_type = tracking_type; + }; + + if (info.register_value.IsEmpty()) { + do_nothing(); + return; + } + + if (info.tracking_type == tracking_type) { + get_inst->ReplaceUsesWith(info.register_value); + return; + } + + do_nothing(); + }; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A64GetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::W); + break; + } + case IR::Opcode::A64GetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::X); + break; + } + case IR::Opcode::A64GetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::S); + break; + } + case IR::Opcode::A64GetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::D); + break; + } + case IR::Opcode::A64GetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64GetSP: { + do_get(sp_info, inst, TrackingType::SP); + break; + } + case IR::Opcode::A64GetNZCVRaw: { + do_get(nzcv_info, inst, TrackingType::NZCVRaw); + break; + } + case IR::Opcode::A64SetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); + break; + } + case IR::Opcode::A64SetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); + break; + } + case IR::Opcode::A64SetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); + break; + } + case IR::Opcode::A64SetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); + break; + } + case IR::Opcode::A64SetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64SetSP: { + do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); + break; + } + case IR::Opcode::A64SetNZCV: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); + break; + } + case IR::Opcode::A64SetNZCVRaw: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcv_info = {}; + } + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + vec_info = {}; + sp_info = {}; + } + break; + } + } + } +} + +static void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { + const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { + const auto instruction = cb->MemoryReadCode(location.PC()); + if (!instruction) + return false; + + IR::Block new_block{location}; + A64::TranslateSingleInstruction(new_block, location, *instruction); + + if (!new_block.Instructions().empty()) + return false; + + const IR::Terminal terminal = new_block.GetTerminal(); + if (auto term = boost::get(&terminal)) { + return term->next == location; + } + + return false; + }; + + IR::Terminal terminal = block.GetTerminal(); + auto term = boost::get(&terminal); + if (!term) + return; + + A64::LocationDescriptor location{term->next}; + size_t num_instructions = 1; + + while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { + num_instructions++; + } + + term->num_instructions = num_instructions; + block.ReplaceTerminal(terminal); + block.CycleCount() += num_instructions - 1; +} + +using Op = Dynarmic::IR::Opcode; + +// Tiny helper to avoid the need to store based off the opcode +// bit size all over the place within folding functions. +static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { + if (is_32_bit) { + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); + } else { + inst.ReplaceUsesWith(IR::Value{value}); + } +} + +static IR::Value Value(bool is_32_bit, u64 value) { + return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; +} + +template +static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + + const bool is_lhs_immediate = lhs.IsImmediate(); + const bool is_rhs_immediate = rhs.IsImmediate(); + + if (is_lhs_immediate && is_rhs_immediate) { + const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); + ReplaceUsesWith(inst, is_32_bit, result); + return false; + } + + if (is_lhs_immediate && !is_rhs_immediate) { + const IR::Inst* rhs_inst = rhs.GetInstRecursive(); + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, rhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } else { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + } + } + + if (!is_lhs_immediate && is_rhs_immediate) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } + } + + return true; +} + +static void FoldAdd(IR::Inst& inst, bool is_32_bit) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + if (lhs.IsImmediate() && !rhs.IsImmediate()) { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + FoldAdd(inst, is_32_bit); + return; + } + + if (inst.HasAssociatedPseudoOperation()) { + return; + } + + if (!lhs.IsImmediate() && rhs.IsImmediate()) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { + const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); + if (combined == 0) { + inst.ReplaceUsesWith(lhs_inst->GetArg(0)); + return; + } + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + return; + } + if (rhs.IsZero() && carry.IsZero()) { + inst.ReplaceUsesWith(lhs); + return; + } + } + + if (inst.AreAllArgsImmediates()) { + const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); + return; + } +} + +/// Folds AND operations based on the following: +/// +/// 1. imm_x & imm_y -> result +/// 2. x & 0 -> 0 +/// 3. 0 & y -> 0 +/// 4. x & y -> y (where x has all bits set to 1) +/// 5. x & y -> x (where y has all bits set to 1) +/// +static void FoldAND(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.HasAllBitsSet()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +/// Folds byte reversal opcodes based on the following: +/// +/// 1. imm -> swap(imm) +/// +static void FoldByteReverse(IR::Inst& inst, Op op) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + if (op == Op::ByteReverseWord) { + const u32 result = mcl::bit::swap_bytes_32(u32(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else if (op == Op::ByteReverseHalf) { + const u16 result = mcl::bit::swap_bytes_16(u16(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else { + const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); + inst.ReplaceUsesWith(IR::Value{result}); + } +} + +/// Folds division operations based on the following: +/// +/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) +/// 2. imm_x / imm_y -> result +/// 3. x / 1 -> x +/// +static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { + const auto rhs = inst.GetArg(1); + + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + return; + } + + const auto lhs = inst.GetArg(0); + if (lhs.IsImmediate() && rhs.IsImmediate()) { + if (is_signed) { + const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); + ReplaceUsesWith(inst, is_32_bit, static_cast(result)); + } else { + const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); + } + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(IR::Value{lhs}); + } +} + +// Folds EOR operations based on the following: +// +// 1. imm_x ^ imm_y -> result +// 2. x ^ 0 -> x +// 3. 0 ^ y -> y +// +static void FoldEOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static void FoldLeastSignificantByte(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantHalf(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldMostSignificantBit(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); +} + +static void FoldMostSignificantWord(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + if (carry_inst) { + carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); + } + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); +} + +// Folds multiplication operations based on the following: +// +// 1. imm_x * imm_y -> result +// 2. x * 0 -> 0 +// 3. 0 * y -> 0 +// 4. x * 1 -> x +// 5. 1 * y -> y +// +static void FoldMultiply(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +// Folds NOT operations if the contained value is an immediate. +static void FoldNOT(IR::Inst& inst, bool is_32_bit) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + const u64 result = ~operand.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +// Folds OR operations based on the following: +// +// 1. imm_x | imm_y -> result +// 2. x | 0 -> x +// 3. 0 | y -> y +// +static void FoldOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static bool FoldShifts(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + // The 32-bit variants can contain 3 arguments, while the + // 64-bit variants only contain 2. + if (inst.NumArgs() == 3 && !carry_inst) { + inst.SetArg(2, IR::Value(false)); + } + + const auto shift_amount = inst.GetArg(1); + + if (shift_amount.IsZero()) { + if (carry_inst) { + carry_inst->ReplaceUsesWith(inst.GetArg(2)); + } + inst.ReplaceUsesWith(inst.GetArg(0)); + return false; + } + + if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { + inst.SetArg(2, IR::Value(false)); + } + + if (!inst.AreAllArgsImmediates() || carry_inst) { + return false; + } + + return true; +} + +static void FoldSignExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSignExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSub(IR::Inst& inst, bool is_32_bit) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return; + } + + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +static void FoldZeroExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldZeroExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{value}); +} + +static void ConstantPropagation(IR::Block& block) { + for (auto& inst : block) { + const auto opcode = inst.GetOpcode(); + + switch (opcode) { + case Op::LeastSignificantWord: + FoldLeastSignificantWord(inst); + break; + case Op::MostSignificantWord: + FoldMostSignificantWord(inst); + break; + case Op::LeastSignificantHalf: + FoldLeastSignificantHalf(inst); + break; + case Op::LeastSignificantByte: + FoldLeastSignificantByte(inst); + break; + case Op::MostSignificantBit: + FoldMostSignificantBit(inst); + break; + case Op::IsZero32: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); + } + break; + case Op::IsZero64: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); + } + break; + case Op::LogicalShiftLeft32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeft64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeftMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftLeftMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::LogicalShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::ArithmeticShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::ArithmeticShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::RotateRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); + } + break; + case Op::RotateRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); + } + break; + case Op::Add32: + case Op::Add64: + FoldAdd(inst, opcode == Op::Add32); + break; + case Op::Sub32: + case Op::Sub64: + FoldSub(inst, opcode == Op::Sub32); + break; + case Op::Mul32: + case Op::Mul64: + FoldMultiply(inst, opcode == Op::Mul32); + break; + case Op::SignedDiv32: + case Op::SignedDiv64: + FoldDivide(inst, opcode == Op::SignedDiv32, true); + break; + case Op::UnsignedDiv32: + case Op::UnsignedDiv64: + FoldDivide(inst, opcode == Op::UnsignedDiv32, false); + break; + case Op::And32: + case Op::And64: + FoldAND(inst, opcode == Op::And32); + break; + case Op::Eor32: + case Op::Eor64: + FoldEOR(inst, opcode == Op::Eor32); + break; + case Op::Or32: + case Op::Or64: + FoldOR(inst, opcode == Op::Or32); + break; + case Op::Not32: + case Op::Not64: + FoldNOT(inst, opcode == Op::Not32); + break; + case Op::SignExtendByteToWord: + case Op::SignExtendHalfToWord: + FoldSignExtendXToWord(inst); + break; + case Op::SignExtendByteToLong: + case Op::SignExtendHalfToLong: + case Op::SignExtendWordToLong: + FoldSignExtendXToLong(inst); + break; + case Op::ZeroExtendByteToWord: + case Op::ZeroExtendHalfToWord: + FoldZeroExtendXToWord(inst); + break; + case Op::ZeroExtendByteToLong: + case Op::ZeroExtendHalfToLong: + case Op::ZeroExtendWordToLong: + FoldZeroExtendXToLong(inst); + break; + case Op::ByteReverseWord: + case Op::ByteReverseHalf: + case Op::ByteReverseDual: + FoldByteReverse(inst, opcode); + break; + default: + break; + } + } +} + +static void DeadCodeElimination(IR::Block& block) { + // We iterate over the instructions in reverse order. + // This is because removing an instruction reduces the number of uses for earlier instructions. + for (auto& inst : mcl::iterator::reverse(block)) { + if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { + inst.Invalidate(); + } + } +} + +static void IdentityRemovalPass(IR::Block& block) { + boost::container::small_vector to_invalidate; + + auto iter = block.begin(); + while (iter != block.end()) { + IR::Inst& inst = *iter; + + const size_t num_args = inst.NumArgs(); + for (size_t i = 0; i < num_args; i++) { + while (true) { + IR::Value arg = inst.GetArg(i); + if (!arg.IsIdentity()) + break; + inst.SetArg(i, arg.GetInst()->GetArg(0)); + } + } + + if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { + iter = block.Instructions().erase(inst); + to_invalidate.push_back(&inst); + } else { + ++iter; + } + } + for (IR::Inst* inst : to_invalidate) { + inst->Invalidate(); + } +} + +static void NamingPass(IR::Block& block) { + u32 name = 1; + for (auto& inst : block) + inst.SetName(name++); +} + +static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + + const IR::U128 t = ir.VectorExtract(x, y, 32); + + IR::U128 result = ir.ZeroVector(); + for (size_t i = 0; i < 4; i++) { + const IR::U32 modified_element = [&] { + const IR::U32 element = ir.VectorGetElement(32, t, i); + const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); + const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); + const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); + }(); + + result = ir.VectorSetElement(32, result, i, modified_element); + } + result = ir.VectorAdd(32, result, x); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 z = (IR::U128)inst.GetArg(2); + + const IR::U128 T0 = ir.VectorExtract(y, z, 32); + + const IR::U128 lower_half = [&] { + const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); + const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); + return ir.VectorZeroUpper(tmp5); + }(); + + const IR::U64 upper_half = [&] { + const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + + // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] + const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); + const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); + + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); + return ir.VectorGetElement(64, tmp5, 0); + }(); + + const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); + + inst.ReplaceUsesWith(result); +} + +static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Eor(ir.And(ir.Eor(y, z), x), z); +} + +static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); +} + +static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 x = (IR::U128)inst.GetArg(0); + IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 w = (IR::U128)inst.GetArg(2); + const bool part1 = inst.GetArg(3).GetU1(); + + for (size_t i = 0; i < 4; i++) { + const IR::U32 low_x = ir.VectorGetElement(32, x, 0); + const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); + const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); + const IR::U32 high_x = ir.VectorGetElement(32, x, 3); + + const IR::U32 low_y = ir.VectorGetElement(32, y, 0); + const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); + const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); + const IR::U32 high_y = ir.VectorGetElement(32, y, 3); + + const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); + const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); + + const IR::U32 t = [&] { + const IR::U32 w_element = ir.VectorGetElement(32, w, i); + const IR::U32 sig = SHAhashSIGMA1(ir, low_y); + + return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); + }(); + + const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); + const IR::U32 new_low_y = ir.Add(t, high_x); + + // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] + const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); + const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); + + x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); + y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); + } + + inst.ReplaceUsesWith(part1 ? x : y); +} + +template +static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 n = (IR::U128)inst.GetArg(0); + IR::U128 m = (IR::U128)inst.GetArg(1); + + const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); + const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); + + const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { + if (polyfill == PolyfillOptions{}) { + return; + } + + IR::IREmitter ir{block}; + + for (auto& inst : block) { + ir.SetInsertionPointBefore(&inst); + + switch (inst.GetOpcode()) { + case IR::Opcode::SHA256MessageSchedule0: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule0(ir, inst); + } + break; + case IR::Opcode::SHA256MessageSchedule1: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule1(ir, inst); + } + break; + case IR::Opcode::SHA256Hash: + if (polyfill.sha256) { + PolyfillSHA256Hash(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, false>(ir, inst); + } + break; + default: + break; + } + } +} + +static void VerificationPass(const IR::Block& block) { + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) { + const IR::Type t1 = inst.GetArg(i).GetType(); + const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); + ASSERT_MSG(IR::AreTypesCompatible(t1, t2), "Block failed:\n{}", IR::DumpBlock(block)); + } + } + ankerl::unordered_dense::map actual_uses; + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) + if (IR::Value const arg = inst.GetArg(i); !arg.IsImmediate()) + actual_uses[arg.GetInst()]++; + } + for (auto const& pair : actual_uses) + ASSERT(pair.first->UseCount() == pair.second); +} + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) [[likely]] { + Optimization::A32GetSetElimination(block, {.convert_nzc_to_nz = true}); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::A32ConstantMemoryReads(block, conf.callbacks); + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + Optimization::IdentityRemovalPass(block); + Optimization::VerificationPass(block); +} + +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::A64CallbackConfigPass(block, conf); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) [[likely]] { + Optimization::A64GetSetElimination(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] { + Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks); + } + Optimization::VerificationPass(block); +} + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h new file mode 100644 index 0000000000..1963fad0a0 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -0,0 +1,34 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +namespace Dynarmic::A32 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::A64 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::IR { +class Block; +} + +namespace Dynarmic::Optimization { + +struct PolyfillOptions { + bool sha256 = false; + bool vector_multiply_widen = false; + + bool operator==(const PolyfillOptions&) const = default; +}; + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index 4d14141bbf..ad01e5718b 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -29,7 +29,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; @@ -179,13 +179,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 85d86c7966..29a8f23478 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -6,33 +6,24 @@ add_executable(dynarmic_tests fp/mantissa_util_tests.cpp fp/unpacked_tests.cpp rand_int.h + # A32 + A32/test_arm_disassembler.cpp + A32/test_arm_instructions.cpp + A32/test_coprocessor.cpp + A32/test_svc.cpp + A32/test_thumb_instructions.cpp + A32/testenv.h + decoder_tests.cpp + # A64 + A64/a64.cpp + A64/fibonacci.cpp + A64/fp_min_max.cpp + A64/misaligned_page_table.cpp + A64/test_invalidation.cpp + A64/real_world.cpp + A64/testenv.h ) - -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/test_arm_disassembler.cpp - A32/test_arm_instructions.cpp - A32/test_coprocessor.cpp - A32/test_svc.cpp - A32/test_thumb_instructions.cpp - A32/testenv.h - decoder_tests.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) - - target_sources(dynarmic_tests PRIVATE - A64/a64.cpp - A64/fibonacci.cpp - A64/fp_min_max.cpp - A64/misaligned_page_table.cpp - A64/test_invalidation.cpp - A64/real_world.cpp - A64/testenv.h - ) -endif() +target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) if (DYNARMIC_TESTS_USE_UNICORN) target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn) @@ -40,25 +31,17 @@ if (DYNARMIC_TESTS_USE_UNICORN) target_sources(dynarmic_tests PRIVATE fuzz_util.cpp fuzz_util.h + # A32 + A32/fuzz_arm.cpp + A32/fuzz_thumb.cpp + unicorn_emu/a32_unicorn.cpp + unicorn_emu/a32_unicorn.h + # A64 + A64/fuzz_with_unicorn.cpp + A64/verify_unicorn.cpp + unicorn_emu/a64_unicorn.cpp + unicorn_emu/a64_unicorn.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/fuzz_arm.cpp - A32/fuzz_thumb.cpp - unicorn_emu/a32_unicorn.cpp - unicorn_emu/a32_unicorn.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A64/fuzz_with_unicorn.cpp - A64/verify_unicorn.cpp - unicorn_emu/a64_unicorn.cpp - unicorn_emu/a64_unicorn.h - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -69,9 +52,6 @@ if ("x86_64" IN_LIST ARCHITECTURE) target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak) target_architecture_specific_sources(dynarmic_tests "x86_64" x64_cpu_info.cpp - ) - - target_architecture_specific_sources(dynarmic_tests "x86_64" native/preserve_xmm.cpp ) @@ -85,47 +65,47 @@ endif() include(CreateDirectoryGroups) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_print_info - print_info.cpp - ) +# +# dynarmic_print_info +# +add_executable(dynarmic_print_info + print_info.cpp +) +create_target_directory_groups(dynarmic_print_info) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_print_info PRIVATE . ../src) +target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_print_info) +# +# dynarmic_test_generator +# +add_executable(dynarmic_test_generator + fuzz_util.cpp + fuzz_util.h + test_generator.cpp +) - target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_print_info PRIVATE . ../src) - target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +create_target_directory_groups(dynarmic_test_generator) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_generator - fuzz_util.cpp - fuzz_util.h - test_generator.cpp - ) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_generator PRIVATE . ../src) +target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_test_generator) - - target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_generator PRIVATE . ../src) - target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() - -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_reader - test_reader.cpp - ) - - create_target_directory_groups(dynarmic_test_reader) - - target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_reader PRIVATE . ../src) - target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +# +# dynarmic_test_reader +# +add_executable(dynarmic_test_reader + test_reader.cpp +) +create_target_directory_groups(dynarmic_test_reader) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_reader PRIVATE . ../src) +target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) +# create_target_directory_groups(dynarmic_tests) target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index ef8b87bbd1..3263ca729a 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -34,7 +34,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; From bbbc4b4c96cb129c4e96c6207297739034282b6a Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:57:48 +0000 Subject: [PATCH 20/32] Fix license headers --- src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index 8e48fa3687..01af361b27 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index 2b50ad9ea3..c4c1c42792 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h index 1963fad0a0..88b8020031 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.h +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD From 20dbc3cbe61196cd58d4d8138018409da8d35385 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 00:11:40 +0000 Subject: [PATCH 21/32] [dynarmic] use small vector experiment Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index f70329f471..df1619d552 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -12,6 +12,7 @@ #include #include +#include "boost/container/small_vector.hpp" #include "dynarmic/common/common_types.h" #include #include @@ -83,7 +84,7 @@ public: void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; private: //non trivial - std::vector values; //24 + boost::container::small_vector values; //24 // Block state uint16_t total_uses = 0; //8 //sometimes zeroed @@ -96,7 +97,7 @@ private: uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - alignas(16) uint8_t lru_counter = 0; //1 + uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From f6a3e31a2347818ac3357ea36f699a04405eee10 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:07 +0000 Subject: [PATCH 22/32] [dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index fa6006ed2a..014c121be5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -118,7 +118,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept { values.push_back(inst); ASSERT(size_t(total_uses) + inst->UseCount() < std::numeric_limits::max()); total_uses += inst->UseCount(); - max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType())); + max_bit_width = std::max(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType()))); } void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index df1619d552..160c2ef098 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -78,7 +78,7 @@ public: return std::find(values.begin(), values.end(), inst) != values.end(); } inline size_t GetMaxBitWidth() const noexcept { - return max_bit_width; + return 1 << max_bit_width; } void AddValue(IR::Inst* inst) noexcept; void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; @@ -94,10 +94,10 @@ private: uint16_t is_being_used_count = 0; //8 uint16_t current_references = 0; //8 // Value state - uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 + uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6) + uint8_t lru_counter : 2 = 0; //1 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From 49c2789f4f7d94fb9424c8d5111bd6b5f8d293b8 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:22 +0000 Subject: [PATCH 23/32] [dynarmic] fix tests_reader and tests_generator Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 2 +- src/dynarmic/tests/A32/testenv.h | 1 - src/dynarmic/tests/A64/testenv.h | 1 - src/dynarmic/tests/CMakeLists.txt | 28 ++++++++++++++++++++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index f35f22f15c..a61cecc740 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -371,7 +371,7 @@ target_link_libraries(dynarmic ) if (BOOST_NO_HEADERS) -target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) + target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) else() target_link_libraries(dynarmic PRIVATE Boost::headers) endif() diff --git a/src/dynarmic/tests/A32/testenv.h b/src/dynarmic/tests/A32/testenv.h index a6df2017ce..72eaafce14 100644 --- a/src/dynarmic/tests/A32/testenv.h +++ b/src/dynarmic/tests/A32/testenv.h @@ -17,7 +17,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A32/a32.h" -#include "../native/testenv.h" template class A32TestEnv : public Dynarmic::A32::UserCallbacks { diff --git a/src/dynarmic/tests/A64/testenv.h b/src/dynarmic/tests/A64/testenv.h index 31e338b138..fcdadb23e6 100644 --- a/src/dynarmic/tests/A64/testenv.h +++ b/src/dynarmic/tests/A64/testenv.h @@ -12,7 +12,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A64/a64.h" -#include "../native/testenv.h" using Vector = Dynarmic::A64::Vector; diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 29a8f23478..f8f420e2d6 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -72,7 +72,12 @@ add_executable(dynarmic_print_info print_info.cpp ) create_target_directory_groups(dynarmic_print_info) -target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_print_info PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_print_info PRIVATE . ../src) target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -88,7 +93,12 @@ add_executable(dynarmic_test_generator create_target_directory_groups(dynarmic_test_generator) -target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_generator PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_generator PRIVATE . ../src) target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -100,7 +110,12 @@ add_executable(dynarmic_test_reader test_reader.cpp ) create_target_directory_groups(dynarmic_test_reader) -target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_reader PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_reader PRIVATE . ../src) target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -108,7 +123,12 @@ target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LIT # create_target_directory_groups(dynarmic_tests) -target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) +target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_tests PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_tests PRIVATE . ../src) target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) From 6d99502d4dffbb9a2aa90ae847b05b6d4c1ccb16 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 09:10:18 +0000 Subject: [PATCH 24/32] [docs] fastmem draft Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 5 +++++ src/dynarmic/docs/Fastmem.svg | 4 ++++ src/dynarmic/docs/HostToGuest.svg | 4 ++++ 3 files changed, 13 insertions(+) create mode 100644 src/dynarmic/docs/FastMemory.md create mode 100644 src/dynarmic/docs/Fastmem.svg create mode 100644 src/dynarmic/docs/HostToGuest.svg diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md new file mode 100644 index 0000000000..2a7c1a2583 --- /dev/null +++ b/src/dynarmic/docs/FastMemory.md @@ -0,0 +1,5 @@ +# Fast memory (Fastmem) + +![Host to guest translation](./HostToGuest.svg) + +![Fastmem translation](./Fastmem.svg) diff --git a/src/dynarmic/docs/Fastmem.svg b/src/dynarmic/docs/Fastmem.svg new file mode 100644 index 0000000000..a3ed0bb68b --- /dev/null +++ b/src/dynarmic/docs/Fastmem.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
SIGSEGV Trap
Fastmem
Only needs to linearly offset from fastmem arena
Less codegen (SIGSEGV traps)
Is fast only if SIGSEGV handlers are sufficiently fast
\ No newline at end of file diff --git a/src/dynarmic/docs/HostToGuest.svg b/src/dynarmic/docs/HostToGuest.svg new file mode 100644 index 0000000000..6a15a44b46 --- /dev/null +++ b/src/dynarmic/docs/HostToGuest.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
Resolver
Host to Guest translation
Looks up correct PTE
Translates each address 
Is slow
\ No newline at end of file From 6cd26bb9c99f5e1bfd5ba128c9674ed893807a40 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 10:16:22 +0000 Subject: [PATCH 25/32] [dynarmic] (prolly makes MSVC crash) - use 128MiB code cache Signed-off-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 6 +- .../frontend/decoder/decoder_detail.h | 65 ++++++++----------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 2c2c54a1ad..3f7bf7f967 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -210,12 +210,8 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 438b7b691c..682b0cfbf6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -269,12 +269,8 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index bd76efda2a..4050033ef5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -70,11 +70,9 @@ struct detail { return std::make_tuple(mask, expect); } - /** - * Generates the masks and shifts for each argument. - * A '-' in a bitstring indicates that we don't care about that value. - * An argument is specified by a continuous string of the same character. - */ + /// @brief Generates the masks and shifts for each argument. + /// A '-' in a bitstring indicates that we don't care about that value. + /// An argument is specified by a continuous string of the same character. template static consteval auto GetArgInfo(std::array bitstring) { std::array masks = {}; @@ -98,7 +96,6 @@ struct detail { if constexpr (N > 0) { const size_t bit_position = opcode_bitsize - i - 1; - if (arg_index >= N) throw std::out_of_range("Unexpected field"); @@ -109,20 +106,16 @@ struct detail { } } } - #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) // Avoids a MSVC ICE, and avoids Android NDK issue. ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; })); #endif - return std::make_tuple(masks, shifts); } - /** - * This struct's Make member function generates a lambda which decodes an instruction based on - * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a - * template argument. - */ + /// @brief This struct's Make member function generates a lambda which decodes an instruction + /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is + /// provided as a template argument. template struct VisitorCaller; @@ -130,36 +123,36 @@ struct detail { # pragma warning(push) # pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning) #endif - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...), + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...), const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...) const, + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...) const, const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; @@ -167,22 +160,16 @@ struct detail { # pragma warning(pop) #endif - /** - * Creates a matcher that can match and parse instructions based on bitstring. - * See also: GetMaskAndExpect and GetArgInfo for format of bitstring. - */ - template - static auto GetMatcher(FnT fn, const char* const name) { - constexpr size_t args_count = mcl::parameter_count_v; - + /// @brief Creates a matcher that can match and parse instructions based on bitstring. + /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. + template + static constexpr auto GetMatcher(F fn, const char* const name) { + constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); - - using Iota = std::make_index_sequence; - - const auto proxy_fn = VisitorCaller::Make(Iota(), fn, arg_masks, arg_shifts); + const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); return MatcherT(name, mask, expect, proxy_fn); } }; From 1b21a05255f595da7a2040d13d5d405f19334a5d Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 4 Sep 2025 07:24:49 +0000 Subject: [PATCH 26/32] [dynarmic] reduce matcher table noise and cache misses Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/a32_types.h | 21 +++++---------- .../src/dynarmic/frontend/A32/decoder/asimd.h | 27 +++++++++---------- .../src/dynarmic/frontend/A32/decoder/vfp.h | 12 +++------ .../src/dynarmic/frontend/A64/decoder/a64.h | 23 +++++++--------- .../frontend/decoder/decoder_detail.h | 6 ++--- .../src/dynarmic/frontend/decoder/matcher.h | 11 ++------ 6 files changed, 38 insertions(+), 62 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h index fdadef8257..2a0cc25751 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h @@ -9,12 +9,9 @@ #pragma once #include -#include - #include #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" - #include "dynarmic/interface/A32/coprocessor_util.h" #include "dynarmic/ir/cond.h" @@ -89,23 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) { inline size_t RegNumber(Reg reg) { ASSERT(reg != Reg::INVALID_REG); - return static_cast(reg); + return size_t(reg); } inline size_t RegNumber(ExtReg reg) { if (IsSingleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::S0); + return size_t(reg) - size_t(ExtReg::S0); + } else if (IsDoubleExtReg(reg)) { + return size_t(reg) - size_t(ExtReg::D0); } - - if (IsDoubleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::D0); - } - - if (IsQuadExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::Q0); - } - - ASSERT_MSG(false, "Invalid extended register"); + ASSERT(IsQuadExtReg(reg)); + return size_t(reg) - size_t(ExtReg::Q0); } inline Reg operator+(Reg reg, size_t number) { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f2e206695b..f63816d2b1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher; template std::vector> GetASIMDDecodeTable() { - std::vector> table = { - -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> table = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" #undef INST - }; - // Exceptions to the rule of thumb. const std::set comes_first{ "VBIC, VMOV, VMVN, VORR (immediate)", @@ -53,19 +50,21 @@ std::vector> GetASIMDDecodeTable() { "VQDMULH (scalar)", "VQRDMULH (scalar)", }; - const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_first.count(matcher.GetName()) > 0; + const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_first.count(e.first) > 0; }); - const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_last.count(matcher.GetName()) == 0; + const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_last.count(e.first) == 0; }); - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) { - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - - return table; + std::vector> final_table; + std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + return e.second; + }); + return final_table; } template diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index 5fcacd2bda..a969570547 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -30,22 +30,18 @@ std::optional>> DecodeVFP(u32 instruc static const struct Tables { Table unconditional; Table conditional; - } tables = [] { + } tables = []() { Table list = { - #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./vfp.inc" #undef INST - }; - - const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { return (matcher.GetMask() & 0xF0000000) == 0xF0000000; }); - return Tables{ - Table{list.begin(), division}, - Table{division, list.end()}, + Table{list.begin(), it}, + Table{it, list.end()}, }; }(); diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c651dd7cde..c7bc981ac6 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) { template constexpr DecodeTable GetDecodeTable() { - std::vector> list = { -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./a64.inc" #undef INST }; - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { + std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { // If a matcher has more bits in its mask it is more specific, so it should come first. - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - // Exceptions to the above rule of thumb. - std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + std::stable_partition(list.begin(), list.end(), [&](const auto& e) { return std::set{ "MOVI, MVNI, ORR, BIC (vector, immediate)", "FMOV (vector, immediate)", "Unallocated SIMD modified immediate", - }.count(matcher.GetName()) > 0; + }.count(e.first) > 0; }); - DecodeTable table{}; for (size_t i = 0; i < table.size(); ++i) { - for (auto matcher : list) { - const auto expect = detail::ToFastLookupIndex(matcher.GetExpected()); - const auto mask = detail::ToFastLookupIndex(matcher.GetMask()); + for (auto const& e : list) { + const auto expect = detail::ToFastLookupIndex(e.second.GetExpected()); + const auto mask = detail::ToFastLookupIndex(e.second.GetMask()); if ((i & mask) == expect) { - table[i].push_back(matcher); + table[i].push_back(e.second); } } } diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index 4050033ef5..0d9da6169c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -163,18 +163,18 @@ struct detail { /// @brief Creates a matcher that can match and parse instructions based on bitstring. /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. template - static constexpr auto GetMatcher(F fn, const char* const name) { + static constexpr auto GetMatcher(F fn) { constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); - return MatcherT(name, mask, expect, proxy_fn); + return MatcherT(mask, expect, proxy_fn); } }; -#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn, name) +#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn) } // namespace detail } // namespace Dynarmic::Decoder diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 7e5c9c5a8f..5a2afe57c5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -31,14 +31,8 @@ public: using visitor_type = Visitor; using handler_return_type = typename Visitor::instruction_return_type; using handler_function = std::function; - - Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func) - : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {} - - /// Gets the name of this type of instruction. - const char* GetName() const { - return name; - } + Matcher(opcode_type mask, opcode_type expected, handler_function func) + : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. opcode_type GetMask() const { @@ -70,7 +64,6 @@ public: } private: - const char* name; opcode_type mask; opcode_type expected; handler_function fn; From a25ab193e86f532c46c4228d0076c6ddda1e4583 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 20:59:23 +0000 Subject: [PATCH 27/32] [dynarmic] fix tests Signed-off-by: lizzie --- .../src/dynarmic/frontend/decoder/matcher.h | 40 +++++++----------- src/dynarmic/tests/A32/fuzz_arm.cpp | 1 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 1 + .../tests/A32/test_arm_instructions.cpp | 1 + src/dynarmic/tests/A32/test_coprocessor.cpp | 1 + src/dynarmic/tests/A32/test_svc.cpp | 1 + .../tests/A32/test_thumb_instructions.cpp | 1 + src/dynarmic/tests/A64/a64.cpp | 1 + src/dynarmic/tests/A64/fp_min_max.cpp | 1 + src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 1 + .../tests/A64/misaligned_page_table.cpp | 1 + src/dynarmic/tests/A64/real_world.cpp | 1 + src/dynarmic/tests/A64/test_invalidation.cpp | 1 + src/dynarmic/tests/decoder_tests.cpp | 2 + src/dynarmic/tests/native/preserve_xmm.cpp | 1 + src/dynarmic/tests/print_info.cpp | 42 ++++--------------- 16 files changed, 39 insertions(+), 58 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 5a2afe57c5..f7e2884e0c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -14,16 +14,12 @@ namespace Dynarmic::Decoder { -/** - * Generic instruction handling construct. - * - * @tparam Visitor An arbitrary visitor type that will be passed through - * to the function being handled. This type must be the - * type of the first parameter in a handler function. - * - * @tparam OpcodeType Type representing an opcode. This must be the - * type of the second parameter in a handler function. - */ +/// Generic instruction handling construct. +/// @tparam Visitor An arbitrary visitor type that will be passed through +/// to the function being handled. This type must be the +/// type of the first parameter in a handler function. +/// @tparam OpcodeType Type representing an opcode. This must be the +/// type of the second parameter in a handler function. template class Matcher { public: @@ -35,30 +31,26 @@ public: : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. - opcode_type GetMask() const { + inline opcode_type GetMask() const noexcept { return mask; } /// Gets the expected value after masking for this instruction. - opcode_type GetExpected() const { + inline opcode_type GetExpected() const noexcept { return expected; } - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - bool Matches(opcode_type instruction) const { + /// Tests to see if the given instruction is the instruction this matcher represents. + /// @param instruction The instruction to test + /// @returns true if the given instruction matches. + inline bool Matches(opcode_type instruction) const noexcept { return (instruction & mask) == expected; } - /** - * Calls the corresponding instruction handler on visitor for this type of instruction. - * @param v The visitor to use - * @param instruction The instruction to decode. - */ - handler_return_type call(Visitor& v, opcode_type instruction) const { + /// Calls the corresponding instruction handler on visitor for this type of instruction. + /// @param v The visitor to use + /// @param instruction The instruction to decode. + inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept { ASSERT(Matches(instruction)); return fn(v, instruction); } diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp index 087ce54813..5ee6d2bf02 100644 --- a/src/dynarmic/tests/A32/fuzz_arm.cpp +++ b/src/dynarmic/tests/A32/fuzz_arm.cpp @@ -24,6 +24,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a32_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index ad01e5718b..7f64cb0ccb 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -22,6 +22,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a32_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/FPSCR.h" #include "dynarmic/frontend/A32/PSR.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" diff --git a/src/dynarmic/tests/A32/test_arm_instructions.cpp b/src/dynarmic/tests/A32/test_arm_instructions.cpp index 0411877823..c007a18299 100644 --- a/src/dynarmic/tests/A32/test_arm_instructions.cpp +++ b/src/dynarmic/tests/A32/test_arm_instructions.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" diff --git a/src/dynarmic/tests/A32/test_coprocessor.cpp b/src/dynarmic/tests/A32/test_coprocessor.cpp index 98da0e5d34..49cb42bdf3 100644 --- a/src/dynarmic/tests/A32/test_coprocessor.cpp +++ b/src/dynarmic/tests/A32/test_coprocessor.cpp @@ -8,6 +8,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/tests/A32/test_svc.cpp b/src/dynarmic/tests/A32/test_svc.cpp index 8b55d6537c..998566130e 100644 --- a/src/dynarmic/tests/A32/test_svc.cpp +++ b/src/dynarmic/tests/A32/test_svc.cpp @@ -8,6 +8,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A32/test_thumb_instructions.cpp b/src/dynarmic/tests/A32/test_thumb_instructions.cpp index 3501d5419f..d509acdd8d 100644 --- a/src/dynarmic/tests/A32/test_thumb_instructions.cpp +++ b/src/dynarmic/tests/A32/test_thumb_instructions.cpp @@ -10,6 +10,7 @@ #include "dynarmic/common/common_types.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A32/a32.h" static Dynarmic::A32::UserConfig GetUserConfig(ThumbTestEnv* testenv) { diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index 40eff1f071..24e92d1210 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -7,6 +7,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/A64/fp_min_max.cpp b/src/dynarmic/tests/A64/fp_min_max.cpp index d8b45db807..1669b63071 100644 --- a/src/dynarmic/tests/A64/fp_min_max.cpp +++ b/src/dynarmic/tests/A64/fp_min_max.cpp @@ -12,6 +12,7 @@ #include "dynarmic/common/common_types.h" #include "./testenv.h" +#include "../native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp index 3322af06e7..45ba728917 100644 --- a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp +++ b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp @@ -19,6 +19,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a64_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/A64/misaligned_page_table.cpp b/src/dynarmic/tests/A64/misaligned_page_table.cpp index 8235e14a67..ecba5a3efa 100644 --- a/src/dynarmic/tests/A64/misaligned_page_table.cpp +++ b/src/dynarmic/tests/A64/misaligned_page_table.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" TEST_CASE("misaligned load/store do not use page_table when detect_misaligned_access_via_page_table is set", "[a64]") { diff --git a/src/dynarmic/tests/A64/real_world.cpp b/src/dynarmic/tests/A64/real_world.cpp index 07532d95af..a083f16d61 100644 --- a/src/dynarmic/tests/A64/real_world.cpp +++ b/src/dynarmic/tests/A64/real_world.cpp @@ -5,6 +5,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/test_invalidation.cpp b/src/dynarmic/tests/A64/test_invalidation.cpp index 168043c1cb..8a63776fa4 100644 --- a/src/dynarmic/tests/A64/test_invalidation.cpp +++ b/src/dynarmic/tests/A64/test_invalidation.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index e545309960..777ea8b510 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,6 +20,7 @@ using namespace Dynarmic; +/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -76,3 +77,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { } while (x != 0); } } +*/ diff --git a/src/dynarmic/tests/native/preserve_xmm.cpp b/src/dynarmic/tests/native/preserve_xmm.cpp index 0f69697b7a..7421252063 100644 --- a/src/dynarmic/tests/native/preserve_xmm.cpp +++ b/src/dynarmic/tests/native/preserve_xmm.cpp @@ -6,6 +6,7 @@ #include #include "../A64/testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 3263ca729a..0c89e780ad 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -32,6 +32,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/frontend/A64/translate/impl/impl.h" #include "dynarmic/interface/A32/a32.h" +#include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/opt_passes.h" @@ -39,20 +40,20 @@ using namespace Dynarmic; const char* GetNameOfA32Instruction(u32 instruction) { - if (auto vfp_decoder = A32::DecodeVFP(instruction)) { + /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { return vfp_decoder->get().GetName(); } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { return asimd_decoder->get().GetName(); } else if (auto decoder = A32::DecodeArm(instruction)) { return decoder->get().GetName(); - } + }*/ return ""; } const char* GetNameOfA64Instruction(u32 instruction) { - if (auto decoder = A64::Decode(instruction)) { + /*if (auto decoder = A64::Decode(instruction)) { return decoder->get().GetName(); - } + }*/ return ""; } @@ -64,18 +65,9 @@ void PrintA32Instruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A32GetSetElimination(ir_block, {}); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A32::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } @@ -88,18 +80,9 @@ void PrintA64Instruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A64::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A64::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } @@ -115,18 +98,9 @@ void PrintThumbInstruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A32GetSetElimination(ir_block, {}); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A32::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } From b77aa7c2d1dd9ccdf026eb5d78b6bfbc75d726bb Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:07:18 +0000 Subject: [PATCH 28/32] [dynarmic] fix ASIMD execution Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f63816d2b1..2cea7a14c1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -61,7 +61,7 @@ std::vector> GetASIMDDecodeTable() { return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); std::vector> final_table; - std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) { return e.second; }); return final_table; From 2f99bee38a746e5be8e773890671eb45f90a064e Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:40:13 +0000 Subject: [PATCH 29/32] [dynarmic] add back encoding names (for print_info) Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/decoder/arm.h | 20 ++++++++++--- .../src/dynarmic/frontend/A32/decoder/asimd.h | 26 ++++++++++++----- .../dynarmic/frontend/A32/decoder/thumb16.h | 24 +++++++++++----- .../dynarmic/frontend/A32/decoder/thumb32.h | 24 +++++++++++----- .../src/dynarmic/frontend/A32/decoder/vfp.h | 24 +++++++++++----- .../A32/translate/translate_thumb.cpp | 2 +- .../src/dynarmic/frontend/A64/decoder/a64.h | 20 ++++++++++--- src/dynarmic/tests/decoder_tests.cpp | 28 ++++++------------- src/dynarmic/tests/print_info.cpp | 22 +++++++-------- 9 files changed, 122 insertions(+), 68 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h index 0257c28ddb..c6f034ae21 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h @@ -30,13 +30,13 @@ template using ArmDecodeTable = std::array>, 0x1000>; namespace detail { -inline size_t ToFastLookupIndexArm(u32 instruction) { +inline size_t ToFastLookupIndexArm(u32 instruction) noexcept { return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0); } } // namespace detail template -constexpr ArmDecodeTable GetArmDecodeTable() { +constexpr ArmDecodeTable GetArmDecodeTable() noexcept { std::vector> list = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./arm.inc" @@ -62,15 +62,27 @@ constexpr ArmDecodeTable GetArmDecodeTable() { } template -std::optional>> DecodeArm(u32 instruction) { +std::optional>> DecodeArm(u32 instruction) noexcept { alignas(64) static const auto table = GetArmDecodeTable(); const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)]; auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameARM(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./arm.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index 2cea7a14c1..57e1392871 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -26,7 +26,7 @@ template using ASIMDMatcher = Decoder::Matcher; template -std::vector> GetASIMDDecodeTable() { +std::vector> GetASIMDDecodeTable() noexcept { std::vector>> table = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" @@ -68,13 +68,25 @@ std::vector> GetASIMDDecodeTable() { } template -std::optional>> DecodeASIMD(u32 instruction) { - static const auto table = GetASIMDDecodeTable(); - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); +std::optional>> DecodeASIMD(u32 instruction) noexcept { + alignas(64) static const auto table = GetASIMDDecodeTable(); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameASIMD(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./asimd.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h index 8073ee5d47..16b99ba5aa 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h @@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher; template std::optional>> DecodeThumb16(u16 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)), #include "./thumb16.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb16(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) }, +#include "./thumb16.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h index 86a4d767a7..19418de67c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h @@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher; template std::optional>> DecodeThumb32(u32 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./thumb32.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb32(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./thumb32.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index a969570547..a346304a9a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -26,8 +26,7 @@ using VFPMatcher = Decoder::Matcher; template std::optional>> DecodeVFP(u32 instruction) { using Table = std::vector>; - - static const struct Tables { + alignas(64) static const struct Tables { Table unconditional; Table conditional; } tables = []() { @@ -44,14 +43,25 @@ std::optional>> DecodeVFP(u32 instruc Table{it, list.end()}, }; }(); - const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000; const Table& table = is_unconditional ? tables.unconditional : tables.conditional; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameVFP(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./vfp.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp index 5bb516ccfd..0381c984cc 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp @@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) { return 0xF7F0A000; // UDF } -bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) { +inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept { return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000; } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c7bc981ac6..7e6cdc3935 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -71,12 +71,24 @@ constexpr DecodeTable GetDecodeTable() { template std::optional>> Decode(u32 instruction) { alignas(64) static const auto table = GetDecodeTable(); - const auto matches_instruction = [instruction](const auto& matcher) { - return matcher.Matches(instruction); - }; const auto& subtable = table[detail::ToFastLookupIndex(instruction)]; - auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); + auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetName(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./a64.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A64 diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index 777ea8b510..4ad9d90833 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,7 +20,6 @@ using namespace Dynarmic; -/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -37,22 +36,12 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto is_decode_error = [&get_ir](const A32::ASIMDMatcher& matcher, u32 instruction) { const auto block = get_ir(matcher, instruction); - - for (const auto& ir_inst : block) { - if (ir_inst.GetOpcode() == IR::Opcode::A32ExceptionRaised) { - if (static_cast(ir_inst.GetArg(1).GetU64()) == A32::Exception::DecodeError) { - return true; - } - } - } - return false; + return std::find_if(block.cbegin(), block.cend(), [](auto const& e) { + return e.GetOpcode() == IR::Opcode::A32ExceptionRaised && A32::Exception(e.GetArg(1).GetU64()) == A32::Exception::DecodeError; + }) != block.cend(); }; for (auto iter = table.cbegin(); iter != table.cend(); ++iter) { - if (std::strncmp(iter->GetName(), "UNALLOCATED", 11) == 0) { - continue; - } - const u32 expect = iter->GetExpected(); const u32 mask = iter->GetMask(); u32 x = 0; @@ -60,15 +49,17 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const u32 instruction = expect | x; const bool iserr = is_decode_error(*iter, instruction); - const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { return m.Matches(instruction); }); + const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { + return m.Matches(instruction); + }); const bool altiserr = is_decode_error(*alternative, instruction); INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction); INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect); INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x); - INFO("Name: " << iter->GetName()); + INFO("Name: " << *A32::GetNameASIMD(instruction)); INFO("iserr: " << iserr); - INFO("alternative: " << alternative->GetName()); + //INFO("alternative: " << alternative->GetName()); INFO("altiserr: " << altiserr); REQUIRE(((!iserr && alternative == iter) || (iserr && alternative != iter && !altiserr))); @@ -76,5 +67,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { x = ((x | mask) + 1) & ~mask; } while (x != 0); } -} -*/ +} \ No newline at end of file diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 0c89e780ad..8936f32bd3 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -39,21 +39,19 @@ using namespace Dynarmic; -const char* GetNameOfA32Instruction(u32 instruction) { - /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { - return vfp_decoder->get().GetName(); - } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { - return asimd_decoder->get().GetName(); - } else if (auto decoder = A32::DecodeArm(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA32Instruction(u32 instruction) { + if (auto const vfp_decoder = A32::DecodeVFP(instruction)) + return *A32::GetNameVFP(instruction); + else if (auto const asimd_decoder = A32::DecodeASIMD(instruction)) + return *A32::GetNameASIMD(instruction); + else if (auto const decoder = A32::DecodeArm(instruction)) + return *A32::GetNameARM(instruction); return ""; } -const char* GetNameOfA64Instruction(u32 instruction) { - /*if (auto decoder = A64::Decode(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA64Instruction(u32 instruction) { + if (auto const decoder = A64::Decode(instruction)) + return *A64::GetName(instruction); return ""; } From b17845fa6507670654f1fe9664464bc8b87e3efb Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 14:08:17 +0000 Subject: [PATCH 30/32] [dynarmic] use ARCHITECTURE_ macros instead of MCL ones Signed-off-by: lizzie --- .../src/dynarmic/backend/exception_handler.h | 18 +-- .../backend/exception_handler_generic.cpp | 6 +- .../backend/exception_handler_macos.cpp | 12 +- .../backend/exception_handler_macos_mig.c | 4 +- .../backend/exception_handler_posix.cpp | 130 ++---------------- .../backend/exception_handler_windows.cpp | 4 +- src/dynarmic/src/dynarmic/common/context.h | 120 ++++++++++++++++ 7 files changed, 157 insertions(+), 137 deletions(-) create mode 100644 src/dynarmic/src/dynarmic/common/context.h diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler.h b/src/dynarmic/src/dynarmic/backend/exception_handler.h index 173949628c..cd274b111f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler.h +++ b/src/dynarmic/src/dynarmic/backend/exception_handler.h @@ -15,15 +15,15 @@ #include #include "dynarmic/common/common_types.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) namespace Dynarmic::Backend::X64 { class BlockOfCode; } // namespace Dynarmic::Backend::X64 -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) namespace oaknut { class CodeBlock; } // namespace oaknut -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) namespace Dynarmic::Backend::RV64 { class CodeBlock; } // namespace Dynarmic::Backend::RV64 @@ -33,16 +33,16 @@ class CodeBlock; namespace Dynarmic::Backend { -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) struct FakeCall { u64 call_rip; u64 ret_rip; }; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) struct FakeCall { u64 call_pc; }; -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) struct FakeCall { }; #else @@ -54,11 +54,11 @@ public: ExceptionHandler(); ~ExceptionHandler(); -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void Register(X64::BlockOfCode& code); -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void Register(oaknut::CodeBlock& mem, std::size_t mem_size); -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void Register(RV64::CodeBlock& mem, std::size_t mem_size); #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp index ad7df25ca6..985536d9f0 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp @@ -13,15 +13,15 @@ struct ExceptionHandler::Impl final { ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode&) { // Do nothing } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock&, std::size_t) { // Do nothing } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock&, std::size_t) { // Do nothing } diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp index 52bcf5972f..76e517f05b 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp @@ -25,7 +25,7 @@ #include "dynarmic/backend/exception_handler.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/block_of_code.h" # define mig_external extern "C" @@ -36,7 +36,7 @@ using dynarmic_thread_state_t = x86_thread_state64_t; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include # define mig_external extern "C" @@ -133,7 +133,7 @@ void MachHandler::MessagePump() { } } -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { std::lock_guard guard(code_block_infos_mutex); @@ -151,7 +151,7 @@ kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { return KERN_SUCCESS; } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) kern_return_t MachHandler::HandleRequest(arm_thread_state64_t* ts) { std::lock_guard guard(code_block_infos_mutex); @@ -269,13 +269,13 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { const u64 code_begin = mcl::bit_cast(code.getCode()); const u64 code_end = code_begin + code.GetTotalCodeSize(); impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c index 762a80ca42..25678ab115 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c @@ -5,9 +5,9 @@ #include -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/mig/mach_exc_server.c" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include "dynarmic/backend/arm64/mig/mach_exc_server.c" #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index 7695df57d2..ef28d99279 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -8,19 +8,6 @@ #include "dynarmic/backend/exception_handler.h" -#ifdef __APPLE__ -# include -# include -#else -# include -# ifndef __OpenBSD__ -# include -# endif -# ifdef __sun__ -# include -# endif -#endif - #include #include #include @@ -29,16 +16,17 @@ #include #include "dynarmic/common/assert.h" +#include "dynarmic/common/context.h" #include #include "dynarmic/common/common_types.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/block_of_code.h" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include # include "dynarmic/backend/arm64/abi.h" -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) # include "dynarmic/backend/riscv64/code_block.h" #else # error "Invalid architecture" @@ -131,139 +119,51 @@ SigHandler::~SigHandler() { void SigHandler::AddCodeBlock(CodeBlockInfo cbi) { std::lock_guard guard(code_block_infos_mutex); - if (auto iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) { + if (auto const iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) code_block_infos.erase(iter); - } code_block_infos.push_back(cbi); } void SigHandler::RemoveCodeBlock(u64 host_pc) { std::lock_guard guard(code_block_infos_mutex); const auto iter = FindCodeBlockInfo(host_pc); - if (iter == code_block_infos.end()) { - return; - } - code_block_infos.erase(iter); + if (iter != code_block_infos.end()) + code_block_infos.erase(iter); } void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { ASSERT(sig == SIGSEGV || sig == SIGBUS); - -#ifndef MCL_ARCHITECTURE_RISCV - ucontext_t* ucontext = reinterpret_cast(raw_context); -#ifndef __OpenBSD__ - auto& mctx = ucontext->uc_mcontext; -#endif -#endif - -#if defined(MCL_ARCHITECTURE_X86_64) - -# if defined(__APPLE__) -# define CTX_RIP (mctx->__ss.__rip) -# define CTX_RSP (mctx->__ss.__rsp) -# elif defined(__linux__) -# define CTX_RIP (mctx.gregs[REG_RIP]) -# define CTX_RSP (mctx.gregs[REG_RSP]) -# elif defined(__FreeBSD__) -# define CTX_RIP (mctx.mc_rip) -# define CTX_RSP (mctx.mc_rsp) -# elif defined(__NetBSD__) -# define CTX_RIP (mctx.__gregs[_REG_RIP]) -# define CTX_RSP (mctx.__gregs[_REG_RSP]) -# elif defined(__OpenBSD__) -# define CTX_RIP (ucontext->sc_rip) -# define CTX_RSP (ucontext->sc_rsp) -# elif defined(__sun__) -# define CTX_RIP (mctx.gregs[REG_RIP]) -# define CTX_RSP (mctx.gregs[REG_RSP]) -# else -# error "Unknown platform" -# endif - + CTX_DECLARE(raw_context); +#if defined(ARCHITECTURE_x86_64) { std::lock_guard guard(sig_handler->code_block_infos_mutex); const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP); if (iter != sig_handler->code_block_infos.end()) { FakeCall fc = iter->cb(CTX_RIP); - CTX_RSP -= sizeof(u64); *mcl::bit_cast(CTX_RSP) = fc.ret_rip; CTX_RIP = fc.call_rip; - return; } } - fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); - -#elif defined(MCL_ARCHITECTURE_ARM64) - -# if defined(__APPLE__) -# define CTX_PC (mctx->__ss.__pc) -# define CTX_SP (mctx->__ss.__sp) -# define CTX_LR (mctx->__ss.__lr) -# define CTX_X(i) (mctx->__ss.__x[i]) -# define CTX_Q(i) (mctx->__ns.__v[i]) -# elif defined(__linux__) -# define CTX_PC (mctx.pc) -# define CTX_SP (mctx.sp) -# define CTX_LR (mctx.regs[30]) -# define CTX_X(i) (mctx.regs[i]) -# define CTX_Q(i) (fpctx->vregs[i]) - [[maybe_unused]] const auto fpctx = [&mctx] { - _aarch64_ctx* header = (_aarch64_ctx*)&mctx.__reserved; - while (header->magic != FPSIMD_MAGIC) { - ASSERT(header->magic && header->size); - header = (_aarch64_ctx*)((char*)header + header->size); - } - return (fpsimd_context*)header; - }(); -# elif defined(__FreeBSD__) -# define CTX_PC (mctx.mc_gpregs.gp_elr) -# define CTX_SP (mctx.mc_gpregs.gp_sp) -# define CTX_LR (mctx.mc_gpregs.gp_lr) -# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) -# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) -# elif defined(__NetBSD__) -# define CTX_PC (mctx.mc_gpregs.gp_elr) -# define CTX_SP (mctx.mc_gpregs.gp_sp) -# define CTX_LR (mctx.mc_gpregs.gp_lr) -# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) -# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) -# elif defined(__OpenBSD__) -# define CTX_PC (ucontext->sc_elr) -# define CTX_SP (ucontext->sc_sp) -# define CTX_LR (ucontext->sc_lr) -# define CTX_X(i) (ucontext->sc_x[i]) -# define CTX_Q(i) (ucontext->sc_q[i]) -# else -# error "Unknown platform" -# endif - +#elif defined(ARCHITECTURE_arm64) + CTX_DECLARE(raw_context); { std::lock_guard guard(sig_handler->code_block_infos_mutex); - const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); if (iter != sig_handler->code_block_infos.end()) { FakeCall fc = iter->cb(CTX_PC); - CTX_PC = fc.call_pc; - return; } } - fmt::print(stderr, "Unhandled {} at pc {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_PC); - -#elif defined(MCL_ARCHITECTURE_RISCV) - +#elif defined(ARCHITECTURE_riscv64) ASSERT_FALSE("Unimplemented"); - #else - # error "Invalid architecture" - #endif struct sigaction* retry_sa = sig == SIGSEGV ? &sig_handler->old_sa_segv : &sig_handler->old_sa_bus; @@ -309,19 +209,19 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { const u64 code_begin = mcl::bit_cast(code.getCode()); const u64 code_end = code_begin + code.GetTotalCodeSize(); impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp index 719c007594..9e76cae912 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp @@ -5,9 +5,9 @@ #include -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/exception_handler_windows.cpp" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include "dynarmic/backend/exception_handler_generic.cpp" #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/common/context.h b/src/dynarmic/src/dynarmic/common/context.h new file mode 100644 index 0000000000..0eb128449c --- /dev/null +++ b/src/dynarmic/src/dynarmic/common/context.h @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#ifdef __APPLE__ +# include +# include +#else +# include +# ifndef __OpenBSD__ +# include +# endif +# ifdef __sun__ +# include +# endif +# ifdef __linux__ +# include +# endif +#endif + +#ifdef ARCHITECTURE_x86_64 +# ifdef __OpenBSD__ +# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast(raw_context); +# else +# define CTX_DECLARE(raw_context) \ + ucontext_t* ucontext = reinterpret_cast(raw_context); \ + [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; +# endif +#elif defined(ARCHITECTURE_arm64) +# ifdef __OpenBSD__ +# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast(raw_context); +# else +# define CTX_DECLARE(raw_context) \ + ucontext_t* ucontext = reinterpret_cast(raw_context); \ + [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; \ + [[maybe_unused]] const auto fpctx = GetFloatingPointState(mctx); +# endif +#endif + +#if defined(ARCHITECTURE_x86_64) +# if defined(__APPLE__) +# define CTX_RIP (mctx->__ss.__rip) +# define CTX_RSP (mctx->__ss.__rsp) +# elif defined(__linux__) +# define CTX_RIP (mctx.gregs[REG_RIP]) +# define CTX_RSP (mctx.gregs[REG_RSP]) +# elif defined(__FreeBSD__) +# define CTX_RIP (mctx.mc_rip) +# define CTX_RSP (mctx.mc_rsp) +# elif defined(__NetBSD__) +# define CTX_RIP (mctx.__gregs[_REG_RIP]) +# define CTX_RSP (mctx.__gregs[_REG_RSP]) +# elif defined(__OpenBSD__) +# define CTX_RIP (ucontext->sc_rip) +# define CTX_RSP (ucontext->sc_rsp) +# elif defined(__sun__) +# define CTX_RIP (mctx.gregs[REG_RIP]) +# define CTX_RSP (mctx.gregs[REG_RSP]) +# else +# error "Unknown platform" +# endif +#elif defined(ARCHITECTURE_arm64) +# if defined(__APPLE__) +# define CTX_PC (mctx->__ss.__pc) +# define CTX_SP (mctx->__ss.__sp) +# define CTX_LR (mctx->__ss.__lr) +# define CTX_PSTATE (mctx->__ss.__cpsr) +# define CTX_X(i) (mctx->__ss.__x[i]) +# define CTX_Q(i) (mctx->__ns.__v[i]) +# define CTX_FPSR (mctx->__ns.__fpsr) +# define CTX_FPCR (mctx->__ns.__fpcr) +# elif defined(__linux__) +# define CTX_PC (mctx.pc) +# define CTX_SP (mctx.sp) +# define CTX_LR (mctx.regs[30]) +# define CTX_PSTATE (mctx.pstate) +# define CTX_X(i) (mctx.regs[i]) +# define CTX_Q(i) (fpctx->vregs[i]) +# define CTX_FPSR (fpctx->fpsr) +# define CTX_FPCR (fpctx->fpcr) +# elif defined(__FreeBSD__) +# define CTX_PC (mctx.mc_gpregs.gp_elr) +# define CTX_SP (mctx.mc_gpregs.gp_sp) +# define CTX_LR (mctx.mc_gpregs.gp_lr) +# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) +# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) +# elif defined(__NetBSD__) +# define CTX_PC (mctx.mc_gpregs.gp_elr) +# define CTX_SP (mctx.mc_gpregs.gp_sp) +# define CTX_LR (mctx.mc_gpregs.gp_lr) +# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) +# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) +# elif defined(__OpenBSD__) +# define CTX_PC (ucontext->sc_elr) +# define CTX_SP (ucontext->sc_sp) +# define CTX_LR (ucontext->sc_lr) +# define CTX_X(i) (ucontext->sc_x[i]) +# define CTX_Q(i) (ucontext->sc_q[i]) +# else +# error "Unknown platform" +# endif +#else +# error "unimplemented" +#endif + +#ifdef ARCHITECTURE_arm64 +#ifdef __APPLE__ +inline _STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) { + return &(host_ctx->__ns); +} +#elif defined(__linux__) +inline fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) { + _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved); + while (header->magic != FPSIMD_MAGIC) + header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast(header) + header->size); + return reinterpret_cast(header); +} +#endif +#endif From 56327d738dcd88c6caa47e7684d17c7a51b65baa Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 17:03:36 +0000 Subject: [PATCH 31/32] [dynarmic] fix android Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index ef28d99279..b22befaff9 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -149,7 +149,6 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { } fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); #elif defined(ARCHITECTURE_arm64) - CTX_DECLARE(raw_context); { std::lock_guard guard(sig_handler->code_block_infos_mutex); const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); From 852a0ff5ac885b89751593a53a929afcb8b8e907 Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 8 Sep 2025 02:47:04 +0000 Subject: [PATCH 32/32] [dynarmic, cmake][ remove unusd frontends var Signed-off-by: lizzie --- src/dynarmic/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 842eb91a88..037600f2af 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -36,9 +36,6 @@ option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF) option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. cross-compiling)" OFF) option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT}) option(DYNARMIC_ENABLE_LTO "Enable LTO" OFF) -if (NOT DEFINED DYNARMIC_FRONTENDS) - set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable") -endif() # Default to a Release build if (NOT CMAKE_BUILD_TYPE)