From 0de8db4d33120b07e5f2d3022a2dd212657f5fbc Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:54:53 +0000 Subject: [PATCH 01/56] [dynarmic] reduce opt pass latency Signed-off-by: lizzie --- src/dynarmic/docs/RegisterAllocator.md | 40 +- .../docs/ReturnStackBufferOptimization.md | 162 +- src/dynarmic/src/dynarmic/CMakeLists.txt | 199 +-- .../backend/arm64/a32_address_space.cpp | 18 +- .../backend/arm64/a64_address_space.cpp | 19 +- .../backend/riscv64/a32_address_space.cpp | 16 +- .../dynarmic/backend/x64/a32_interface.cpp | 16 +- .../dynarmic/backend/x64/a64_interface.cpp | 18 +- .../src/dynarmic/common/memory_pool.cpp | 13 - .../src/dynarmic/common/memory_pool.h | 61 - src/dynarmic/src/dynarmic/ir/basic_block.cpp | 7 +- src/dynarmic/src/dynarmic/ir/basic_block.h | 3 - src/dynarmic/src/dynarmic/ir/ir_emitter.cpp | 21 - .../ir/opt/a32_constant_memory_reads_pass.cpp | 70 - .../ir/opt/a32_get_set_elimination_pass.cpp | 382 ----- .../ir/opt/a64_callback_config_pass.cpp | 57 - .../ir/opt/a64_get_set_elimination_pass.cpp | 165 -- .../ir/opt/a64_merge_interpret_blocks.cpp | 57 - .../ir/opt/constant_propagation_pass.cpp | 559 ------ .../ir/opt/dead_code_elimination_pass.cpp | 23 - .../dynarmic/ir/opt/identity_removal_pass.cpp | 44 - src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h | 127 -- .../src/dynarmic/ir/opt/naming_pass.cpp | 18 - src/dynarmic/src/dynarmic/ir/opt/passes.h | 47 - .../src/dynarmic/ir/opt/polyfill_pass.cpp | 218 --- .../src/dynarmic/ir/opt/verification_pass.cpp | 51 - src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 1502 +++++++++++++++++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 34 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 10 +- src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 2 +- src/dynarmic/tests/CMakeLists.txt | 146 +- src/dynarmic/tests/print_info.cpp | 2 +- 32 files changed, 1808 insertions(+), 2299 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.cpp delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.h delete mode 100644 src/dynarmic/src/dynarmic/ir/ir_emitter.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/passes.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.h diff --git a/src/dynarmic/docs/RegisterAllocator.md b/src/dynarmic/docs/RegisterAllocator.md index fea6f19e6a..5a7210c791 100644 --- a/src/dynarmic/docs/RegisterAllocator.md +++ b/src/dynarmic/docs/RegisterAllocator.md @@ -16,19 +16,31 @@ Note that `Use`ing a value decrements its `use_count` by one. 
When the `use_coun
 The member functions on `RegAlloc` are just a combination of the above concepts.
 
+The following registers are reserved for internal use and should NOT participate in register allocation:
+- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access.
+- `%rsp`: Stack pointer.
+
+The layout designates `%r15` as the JIT state pointer; while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable to inlining a directly computed immediate.
+
+NEVER modify `%r15`: this register must be treated as "immutable" for the entire duration of a JIT block.
+
 ### `Scratch`
 
-    Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr)
-    Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm)
+```c++
+Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr);
+Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm);
+```
 
 At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register.
 The register is discarded at the end of the allocation scope.
 
 ### Pure `Use`
 
-    Xbyak::Reg64 UseGpr(Argument& arg);
-    Xbyak::Xmm UseXmm(Argument& arg);
-    OpArg UseOpArg(Argument& arg);
-    void Use(Argument& arg, HostLoc host_loc);
+```c++
+Xbyak::Reg64 UseGpr(Argument& arg);
+Xbyak::Xmm UseXmm(Argument& arg);
+OpArg UseOpArg(Argument& arg);
+void Use(Argument& arg, HostLoc host_loc);
+```
 
 At runtime, the value corresponding to `arg` will be placed in a register. The actual register is
 determined by which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it
@@ -39,9 +51,11 @@ This register **must not** have its value changed.
 
 ### `UseScratch`
 
-    Xbyak::Reg64 UseScratchGpr(Argument& arg);
-    Xbyak::Xmm UseScratchXmm(Argument& arg);
-    void UseScratch(Argument& arg, HostLoc host_loc);
+```c++
+Xbyak::Reg64 UseScratchGpr(Argument& arg);
+Xbyak::Xmm UseScratchXmm(Argument& arg);
+void UseScratch(Argument& arg, HostLoc host_loc);
+```
 
 At runtime, the value corresponding to `arg` will be placed in a register. The actual register is
 determined by which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it
@@ -55,7 +69,9 @@ You are free to modify the value in the register. The register is discarded at t
 
 A `Define` is the definition of a value. This is the only time when a value may be set.
 
-    void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
+```c++
+void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
+```
 
 By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the
 value to the specified register `reg`.
 
@@ -64,7 +80,9 @@ value to the specified register `reg`.
 
 Adding a `Define` to an existing value.
 
-    void DefineValue(IR::Inst* inst, Argument& arg);
+```c++
+void DefineValue(IR::Inst* inst, Argument& arg);
+```
 
 You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are
 emitted.
diff --git a/src/dynarmic/docs/ReturnStackBufferOptimization.md b/src/dynarmic/docs/ReturnStackBufferOptimization.md
index 6ffe41bcc6..0e72c3bce8 100644
--- a/src/dynarmic/docs/ReturnStackBufferOptimization.md
+++ b/src/dynarmic/docs/ReturnStackBufferOptimization.md
@@ -23,15 +23,17 @@ One complication dynarmic has is that a compiled block is not uniquely identifia
 the PC alone, but bits in the FPSCR and CPSR are also relevant.
We resolve this by computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.
 
-    u64 LocationDescriptor::UniqueHash() const {
-        // This value MUST BE UNIQUE.
-        // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
-        u64 pc_u64 = u64(arm_pc) << 32;
-        u64 fpscr_u64 = u64(fpscr.Value());
-        u64 t_u64 = cpsr.T() ? 1 : 0;
-        u64 e_u64 = cpsr.E() ? 2 : 0;
-        return pc_u64 | fpscr_u64 | t_u64 | e_u64;
-    }
+```c++
+u64 LocationDescriptor::UniqueHash() const {
+    // This value MUST BE UNIQUE.
+    // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
+    u64 pc_u64 = u64(arm_pc) << 32;
+    u64 fpscr_u64 = u64(fpscr.Value());
+    u64 t_u64 = cpsr.T() ? 1 : 0;
+    u64 e_u64 = cpsr.E() ? 2 : 0;
+    return pc_u64 | fpscr_u64 | t_u64 | e_u64;
+}
+```
 
 ## Our implementation isn't actually a stack
 
@@ -49,97 +51,107 @@ host addresses for the corresponding compiled blocks.
 size of the real RSB in hardware (which has 3 entries). RSBs larger than 8
 entries showed degraded performance.
 
-    struct JitState {
-        // ...
+```c++
+struct JitState {
+    // ...
 
-        static constexpr size_t RSBSize = 8; // MUST be a power of 2.
-        u32 rsb_ptr = 0;
-        std::array<u64, RSBSize> rsb_location_descriptors;
-        std::array<u64, RSBSize> rsb_codeptrs;
-        void ResetRSB();
+    static constexpr size_t RSBSize = 8; // MUST be a power of 2.
+    u32 rsb_ptr = 0;
+    std::array<u64, RSBSize> rsb_location_descriptors;
+    std::array<u64, RSBSize> rsb_codeptrs;
+    void ResetRSB();
 
-        // ...
-    };
+    // ...
+};
+```
 
 ### RSB Push
 
 We insert our prediction at the insertion point iff the RSB doesn't already
 contain a prediction with the same `UniqueHash`.
 
-    void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
-        using namespace Xbyak::util;
+```c++
+void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
+    using namespace Xbyak::util;
 
-        ASSERT(inst->GetArg(0).IsImmediate());
-        u64 imm64 = inst->GetArg(0).GetU64();
+    ASSERT(inst->GetArg(0).IsImmediate());
+    u64 imm64 = inst->GetArg(0).GetU64();
 
-        Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
-        Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
-        Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
-        u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
-                     ? u64(unique_hash_to_code_ptr[imm64])
-                     : u64(code->GetReturnFromRunCodeAddress());
+    Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
+    Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
+    Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
+    u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
+                 ? u64(unique_hash_to_code_ptr[imm64])
+                 : u64(code->GetReturnFromRunCodeAddress());
 
-        code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
-        code->add(index_reg, 1);
-        code->and_(index_reg, u32(JitState::RSBSize - 1));
+    code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
+    code->add(index_reg, 1);
+    code->and_(index_reg, u32(JitState::RSBSize - 1));
 
-        code->mov(loc_desc_reg, u64(imm64));
-        CodePtr patch_location = code->getCurr();
-        patch_unique_hash_locations[imm64].emplace_back(patch_location);
-        code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
-        code->EnsurePatchLocationSize(patch_location, 10);
+    code->mov(loc_desc_reg, u64(imm64));
+    CodePtr patch_location = code->getCurr();
+    patch_unique_hash_locations[imm64].emplace_back(patch_location);
+    code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
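+    // Note: the mov above is deliberately emitted at the recorded
+    // patch_location. Until the block for imm64 is compiled, code_ptr falls
+    // back to GetReturnFromRunCodeAddress(); once the real entry point is
+    // known, EmitX64::Patch can rewrite the 10-byte mov in place, which is
+    // why its size is pinned by EnsurePatchLocationSize below.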
+    code->EnsurePatchLocationSize(patch_location, 10);
 
-        Xbyak::Label label;
-        for (size_t i = 0; i < JitState::RSBSize; ++i) {
-            code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
-            code->je(label, code->T_SHORT);
-        }
-
-        code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
-        code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
-        code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
-        code->L(label);
+    Xbyak::Label label;
+    for (size_t i = 0; i < JitState::RSBSize; ++i) {
+        code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+        code->je(label, code->T_SHORT);
    }
 
-    }
+    code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
+    code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
+    code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
+    code->L(label);
+}
+```
+
 In pseudocode:
 
-    for (i := 0 .. RSBSize-1)
-        if (rsb_location_descriptors[i] == imm64)
-            goto label;
-    rsb_ptr++;
-    rsb_ptr %= RSBSize;
-    rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash
-    rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
-    label:
+```c++
+    for (i := 0 .. RSBSize-1)
+        if (rsb_location_descriptors[i] == imm64)
+            goto label;
+    rsb_ptr++;
+    rsb_ptr %= RSBSize;
+    rsb_location_descriptors[rsb_ptr] = imm64; //< The UniqueHash
+    rsb_codeptrs[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
+label:
+```
 
 ## RSB Pop
 
 To check if a prediction is in the RSB, we linearly scan the RSB.
 
-    void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
-        using namespace Xbyak::util;
+```c++
+void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
+    using namespace Xbyak::util;
 
-        // This calculation has to match up with IREmitter::PushRSB
-        code->mov(ecx, MJitStateReg(Arm::Reg::PC));
-        code->shl(rcx, 32);
-        code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
-        code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
-        code->or_(rbx, rcx);
+    // This calculation has to match up with IREmitter::PushRSB
+    code->mov(ecx, MJitStateReg(Arm::Reg::PC));
+    code->shl(rcx, 32);
+    code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
+    code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
+    code->or_(rbx, rcx);
 
-        code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
-        for (size_t i = 0; i < JitState::RSBSize; ++i) {
-            code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
-            code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
-        }
-
-        code->jmp(rax);
+    code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
+    for (size_t i = 0; i < JitState::RSBSize; ++i) {
+        code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+        code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
    }
 
+    code->jmp(rax);
+}
+```
+
 In pseudocode:
 
-    rbx := ComputeUniqueHash()
-    rax := ReturnToDispatch
-    for (i := 0 ..
RSBSize-1) - if (rbx == rsb_location_descriptors[i]) - rax = rsb_codeptrs[i] - goto rax \ No newline at end of file +```c++ +rbx := ComputeUniqueHash() +rax := ReturnToDispatch +for (i := 0 .. RSBSize-1) + if (rbx == rsb_location_descriptors[i]) + rax = rsb_codeptrs[i] +goto rax +``` diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index b74626bcd5..ee06ccf19a 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -56,8 +56,6 @@ add_library(dynarmic common/lut_from_list.h common/math_util.cpp common/math_util.h - common/memory_pool.cpp - common/memory_pool.h common/safe_ops.h common/spin_lock.h common/string_util.h @@ -78,7 +76,6 @@ add_library(dynarmic ir/basic_block.cpp ir/basic_block.h ir/cond.h - ir/ir_emitter.cpp ir/ir_emitter.h ir/location_descriptor.cpp ir/location_descriptor.h @@ -87,78 +84,59 @@ add_library(dynarmic ir/opcodes.cpp ir/opcodes.h ir/opcodes.inc - ir/opt/constant_propagation_pass.cpp - ir/opt/dead_code_elimination_pass.cpp - ir/opt/identity_removal_pass.cpp - ir/opt/ir_matcher.h - ir/opt/naming_pass.cpp - ir/opt/passes.h - ir/opt/polyfill_pass.cpp - ir/opt/verification_pass.cpp + ir/opt_passes.cpp + ir/opt_passes.h ir/terminal.h ir/type.cpp ir/type.h ir/value.cpp ir/value.h + # A32 + frontend/A32/a32_ir_emitter.cpp + frontend/A32/a32_ir_emitter.h + frontend/A32/a32_location_descriptor.cpp + frontend/A32/a32_location_descriptor.h + frontend/A32/decoder/arm.h + frontend/A32/decoder/arm.inc + frontend/A32/decoder/asimd.h + frontend/A32/decoder/asimd.inc + frontend/A32/decoder/thumb16.h + frontend/A32/decoder/thumb16.inc + frontend/A32/decoder/thumb32.h + frontend/A32/decoder/thumb32.inc + frontend/A32/decoder/vfp.h + frontend/A32/decoder/vfp.inc + frontend/A32/disassembler/disassembler.h + frontend/A32/disassembler/disassembler_arm.cpp + frontend/A32/disassembler/disassembler_thumb.cpp + frontend/A32/FPSCR.h + frontend/A32/ITState.h + frontend/A32/PSR.h + frontend/A32/translate/a32_translate.cpp + frontend/A32/translate/a32_translate.h + frontend/A32/translate/conditional_state.cpp + frontend/A32/translate/conditional_state.h + frontend/A32/translate/translate_arm.cpp + frontend/A32/translate/translate_thumb.cpp + interface/A32/a32.h + interface/A32/arch_version.h + interface/A32/config.h + interface/A32/coprocessor.h + interface/A32/coprocessor_util.h + interface/A32/disassembler.h + # A64 + frontend/A64/a64_ir_emitter.cpp + frontend/A64/a64_ir_emitter.h + frontend/A64/a64_location_descriptor.cpp + frontend/A64/a64_location_descriptor.h + frontend/A64/decoder/a64.h + frontend/A64/decoder/a64.inc + frontend/A64/translate/a64_translate.cpp + frontend/A64/translate/a64_translate.h + interface/A64/a64.h + interface/A64/config.h ) -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A32/a32_ir_emitter.cpp - frontend/A32/a32_ir_emitter.h - frontend/A32/a32_location_descriptor.cpp - frontend/A32/a32_location_descriptor.h - frontend/A32/decoder/arm.h - frontend/A32/decoder/arm.inc - frontend/A32/decoder/asimd.h - frontend/A32/decoder/asimd.inc - frontend/A32/decoder/thumb16.h - frontend/A32/decoder/thumb16.inc - frontend/A32/decoder/thumb32.h - frontend/A32/decoder/thumb32.inc - frontend/A32/decoder/vfp.h - frontend/A32/decoder/vfp.inc - frontend/A32/disassembler/disassembler.h - frontend/A32/disassembler/disassembler_arm.cpp - frontend/A32/disassembler/disassembler_thumb.cpp - frontend/A32/FPSCR.h - frontend/A32/ITState.h - frontend/A32/PSR.h - 
frontend/A32/translate/a32_translate.cpp - frontend/A32/translate/a32_translate.h - frontend/A32/translate/conditional_state.cpp - frontend/A32/translate/conditional_state.h - frontend/A32/translate/translate_arm.cpp - frontend/A32/translate/translate_thumb.cpp - interface/A32/a32.h - interface/A32/arch_version.h - interface/A32/config.h - interface/A32/coprocessor.h - interface/A32/coprocessor_util.h - interface/A32/disassembler.h - ir/opt/a32_constant_memory_reads_pass.cpp - ir/opt/a32_get_set_elimination_pass.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A64/a64_ir_emitter.cpp - frontend/A64/a64_ir_emitter.h - frontend/A64/a64_location_descriptor.cpp - frontend/A64/a64_location_descriptor.h - frontend/A64/decoder/a64.h - frontend/A64/decoder/a64.inc - frontend/A64/translate/a64_translate.cpp - frontend/A64/translate/a64_translate.h - interface/A64/a64.h - interface/A64/config.h - ir/opt/a64_callback_config_pass.cpp - ir/opt/a64_get_set_elimination_pass.cpp - ir/opt/a64_merge_interpret_blocks.cpp - ) -endif() - if ("x86_64" IN_LIST ARCHITECTURE) # Newer versions of xbyak (>= 7.25.0) have stricter checks that currently # fail in dynarmic @@ -215,29 +193,21 @@ if ("x86_64" IN_LIST ARCHITECTURE) common/spin_lock_x64.h common/x64_disassemble.cpp common/x64_disassemble.h + # A32 + backend/x64/a32_emit_x64.cpp + backend/x64/a32_emit_x64.h + backend/x64/a32_emit_x64_memory.cpp + backend/x64/a32_interface.cpp + backend/x64/a32_jitstate.cpp + backend/x64/a32_jitstate.h + # A64 + backend/x64/a64_emit_x64.cpp + backend/x64/a64_emit_x64.h + backend/x64/a64_emit_x64_memory.cpp + backend/x64/a64_interface.cpp + backend/x64/a64_jitstate.cpp + backend/x64/a64_jitstate.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a32_emit_x64.cpp - backend/x64/a32_emit_x64.h - backend/x64/a32_emit_x64_memory.cpp - backend/x64/a32_interface.cpp - backend/x64/a32_jitstate.cpp - backend/x64/a32_jitstate.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a64_emit_x64.cpp - backend/x64/a64_emit_x64.h - backend/x64/a64_emit_x64_memory.cpp - backend/x64/a64_interface.cpp - backend/x64/a64_jitstate.cpp - backend/x64/a64_jitstate.h - ) - endif() endif() if ("arm64" IN_LIST ARCHITECTURE) @@ -281,25 +251,17 @@ if ("arm64" IN_LIST ARCHITECTURE) backend/arm64/verbose_debugging_output.h common/spin_lock_arm64.cpp common/spin_lock_arm64.h + # A32 + backend/arm64/a32_address_space.cpp + backend/arm64/a32_address_space.h + backend/arm64/a32_core.h + backend/arm64/a32_interface.cpp + # A64 + backend/arm64/a64_address_space.cpp + backend/arm64/a64_address_space.h + backend/arm64/a64_core.h + backend/arm64/a64_interface.cpp ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a32_address_space.cpp - backend/arm64/a32_address_space.h - backend/arm64/a32_core.h - backend/arm64/a32_interface.cpp - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a64_address_space.cpp - backend/arm64/a64_address_space.h - backend/arm64/a64_core.h - backend/arm64/a64_interface.cpp - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -328,21 +290,14 @@ if ("riscv" IN_LIST ARCHITECTURE) backend/riscv64/reg_alloc.cpp backend/riscv64/reg_alloc.h backend/riscv64/stack_layout.h + # A32 + 
backend/riscv64/a32_address_space.cpp + backend/riscv64/a32_address_space.h + backend/riscv64/a32_core.h + backend/riscv64/a32_interface.cpp + backend/riscv64/code_block.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - backend/riscv64/a32_address_space.cpp - backend/riscv64/a32_address_space.h - backend/riscv64/a32_core.h - backend/riscv64/a32_interface.cpp - backend/riscv64/code_block.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") - endif() + message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") endif() if (WIN32) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index bbdf925ff4..8e48fa3687 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -16,7 +16,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -163,21 +163,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index d4fe9a9cb7..2b50ad9ea3 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -331,22 +331,7 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - 
Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp index efa211618b..0bb9591411 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/backend/riscv64/stack_layout.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::RV64 { @@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp index b116ec180e..382eb70f3f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp @@ -29,7 +29,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/location_descriptor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A32 { @@ -217,19 +217,7 @@ private: block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE); IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index 
ddd2327395..c65b582982 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -25,7 +25,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/a64.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A64 { @@ -275,21 +275,7 @@ private: const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block).entrypoint; } diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.cpp b/src/dynarmic/src/dynarmic/common/memory_pool.cpp deleted file mode 100644 index f41dd92af5..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.cpp +++ /dev/null @@ -1,13 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/common/memory_pool.h" - -#include - -namespace Dynarmic::Common { - - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.h b/src/dynarmic/src/dynarmic/common/memory_pool.h deleted file mode 100644 index c99316e107..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -namespace Dynarmic::Common { - -/// @tparam object_size Byte-size of objects to construct -/// @tparam slab_size Number of objects to have per slab -template -class Pool { -public: - inline Pool() noexcept { - AllocateNewSlab(); - } - inline ~Pool() noexcept { - std::free(current_slab); - for (char* slab : slabs) { - std::free(slab); - } - } - - Pool(const Pool&) = delete; - Pool(Pool&&) = delete; - - Pool& operator=(const Pool&) = delete; - Pool& operator=(Pool&&) = delete; - - /// @brief Returns a pointer to an `object_size`-bytes block of memory. - [[nodiscard]] void* Alloc() noexcept { - if (remaining == 0) { - slabs.push_back(current_slab); - AllocateNewSlab(); - } - void* ret = static_cast(current_ptr); - current_ptr += object_size; - remaining--; - return ret; - } -private: - /// @brief Allocates a completely new memory slab. - /// Used when an entirely new slab is needed - /// due the current one running out of usable space. 
- void AllocateNewSlab() noexcept { - current_slab = static_cast(std::malloc(object_size * slab_size)); - current_ptr = current_slab; - remaining = slab_size; - } - - std::vector slabs; - char* current_slab = nullptr; - char* current_ptr = nullptr; - size_t remaining = 0; -}; - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index b00ab3cb20..a13a7ebfc9 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -15,8 +15,6 @@ #include #include "dynarmic/common/assert.h" - -#include "dynarmic/common/memory_pool.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A64/a64_types.h" #include "dynarmic/ir/cond.h" @@ -27,8 +25,7 @@ namespace Dynarmic::IR { Block::Block(const LocationDescriptor& location) : location{location}, end_location{location}, - cond{Cond::AL}, - instruction_alloc_pool{std::make_unique>()} + cond{Cond::AL} { } @@ -40,7 +37,7 @@ Block::Block(const LocationDescriptor& location) /// @param args A sequence of Value instances used as arguments for the instruction. /// @returns Iterator to the newly created instruction. Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) noexcept { - IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode); + IR::Inst* inst = new IR::Inst(opcode); DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index e88dc92fc4..2f2d9ab6de 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -21,7 +21,6 @@ #include "dynarmic/ir/terminal.h" #include "dynarmic/ir/value.h" #include "dynarmic/ir/dense_list.h" -#include "dynarmic/common/memory_pool.h" namespace Dynarmic::IR { @@ -174,8 +173,6 @@ private: LocationDescriptor end_location; /// Conditional to pass in order to execute this block Cond cond; - /// Memory pool for instruction list - std::unique_ptr> instruction_alloc_pool; /// Terminal instruction of this block. Terminal terminal = Term::Invalid{}; /// Number of cycles this block takes to execute if the conditional fails. diff --git a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp deleted file mode 100644 index a8ef7e2989..0000000000 --- a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/ir_emitter.h" - -#include - -#include "dynarmic/common/assert.h" -#include - -#include "dynarmic/ir/opcodes.h" - -namespace Dynarmic::IR { - - -} // namespace Dynarmic::IR diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp deleted file mode 100644 index 9699f18345..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/interface/A32/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { - for (auto& inst : block) { - switch (inst.GetOpcode()) { - case IR::Opcode::A32ReadMemory8: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u8 value_from_memory = cb->MemoryRead8(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory16: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u16 value_from_memory = cb->MemoryRead16(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory32: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u32 value_from_memory = cb->MemoryRead32(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory64: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u64 value_from_memory = cb->MemoryRead64(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp deleted file mode 100644 index 499b38b120..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp +++ /dev/null @@ -1,382 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A32/a32_ir_emitter.h" -#include "dynarmic/frontend/A32/a32_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -namespace { - -void FlagsPass(IR::Block& block) { - using Iterator = std::reverse_iterator; - - struct FlagInfo { - bool set_not_required = false; - bool has_value_request = false; - Iterator value_request = {}; - }; - struct ValuelessFlagInfo { - bool set_not_required = false; - }; - ValuelessFlagInfo nzcvq; - ValuelessFlagInfo nzcv; - ValuelessFlagInfo nz; - FlagInfo c_flag; - FlagInfo ge; - - auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(value); - } - info.has_value_request = false; - - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_get = [](FlagInfo& info, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(IR::Value{&*inst}); - } - info.has_value_request = true; - info.value_request = inst; - }; - - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetCFlag: { - do_get(c_flag, inst); - break; - } - case IR::Opcode::A32SetCpsrNZCV: { - if (c_flag.has_value_request) { - ir.SetInsertionPointBefore(inst.base()); // base is one ahead - IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); - c_flag.value_request->ReplaceUsesWith(c); - c_flag.has_value_request = false; - break; // This case will be executed again because of the above - } - - do_set_valueless(nzcv, inst); - - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVRaw: { - if (c_flag.has_value_request) { - nzcv.set_not_required = false; - } - - do_set_valueless(nzcv, inst); - - nzcvq = {}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVQ: { - if (c_flag.has_value_request) { - nzcvq.set_not_required = false; - } - - do_set_valueless(nzcvq, inst); - - nzcv = {.set_not_required = true}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZ: { - do_set_valueless(nz, inst); - - nzcvq = {}; - nzcv = {}; - break; - } - case IR::Opcode::A32SetCpsrNZC: { - if (c_flag.has_value_request) { - c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); - c_flag.has_value_request = false; - } - - if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { - const auto nz_value = inst->GetArg(0); - - inst->Invalidate(); - - ir.SetInsertionPointBefore(inst.base()); - ir.SetCpsrNZ(IR::NZCV{nz_value}); - - nzcvq = {}; - nzcv = {}; - nz = {.set_not_required = true}; - break; - } - - if (nz.set_not_required && c_flag.set_not_required) { - inst->Invalidate(); - } else if (nz.set_not_required) { - inst->SetArg(0, 
IR::Value::EmptyNZCVImmediateMarker()); - } - nz.set_not_required = true; - c_flag.set_not_required = true; - - nzcv = {}; - nzcvq = {}; - break; - } - case IR::Opcode::A32SetGEFlags: { - do_set(ge, inst->GetArg(0), inst); - break; - } - case IR::Opcode::A32GetGEFlags: { - do_get(ge, inst); - break; - } - case IR::Opcode::A32SetGEFlagsCompressed: { - ge = {.set_not_required = true}; - break; - } - case IR::Opcode::A32OrQFlag: { - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcvq = {}; - nzcv = {}; - nz = {}; - c_flag = {}; - ge = {}; - } - break; - } - } - } -} - -void RegisterPass(IR::Block& block) { - using Iterator = IR::Block::iterator; - - struct RegInfo { - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array reg_info; - - const auto do_get = [](RegInfo& info, Iterator get_inst) { - if (info.register_value.IsEmpty()) { - info.register_value = IR::Value(&*get_inst); - return; - } - get_inst->ReplaceUsesWith(info.register_value); - }; - - const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { - if (info.last_set_instruction) { - (*info.last_set_instruction)->Invalidate(); - } - info = { - .register_value = value, - .last_set_instruction = set_inst, - }; - }; - - enum class ExtValueType { - Empty, - Single, - Double, - VectorDouble, - VectorQuad, - }; - struct ExtRegInfo { - ExtValueType value_type = {}; - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array ext_reg_info; - - const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { - if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = IR::Value(&*get_inst), - .last_set_instruction = std::nullopt, - }; - } - return; - } - get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); - }; - - const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { - if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - if (std::data(infos)[0].get().last_set_instruction) { - (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); - } - } - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = value, - .last_set_instruction = set_inst, - }; - } - }; - - // Location and version don't matter here. 
- A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - ASSERT(reg != A32::Reg::PC); - const size_t reg_index = static_cast(reg); - do_get(reg_info[reg_index], inst); - break; - } - case IR::Opcode::A32SetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - if (reg == A32::Reg::PC) { - break; - } - const auto reg_index = static_cast(reg); - do_set(reg_info[reg_index], inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); - break; - } - case IR::Opcode::A32SetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - break; - } - case IR::Opcode::A32SetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst->GetArg(1), - inst); - break; - } - case IR::Opcode::A32GetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - do_ext_get(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_get(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst); - } - break; - } - case IR::Opcode::A32SetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - ir.SetInsertionPointAfter(inst); - const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); - do_ext_set(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - stored_value, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_set(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst->GetArg(1), - inst); - } - break; - } - default: { - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - ext_reg_info = {}; - } - break; - } - } - } -} - -} // namespace - -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { - FlagsPass(block); - RegisterPass(block); -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp deleted file mode 100644 
index 79d9769520..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/frontend/A64/a64_ir_emitter.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { - if (conf.hook_data_cache_operations) { - return; - } - - for (auto& inst : block) { - if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { - continue; - } - - const auto op = static_cast(inst.GetArg(1).GetU64()); - if (op == A64::DataCacheOperation::ZeroByVA) { - A64::IREmitter ir{block}; - ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; - ir.SetInsertionPointBefore(&inst); - - size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); - IR::U64 addr{inst.GetArg(2)}; - - const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); - while (bytes >= 16) { - ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(16)); - bytes -= 16; - } - - while (bytes >= 8) { - ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(8)); - bytes -= 8; - } - - while (bytes >= 4) { - ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(4)); - bytes -= 4; - } - } - inst.Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp deleted file mode 100644 index 53e3b27176..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -void A64GetSetElimination(IR::Block& block) { - using Iterator = IR::Block::iterator; - - enum class TrackingType { - W, - X, - S, - D, - Q, - SP, - NZCV, - NZCVRaw, - }; - struct RegisterInfo { - IR::Value register_value; - TrackingType tracking_type; - bool set_instruction_present = false; - Iterator last_set_instruction; - }; - std::array reg_info; - std::array vec_info; - RegisterInfo sp_info; - RegisterInfo nzcv_info; - - const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { - if (info.set_instruction_present) { - info.last_set_instruction->Invalidate(); - block.Instructions().erase(info.last_set_instruction); - } - - info.register_value = value; - info.tracking_type = tracking_type; - info.set_instruction_present = true; - info.last_set_instruction = set_inst; - }; - - const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { - const auto do_nothing = [&] { - info = {}; - info.register_value = IR::Value(&*get_inst); - info.tracking_type = tracking_type; - }; - - if (info.register_value.IsEmpty()) { - do_nothing(); - return; - } - - if (info.tracking_type == tracking_type) { - get_inst->ReplaceUsesWith(info.register_value); - return; - } - - do_nothing(); - }; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A64GetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::W); - break; - } - case IR::Opcode::A64GetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::X); - break; - } - case IR::Opcode::A64GetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::S); - break; - } - case IR::Opcode::A64GetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::D); - break; - } - case IR::Opcode::A64GetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64GetSP: { - do_get(sp_info, inst, TrackingType::SP); - break; - } - case IR::Opcode::A64GetNZCVRaw: { - do_get(nzcv_info, inst, TrackingType::NZCVRaw); - break; - } - case IR::Opcode::A64SetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); - break; - } - case IR::Opcode::A64SetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); - break; - } - case IR::Opcode::A64SetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); - break; - } - case IR::Opcode::A64SetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); - break; - } - case IR::Opcode::A64SetQ: { - 
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64SetSP: { - do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); - break; - } - case IR::Opcode::A64SetNZCV: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); - break; - } - case IR::Opcode::A64SetNZCVRaw: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcv_info = {}; - } - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - vec_info = {}; - sp_info = {}; - } - break; - } - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp deleted file mode 100644 index 25b7ef0ff1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_location_descriptor.h" -#include "dynarmic/frontend/A64/translate/a64_translate.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { - const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { - const auto instruction = cb->MemoryReadCode(location.PC()); - if (!instruction) - return false; - - IR::Block new_block{location}; - A64::TranslateSingleInstruction(new_block, location, *instruction); - - if (!new_block.Instructions().empty()) - return false; - - const IR::Terminal terminal = new_block.GetTerminal(); - if (auto term = boost::get(&terminal)) { - return term->next == location; - } - - return false; - }; - - IR::Terminal terminal = block.GetTerminal(); - auto term = boost::get(&terminal); - if (!term) - return; - - A64::LocationDescriptor location{term->next}; - size_t num_instructions = 1; - - while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { - num_instructions++; - } - - term->num_instructions = num_instructions; - block.ReplaceTerminal(terminal); - block.CycleCount() += num_instructions - 1; -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp deleted file mode 100644 index 86ebca87d2..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp +++ /dev/null @@ -1,559 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/assert.h" -#include -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/common/safe_ops.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -using Op = Dynarmic::IR::Opcode; - -namespace { - -// Tiny helper to avoid the need to store based off the opcode -// bit size all over the place within folding functions. -void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { - if (is_32_bit) { - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); - } else { - inst.ReplaceUsesWith(IR::Value{value}); - } -} - -IR::Value Value(bool is_32_bit, u64 value) { - return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; -} - -template -bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - - const bool is_lhs_immediate = lhs.IsImmediate(); - const bool is_rhs_immediate = rhs.IsImmediate(); - - if (is_lhs_immediate && is_rhs_immediate) { - const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); - ReplaceUsesWith(inst, is_32_bit, result); - return false; - } - - if (is_lhs_immediate && !is_rhs_immediate) { - const IR::Inst* rhs_inst = rhs.GetInstRecursive(); - if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, rhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } else { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - } - } - - if (!is_lhs_immediate && is_rhs_immediate) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } - } - - return true; -} - -void FoldAdd(IR::Inst& inst, bool is_32_bit) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - if (lhs.IsImmediate() && !rhs.IsImmediate()) { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - FoldAdd(inst, is_32_bit); - return; - } - - if (inst.HasAssociatedPseudoOperation()) { - return; - } - - if (!lhs.IsImmediate() && rhs.IsImmediate()) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { - const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); - if (combined == 0) { - inst.ReplaceUsesWith(lhs_inst->GetArg(0)); - return; - } - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - return; - } - if (rhs.IsZero() && carry.IsZero()) { - inst.ReplaceUsesWith(lhs); - return; - } - } - - if (inst.AreAllArgsImmediates()) { - const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); - return; - } -} - -/// Folds AND operations based on the following: -/// -/// 1. imm_x & imm_y -> result -/// 2. x & 0 -> 0 -/// 3. 0 & y -> 0 -/// 4. 
x & y -> y (where x has all bits set to 1) -/// 5. x & y -> x (where y has all bits set to 1) -/// -void FoldAND(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.HasAllBitsSet()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -/// Folds byte reversal opcodes based on the following: -/// -/// 1. imm -> swap(imm) -/// -void FoldByteReverse(IR::Inst& inst, Op op) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - if (op == Op::ByteReverseWord) { - const u32 result = mcl::bit::swap_bytes_32(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else if (op == Op::ByteReverseHalf) { - const u16 result = mcl::bit::swap_bytes_16(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else { - const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); - inst.ReplaceUsesWith(IR::Value{result}); - } -} - -/// Folds division operations based on the following: -/// -/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) -/// 2. imm_x / imm_y -> result -/// 3. x / 1 -> x -/// -void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { - const auto rhs = inst.GetArg(1); - - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - return; - } - - const auto lhs = inst.GetArg(0); - if (lhs.IsImmediate() && rhs.IsImmediate()) { - if (is_signed) { - const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); - ReplaceUsesWith(inst, is_32_bit, static_cast(result)); - } else { - const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); - } - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(IR::Value{lhs}); - } -} - -// Folds EOR operations based on the following: -// -// 1. imm_x ^ imm_y -> result -// 2. x ^ 0 -> x -// 3. 
0 ^ y -> y -// -void FoldEOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -void FoldLeastSignificantByte(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantHalf(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldMostSignificantBit(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); -} - -void FoldMostSignificantWord(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - if (carry_inst) { - carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); - } - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); -} - -// Folds multiplication operations based on the following: -// -// 1. imm_x * imm_y -> result -// 2. x * 0 -> 0 -// 3. 0 * y -> 0 -// 4. x * 1 -> x -// 5. 1 * y -> y -// -void FoldMultiply(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -// Folds NOT operations if the contained value is an immediate. -void FoldNOT(IR::Inst& inst, bool is_32_bit) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - const u64 result = ~operand.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -// Folds OR operations based on the following: -// -// 1. imm_x | imm_y -> result -// 2. x | 0 -> x -// 3. 0 | y -> y -// -void FoldOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -bool FoldShifts(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - // The 32-bit variants can contain 3 arguments, while the - // 64-bit variants only contain 2. 
- if (inst.NumArgs() == 3 && !carry_inst) { - inst.SetArg(2, IR::Value(false)); - } - - const auto shift_amount = inst.GetArg(1); - - if (shift_amount.IsZero()) { - if (carry_inst) { - carry_inst->ReplaceUsesWith(inst.GetArg(2)); - } - inst.ReplaceUsesWith(inst.GetArg(0)); - return false; - } - - if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { - inst.SetArg(2, IR::Value(false)); - } - - if (!inst.AreAllArgsImmediates() || carry_inst) { - return false; - } - - return true; -} - -void FoldSignExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSignExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSub(IR::Inst& inst, bool is_32_bit) { - if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { - return; - } - - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -void FoldZeroExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldZeroExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{value}); -} -} // Anonymous namespace - -void ConstantPropagation(IR::Block& block) { - for (auto& inst : block) { - const auto opcode = inst.GetOpcode(); - - switch (opcode) { - case Op::LeastSignificantWord: - FoldLeastSignificantWord(inst); - break; - case Op::MostSignificantWord: - FoldMostSignificantWord(inst); - break; - case Op::LeastSignificantHalf: - FoldLeastSignificantHalf(inst); - break; - case Op::LeastSignificantByte: - FoldLeastSignificantByte(inst); - break; - case Op::MostSignificantBit: - FoldMostSignificantBit(inst); - break; - case Op::IsZero32: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); - } - break; - case Op::IsZero64: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); - } - break; - case Op::LogicalShiftLeft32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeft64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, 
Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeftMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftLeftMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::LogicalShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::ArithmeticShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::ArithmeticShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::RotateRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); - } - break; - case Op::RotateRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); - } - break; - case Op::Add32: - case Op::Add64: - FoldAdd(inst, opcode == Op::Add32); - break; - case Op::Sub32: - case Op::Sub64: - FoldSub(inst, opcode == Op::Sub32); - break; - case Op::Mul32: - case Op::Mul64: - FoldMultiply(inst, opcode == Op::Mul32); - break; - case Op::SignedDiv32: - case Op::SignedDiv64: - FoldDivide(inst, opcode == Op::SignedDiv32, true); - break; - case Op::UnsignedDiv32: - case Op::UnsignedDiv64: - FoldDivide(inst, opcode == Op::UnsignedDiv32, false); - break; - case Op::And32: - case Op::And64: - FoldAND(inst, opcode == Op::And32); - break; - case Op::Eor32: - case Op::Eor64: - FoldEOR(inst, opcode == Op::Eor32); - break; - case Op::Or32: - case Op::Or64: - FoldOR(inst, opcode == Op::Or32); - break; - case Op::Not32: - case Op::Not64: - FoldNOT(inst, opcode == Op::Not32); - break; - case Op::SignExtendByteToWord: - case Op::SignExtendHalfToWord: - FoldSignExtendXToWord(inst); - break; - case Op::SignExtendByteToLong: - case Op::SignExtendHalfToLong: - case Op::SignExtendWordToLong: - FoldSignExtendXToLong(inst); - break; - case Op::ZeroExtendByteToWord: - case Op::ZeroExtendHalfToWord: - FoldZeroExtendXToWord(inst); - break; - case Op::ZeroExtendByteToLong: - case Op::ZeroExtendHalfToLong: - case Op::ZeroExtendWordToLong: - FoldZeroExtendXToLong(inst); - break; - case Op::ByteReverseWord: - case Op::ByteReverseHalf: - case Op::ByteReverseDual: - FoldByteReverse(inst, opcode); - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp 
b/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp deleted file mode 100644 index bda9f6efd1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void DeadCodeElimination(IR::Block& block) { - // We iterate over the instructions in reverse order. - // This is because removing an instruction reduces the number of uses for earlier instructions. - for (auto& inst : mcl::iterator::reverse(block)) { - if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { - inst.Invalidate(); - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp deleted file mode 100644 index e87fcc335b..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void IdentityRemovalPass(IR::Block& block) { - std::vector to_invalidate; - - auto iter = block.begin(); - while (iter != block.end()) { - IR::Inst& inst = *iter; - - const size_t num_args = inst.NumArgs(); - for (size_t i = 0; i < num_args; i++) { - while (true) { - IR::Value arg = inst.GetArg(i); - if (!arg.IsIdentity()) - break; - inst.SetArg(i, arg.GetInst()->GetArg(0)); - } - } - - if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { - iter = block.Instructions().erase(inst); - to_invalidate.push_back(&inst); - } else { - ++iter; - } - } - - for (IR::Inst* inst : to_invalidate) { - inst->Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h b/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h deleted file mode 100644 index 5eb1a55100..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h +++ /dev/null @@ -1,127 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization::IRMatcher { - -struct CaptureValue { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value); - } -}; - -struct CaptureInst { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return std::tuple(value.GetInstRecursive()); - } -}; - -struct CaptureUImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsU64()); - } -}; - -struct CaptureSImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsS64()); - } -}; - -template -struct UImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsU64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct SImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsS64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct Inst { -public: - using ReturnType = mp::concat, typename Args::ReturnType...>; - - static std::optional Match(const IR::Inst& inst) { - if (inst.GetOpcode() != Opcode) - return std::nullopt; - if (inst.HasAssociatedPseudoOperation()) - return std::nullopt; - return MatchArgs<0>(inst); - } - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return Match(*value.GetInstRecursive()); - } - -private: - template - static auto MatchArgs(const IR::Inst& inst) -> std::optional>, std::tuple<>>>> { - if constexpr (I >= sizeof...(Args)) { - return std::tuple(); - } else { - using Arg = mp::get>; - - if (const auto arg = Arg::Match(inst.GetArg(I))) { - if (const auto rest = MatchArgs(inst)) { - return std::tuple_cat(*arg, *rest); - } - } - - return std::nullopt; - } - } -}; - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t); -} - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t); -} - -} // namespace Dynarmic::Optimization::IRMatcher diff --git a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp deleted file mode 100644 index a766bdc83f..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2023 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" - -namespace Dynarmic::Optimization { - -void NamingPass(IR::Block& block) { - unsigned name = 1; - for (auto& inst : block) { - inst.SetName(name++); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/passes.h b/src/dynarmic/src/dynarmic/ir/opt/passes.h deleted file mode 100644 index 703145b556..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/passes.h +++ /dev/null @@ -1,47 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -namespace Dynarmic::A32 { -struct UserCallbacks; -} - -namespace Dynarmic::A64 { -struct UserCallbacks; -struct UserConfig; -} // namespace Dynarmic::A64 - -namespace Dynarmic::IR { -class Block; -} - -namespace Dynarmic::Optimization { - -struct PolyfillOptions { - bool sha256 = false; - bool vector_multiply_widen = false; - - bool operator==(const PolyfillOptions&) const = default; -}; - -struct A32GetSetEliminationOptions { - bool convert_nzc_to_nz = false; - bool convert_nz_to_nzc = false; -}; - -void PolyfillPass(IR::Block& block, const PolyfillOptions& opt); -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb); -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt); -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf); -void A64GetSetElimination(IR::Block& block); -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb); -void ConstantPropagation(IR::Block& block); -void DeadCodeElimination(IR::Block& block); -void IdentityRemovalPass(IR::Block& block); -void VerificationPass(const IR::Block& block); -void NamingPass(IR::Block& block); - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp deleted file mode 100644 index 1aa3aea91e..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2022 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -namespace { - -void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - - const IR::U128 t = ir.VectorExtract(x, y, 32); - - IR::U128 result = ir.ZeroVector(); - for (size_t i = 0; i < 4; i++) { - const IR::U32 modified_element = [&] { - const IR::U32 element = ir.VectorGetElement(32, t, i); - const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); - const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); - const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); - }(); - - result = ir.VectorSetElement(32, result, i, modified_element); - } - result = ir.VectorAdd(32, result, x); - - inst.ReplaceUsesWith(result); -} - -void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 z = (IR::U128)inst.GetArg(2); - - const IR::U128 T0 = ir.VectorExtract(y, z, 32); - - const IR::U128 lower_half = [&] { - const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); - const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); - return ir.VectorZeroUpper(tmp5); - }(); - - const IR::U64 upper_half = [&] { - const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); - const IR::U128 tmp2 = 
ir.VectorRotateRight(32, lower_half, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - - // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] - const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); - const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); - - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); - return ir.VectorGetElement(64, tmp5, 0); - }(); - - const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); - - inst.ReplaceUsesWith(result); -} - -IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Eor(ir.And(ir.Eor(y, z), x), z); -} - -IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); -} - -IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 x = (IR::U128)inst.GetArg(0); - IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 w = (IR::U128)inst.GetArg(2); - const bool part1 = inst.GetArg(3).GetU1(); - - for (size_t i = 0; i < 4; i++) { - const IR::U32 low_x = ir.VectorGetElement(32, x, 0); - const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); - const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); - const IR::U32 high_x = ir.VectorGetElement(32, x, 3); - - const IR::U32 low_y = ir.VectorGetElement(32, y, 0); - const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); - const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); - const IR::U32 high_y = ir.VectorGetElement(32, y, 3); - - const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); - const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); - - const IR::U32 t = [&] { - const IR::U32 w_element = ir.VectorGetElement(32, w, i); - const IR::U32 sig = SHAhashSIGMA1(ir, low_y); - - return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); - }(); - - const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); - const IR::U32 new_low_y = ir.Add(t, high_x); - - // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] - const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); - const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); - - x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); - y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); - } - - inst.ReplaceUsesWith(part1 ? x : y); -} - -template -void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 n = (IR::U128)inst.GetArg(0); - IR::U128 m = (IR::U128)inst.GetArg(1); - - const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); - const IR::U128 wide_m = is_signed ? 
ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); - - const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); - - inst.ReplaceUsesWith(result); -} - -} // namespace - -void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { - if (polyfill == PolyfillOptions{}) { - return; - } - - IR::IREmitter ir{block}; - - for (auto& inst : block) { - ir.SetInsertionPointBefore(&inst); - - switch (inst.GetOpcode()) { - case IR::Opcode::SHA256MessageSchedule0: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule0(ir, inst); - } - break; - case IR::Opcode::SHA256MessageSchedule1: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule1(ir, inst); - } - break; - case IR::Opcode::SHA256Hash: - if (polyfill.sha256) { - PolyfillSHA256Hash(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, false>(ir, inst); - } - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp deleted file mode 100644 index c6c2cff231..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/type.h" - -namespace Dynarmic::Optimization { - -void VerificationPass(const IR::Block& block) { - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const IR::Type t1 = inst.GetArg(i).GetType(); - const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); - if (!IR::AreTypesCompatible(t1, t2)) { - std::puts(IR::DumpBlock(block).c_str()); - ASSERT_FALSE("above block failed validation"); - } - } - } - - ankerl::unordered_dense::map actual_uses; - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const auto arg = inst.GetArg(i); - if (!arg.IsImmediate()) { - actual_uses[arg.GetInst()]++; - } - } - } - - for (const auto& pair : actual_uses) { - ASSERT(pair.first->UseCount() == pair.second); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp new file mode 100644 index 0000000000..383b915839 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -0,0 +1,1502 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include +#include + +#include +#include "boost/container/small_vector.hpp" +#include "dynarmic/frontend/A32/a32_ir_emitter.h" +#include "dynarmic/frontend/A32/a32_location_descriptor.h" +#include "dynarmic/frontend/A32/a32_types.h" +#include "dynarmic/frontend/A64/a64_ir_emitter.h" +#include "dynarmic/frontend/A64/a64_location_descriptor.h" +#include "dynarmic/frontend/A64/translate/a64_translate.h" +#include "dynarmic/interface/A32/config.h" +#include "dynarmic/interface/A64/config.h" +#include "dynarmic/interface/optimization_flags.h" +#include "dynarmic/common/safe_ops.h" +#include "dynarmic/ir/basic_block.h" +#include "dynarmic/ir/microinstruction.h" +#include "dynarmic/ir/opcodes.h" +#include "dynarmic/ir/opt_passes.h" +#include "dynarmic/ir/type.h" +#include "mcl/bit/swap.hpp" +#include "mcl/bit/rotate.hpp" +#include "mcl/iterator/reverse.hpp" + +namespace Dynarmic::Optimization { + +static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { + for (auto& inst : block) { + switch (inst.GetOpcode()) { + case IR::Opcode::A32ReadMemory8: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u8 value_from_memory = cb->MemoryRead8(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory16: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u16 value_from_memory = cb->MemoryRead16(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory32: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u32 value_from_memory = cb->MemoryRead32(vaddr); + 
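+ // Folding this load is only sound because IsReadOnlyMemory()
+ // has promised that the value at vaddr can never change.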
inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory64: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u64 value_from_memory = cb->MemoryRead64(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + default: + break; + } + } +} + +static void FlagsPass(IR::Block& block) { + using Iterator = std::reverse_iterator; + + struct FlagInfo { + bool set_not_required = false; + bool has_value_request = false; + Iterator value_request = {}; + }; + struct ValuelessFlagInfo { + bool set_not_required = false; + }; + ValuelessFlagInfo nzcvq; + ValuelessFlagInfo nzcv; + ValuelessFlagInfo nz; + FlagInfo c_flag; + FlagInfo ge; + + auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(value); + } + info.has_value_request = false; + + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_get = [](FlagInfo& info, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(IR::Value{&*inst}); + } + info.has_value_request = true; + info.value_request = inst; + }; + + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetCFlag: { + do_get(c_flag, inst); + break; + } + case IR::Opcode::A32SetCpsrNZCV: { + if (c_flag.has_value_request) { + ir.SetInsertionPointBefore(inst.base()); // base is one ahead + IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); + c_flag.value_request->ReplaceUsesWith(c); + c_flag.has_value_request = false; + break; // This case will be executed again because of the above + } + + do_set_valueless(nzcv, inst); + + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVRaw: { + if (c_flag.has_value_request) { + nzcv.set_not_required = false; + } + + do_set_valueless(nzcv, inst); + + nzcvq = {}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVQ: { + if (c_flag.has_value_request) { + nzcvq.set_not_required = false; + } + + do_set_valueless(nzcvq, inst); + + nzcv = {.set_not_required = true}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZ: { + do_set_valueless(nz, inst); + + nzcvq = {}; + nzcv = {}; + break; + } + case IR::Opcode::A32SetCpsrNZC: { + if (c_flag.has_value_request) { + c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); + c_flag.has_value_request = false; + } + + if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { + const auto nz_value = inst->GetArg(0); + + inst->Invalidate(); + + ir.SetInsertionPointBefore(inst.base()); + ir.SetCpsrNZ(IR::NZCV{nz_value}); + + nzcvq = {}; + nzcv = {}; + nz = {.set_not_required = true}; + break; + } + + if (nz.set_not_required && c_flag.set_not_required) { + inst->Invalidate(); + } else if (nz.set_not_required) { + inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); + } + nz.set_not_required = true; + 
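+ // Both the NZ half and the C flag are now covered by this write, so any
+ // earlier writer of either that the reverse walk meets next is dead.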
c_flag.set_not_required = true; + + nzcv = {}; + nzcvq = {}; + break; + } + case IR::Opcode::A32SetGEFlags: { + do_set(ge, inst->GetArg(0), inst); + break; + } + case IR::Opcode::A32GetGEFlags: { + do_get(ge, inst); + break; + } + case IR::Opcode::A32SetGEFlagsCompressed: { + ge = {.set_not_required = true}; + break; + } + case IR::Opcode::A32OrQFlag: { + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcvq = {}; + nzcv = {}; + nz = {}; + c_flag = {}; + ge = {}; + } + break; + } + } + } +} + +static void RegisterPass(IR::Block& block) { + using Iterator = IR::Block::iterator; + + struct RegInfo { + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array reg_info; + + const auto do_get = [](RegInfo& info, Iterator get_inst) { + if (info.register_value.IsEmpty()) { + info.register_value = IR::Value(&*get_inst); + return; + } + get_inst->ReplaceUsesWith(info.register_value); + }; + + const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { + if (info.last_set_instruction) { + (*info.last_set_instruction)->Invalidate(); + } + info = { + .register_value = value, + .last_set_instruction = set_inst, + }; + }; + + enum class ExtValueType { + Empty, + Single, + Double, + VectorDouble, + VectorQuad, + }; + struct ExtRegInfo { + ExtValueType value_type = {}; + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array ext_reg_info; + + const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { + if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = IR::Value(&*get_inst), + .last_set_instruction = std::nullopt, + }; + } + return; + } + get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); + }; + + const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { + if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + if (std::data(infos)[0].get().last_set_instruction) { + (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); + } + } + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = value, + .last_set_instruction = set_inst, + }; + } + }; + + // Location and version don't matter here. 
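+ // The emitter is only used by the A32SetVector case below to append a
+ // VectorZeroUpper, so a placeholder descriptor is good enough.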
+ A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + ASSERT(reg != A32::Reg::PC); + const size_t reg_index = size_t(reg); + do_get(reg_info[reg_index], inst); + break; + } + case IR::Opcode::A32SetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + if (reg == A32::Reg::PC) { + break; + } + const auto reg_index = size_t(reg); + do_set(reg_info[reg_index], inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); + break; + } + case IR::Opcode::A32SetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + break; + } + case IR::Opcode::A32SetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst->GetArg(1), + inst); + break; + } + case IR::Opcode::A32GetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + do_ext_get(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_get(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst); + } + break; + } + case IR::Opcode::A32SetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + ir.SetInsertionPointAfter(inst); + const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); + do_ext_set(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + stored_value, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_set(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst->GetArg(1), + inst); + } + break; + } + default: { + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + ext_reg_info = {}; + } + break; + } + } + } +} + +struct A32GetSetEliminationOptions { + bool convert_nzc_to_nz = false; + bool convert_nz_to_nzc = false; +}; + +static void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { + FlagsPass(block); + RegisterPass(block); +} + +static void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { + if 
(conf.hook_data_cache_operations) { + return; + } + + for (auto& inst : block) { + if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { + continue; + } + + const auto op = static_cast<A64::DataCacheOperation>(inst.GetArg(1).GetU64()); + if (op == A64::DataCacheOperation::ZeroByVA) { + A64::IREmitter ir{block}; + ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; + ir.SetInsertionPointBefore(&inst); + + size_t bytes = 4 << static_cast<size_t>(conf.dczid_el0 & 0b1111); + IR::U64 addr{inst.GetArg(2)}; + + const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); + while (bytes >= 16) { + ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(16)); + bytes -= 16; + } + + while (bytes >= 8) { + ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(8)); + bytes -= 8; + } + + while (bytes >= 4) { + ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(4)); + bytes -= 4; + } + } + inst.Invalidate(); + } +} + +static void A64GetSetElimination(IR::Block& block) { + using Iterator = IR::Block::iterator; + + enum class TrackingType { + W, + X, + S, + D, + Q, + SP, + NZCV, + NZCVRaw, + }; + struct RegisterInfo { + IR::Value register_value; + TrackingType tracking_type; + bool set_instruction_present = false; + Iterator last_set_instruction; + }; + std::array<RegisterInfo, 31> reg_info; + std::array<RegisterInfo, 32> vec_info; + RegisterInfo sp_info; + RegisterInfo nzcv_info; + + const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { + if (info.set_instruction_present) { + info.last_set_instruction->Invalidate(); + block.Instructions().erase(info.last_set_instruction); + } + + info.register_value = value; + info.tracking_type = tracking_type; + info.set_instruction_present = true; + info.last_set_instruction = set_inst; + }; + + const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { + const auto do_nothing = [&] { + info = {}; + info.register_value = IR::Value(&*get_inst); + info.tracking_type = tracking_type; + }; + + if (info.register_value.IsEmpty()) { + do_nothing(); + return; + } + + if (info.tracking_type == tracking_type) { + get_inst->ReplaceUsesWith(info.register_value); + return; + } + + do_nothing(); + }; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A64GetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::W); + break; + } + case IR::Opcode::A64GetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::X); + break; + } + case IR::Opcode::A64GetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::S); + break; + } + case IR::Opcode::A64GetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::D); + break; + } + case IR::Opcode::A64GetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64GetSP: { + do_get(sp_info, inst, TrackingType::SP); + break; + } + case IR::Opcode::A64GetNZCVRaw: { + do_get(nzcv_info, inst, TrackingType::NZCVRaw); + break; + } + case IR::Opcode::A64SetW: { + const size_t index =
A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); + break; + } + case IR::Opcode::A64SetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); + break; + } + case IR::Opcode::A64SetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); + break; + } + case IR::Opcode::A64SetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); + break; + } + case IR::Opcode::A64SetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64SetSP: { + do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); + break; + } + case IR::Opcode::A64SetNZCV: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); + break; + } + case IR::Opcode::A64SetNZCVRaw: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcv_info = {}; + } + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + vec_info = {}; + sp_info = {}; + } + break; + } + } + } +} + +static void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { + const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { + const auto instruction = cb->MemoryReadCode(location.PC()); + if (!instruction) + return false; + + IR::Block new_block{location}; + A64::TranslateSingleInstruction(new_block, location, *instruction); + + if (!new_block.Instructions().empty()) + return false; + + const IR::Terminal terminal = new_block.GetTerminal(); + if (auto term = boost::get<IR::Term::Interpret>(&terminal)) { + return term->next == location; + } + + return false; + }; + + IR::Terminal terminal = block.GetTerminal(); + auto term = boost::get<IR::Term::Interpret>(&terminal); + if (!term) + return; + + A64::LocationDescriptor location{term->next}; + size_t num_instructions = 1; + + while (is_interpret_instruction(location.AdvancePC(static_cast<int>(num_instructions * 4)))) { + num_instructions++; + } + + term->num_instructions = num_instructions; + block.ReplaceTerminal(terminal); + block.CycleCount() += num_instructions - 1; +} + +using Op = Dynarmic::IR::Opcode; + +// Tiny helper to avoid the need to store based off the opcode +// bit size all over the place within folding functions. +static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { + if (is_32_bit) { + inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)}); + } else { + inst.ReplaceUsesWith(IR::Value{value}); + } +} + +static IR::Value Value(bool is_32_bit, u64 value) { + return is_32_bit ?
IR::Value{static_cast<u32>(value)} : IR::Value{value}; +} + +template<typename ImmFn> +static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + + const bool is_lhs_immediate = lhs.IsImmediate(); + const bool is_rhs_immediate = rhs.IsImmediate(); + + if (is_lhs_immediate && is_rhs_immediate) { + const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); + ReplaceUsesWith(inst, is_32_bit, result); + return false; + } + + if (is_lhs_immediate && !is_rhs_immediate) { + const IR::Inst* rhs_inst = rhs.GetInstRecursive(); + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, rhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } else { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + } + } + + if (!is_lhs_immediate && is_rhs_immediate) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } + } + + return true; +} + +static void FoldAdd(IR::Inst& inst, bool is_32_bit) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + if (lhs.IsImmediate() && !rhs.IsImmediate()) { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + FoldAdd(inst, is_32_bit); + return; + } + + if (inst.HasAssociatedPseudoOperation()) { + return; + } + + if (!lhs.IsImmediate() && rhs.IsImmediate()) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { + const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); + if (combined == 0) { + inst.ReplaceUsesWith(lhs_inst->GetArg(0)); + return; + } + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + return; + } + if (rhs.IsZero() && carry.IsZero()) { + inst.ReplaceUsesWith(lhs); + return; + } + } + + if (inst.AreAllArgsImmediates()) { + const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); + return; + } +} + +/// Folds AND operations based on the following: +/// +/// 1. imm_x & imm_y -> result +/// 2. x & 0 -> 0 +/// 3. 0 & y -> 0 +/// 4. x & y -> y (where x has all bits set to 1) +/// 5. x & y -> x (where y has all bits set to 1) +/// +static void FoldAND(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.HasAllBitsSet()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +/// Folds byte reversal opcodes based on the following: +/// +/// 1.
imm -> swap(imm) +/// +static void FoldByteReverse(IR::Inst& inst, Op op) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + if (op == Op::ByteReverseWord) { + const u32 result = mcl::bit::swap_bytes_32(u32(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else if (op == Op::ByteReverseHalf) { + const u16 result = mcl::bit::swap_bytes_16(u16(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else { + const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); + inst.ReplaceUsesWith(IR::Value{result}); + } +} + +/// Folds division operations based on the following: +/// +/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) +/// 2. imm_x / imm_y -> result +/// 3. x / 1 -> x +/// +static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { + const auto rhs = inst.GetArg(1); + + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + return; + } + + const auto lhs = inst.GetArg(0); + if (lhs.IsImmediate() && rhs.IsImmediate()) { + if (is_signed) { + const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); + ReplaceUsesWith(inst, is_32_bit, static_cast(result)); + } else { + const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); + } + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(IR::Value{lhs}); + } +} + +// Folds EOR operations based on the following: +// +// 1. imm_x ^ imm_y -> result +// 2. x ^ 0 -> x +// 3. 0 ^ y -> y +// +static void FoldEOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static void FoldLeastSignificantByte(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantHalf(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldMostSignificantBit(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); +} + +static void FoldMostSignificantWord(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + if (carry_inst) { + carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); + } + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); +} + +// Folds multiplication operations based on the following: +// +// 1. imm_x * imm_y -> result +// 2. x * 0 -> 0 +// 3. 0 * y -> 0 +// 4. x * 1 -> x +// 5. 
1 * y -> y +// +static void FoldMultiply(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +// Folds NOT operations if the contained value is an immediate. +static void FoldNOT(IR::Inst& inst, bool is_32_bit) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + const u64 result = ~operand.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +// Folds OR operations based on the following: +// +// 1. imm_x | imm_y -> result +// 2. x | 0 -> x +// 3. 0 | y -> y +// +static void FoldOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static bool FoldShifts(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + // The 32-bit variants can contain 3 arguments, while the + // 64-bit variants only contain 2. + if (inst.NumArgs() == 3 && !carry_inst) { + inst.SetArg(2, IR::Value(false)); + } + + const auto shift_amount = inst.GetArg(1); + + if (shift_amount.IsZero()) { + if (carry_inst) { + carry_inst->ReplaceUsesWith(inst.GetArg(2)); + } + inst.ReplaceUsesWith(inst.GetArg(0)); + return false; + } + + if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { + inst.SetArg(2, IR::Value(false)); + } + + if (!inst.AreAllArgsImmediates() || carry_inst) { + return false; + } + + return true; +} + +static void FoldSignExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSignExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSub(IR::Inst& inst, bool is_32_bit) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return; + } + + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +static void FoldZeroExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldZeroExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{value}); +} + +static void ConstantPropagation(IR::Block& block) { + for (auto& inst : block) { + const auto opcode = inst.GetOpcode(); + + switch (opcode) { + case Op::LeastSignificantWord: + FoldLeastSignificantWord(inst); + break; + case Op::MostSignificantWord: + FoldMostSignificantWord(inst); + break; + case Op::LeastSignificantHalf: + FoldLeastSignificantHalf(inst); + break; + case Op::LeastSignificantByte: + FoldLeastSignificantByte(inst); + break; + case Op::MostSignificantBit: + FoldMostSignificantBit(inst); + break; + 
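+ // The remaining cases fold to immediates where possible; FoldAdd and
+ // FoldShifts also simplify some partially-constant operand patterns.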
case Op::IsZero32: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); + } + break; + case Op::IsZero64: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); + } + break; + case Op::LogicalShiftLeft32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeft64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeftMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftLeftMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::LogicalShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::ArithmeticShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::ArithmeticShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::RotateRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); + } + break; + case Op::RotateRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); + } + break; + case Op::Add32: + case Op::Add64: + FoldAdd(inst, opcode == Op::Add32); + break; + case Op::Sub32: + case Op::Sub64: + FoldSub(inst, opcode == Op::Sub32); + break; + case Op::Mul32: + case Op::Mul64: + FoldMultiply(inst, opcode == Op::Mul32); + break; + case Op::SignedDiv32: + case Op::SignedDiv64: + FoldDivide(inst, opcode == Op::SignedDiv32, true); + break; + case Op::UnsignedDiv32: 
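+ // FoldDivide exploits the ARM-defined x / 0 == 0 behaviour, so a zero
+ // divisor folds away with no guard needed.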
+ case Op::UnsignedDiv64: + FoldDivide(inst, opcode == Op::UnsignedDiv32, false); + break; + case Op::And32: + case Op::And64: + FoldAND(inst, opcode == Op::And32); + break; + case Op::Eor32: + case Op::Eor64: + FoldEOR(inst, opcode == Op::Eor32); + break; + case Op::Or32: + case Op::Or64: + FoldOR(inst, opcode == Op::Or32); + break; + case Op::Not32: + case Op::Not64: + FoldNOT(inst, opcode == Op::Not32); + break; + case Op::SignExtendByteToWord: + case Op::SignExtendHalfToWord: + FoldSignExtendXToWord(inst); + break; + case Op::SignExtendByteToLong: + case Op::SignExtendHalfToLong: + case Op::SignExtendWordToLong: + FoldSignExtendXToLong(inst); + break; + case Op::ZeroExtendByteToWord: + case Op::ZeroExtendHalfToWord: + FoldZeroExtendXToWord(inst); + break; + case Op::ZeroExtendByteToLong: + case Op::ZeroExtendHalfToLong: + case Op::ZeroExtendWordToLong: + FoldZeroExtendXToLong(inst); + break; + case Op::ByteReverseWord: + case Op::ByteReverseHalf: + case Op::ByteReverseDual: + FoldByteReverse(inst, opcode); + break; + default: + break; + } + } +} + +static void DeadCodeElimination(IR::Block& block) { + // We iterate over the instructions in reverse order. + // This is because removing an instruction reduces the number of uses for earlier instructions. + for (auto& inst : mcl::iterator::reverse(block)) { + if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { + inst.Invalidate(); + } + } +} + +static void IdentityRemovalPass(IR::Block& block) { + boost::container::small_vector<IR::Inst*, 16> to_invalidate; + + auto iter = block.begin(); + while (iter != block.end()) { + IR::Inst& inst = *iter; + + const size_t num_args = inst.NumArgs(); + for (size_t i = 0; i < num_args; i++) { + while (true) { + IR::Value arg = inst.GetArg(i); + if (!arg.IsIdentity()) + break; + inst.SetArg(i, arg.GetInst()->GetArg(0)); + } + } + + if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { + iter = block.Instructions().erase(inst); + to_invalidate.push_back(&inst); + } else { + ++iter; + } + } + for (IR::Inst* inst : to_invalidate) { + inst->Invalidate(); + } +} + +static void NamingPass(IR::Block& block) { + u32 name = 1; + for (auto& inst : block) + inst.SetName(name++); +} + +static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + + const IR::U128 t = ir.VectorExtract(x, y, 32); + + IR::U128 result = ir.ZeroVector(); + for (size_t i = 0; i < 4; i++) { + const IR::U32 modified_element = [&] { + const IR::U32 element = ir.VectorGetElement(32, t, i); + const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); + const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); + const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); + }(); + + result = ir.VectorSetElement(32, result, i, modified_element); + } + result = ir.VectorAdd(32, result, x); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 z = (IR::U128)inst.GetArg(2); + + const IR::U128 T0 = ir.VectorExtract(y, z, 32); + + const IR::U128 lower_half = [&] { + const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); + const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); + const IR::U128 tmp3 =
ir.VectorLogicalShiftRight(32, T, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); + return ir.VectorZeroUpper(tmp5); + }(); + + const IR::U64 upper_half = [&] { + const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + + // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] + const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); + const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); + + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); + return ir.VectorGetElement(64, tmp5, 0); + }(); + + const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); + + inst.ReplaceUsesWith(result); +} + +static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Eor(ir.And(ir.Eor(y, z), x), z); +} + +static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); +} + +static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 x = (IR::U128)inst.GetArg(0); + IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 w = (IR::U128)inst.GetArg(2); + const bool part1 = inst.GetArg(3).GetU1(); + + for (size_t i = 0; i < 4; i++) { + const IR::U32 low_x = ir.VectorGetElement(32, x, 0); + const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); + const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); + const IR::U32 high_x = ir.VectorGetElement(32, x, 3); + + const IR::U32 low_y = ir.VectorGetElement(32, y, 0); + const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); + const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); + const IR::U32 high_y = ir.VectorGetElement(32, y, 3); + + const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); + const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); + + const IR::U32 t = [&] { + const IR::U32 w_element = ir.VectorGetElement(32, w, i); + const IR::U32 sig = SHAhashSIGMA1(ir, low_y); + + return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); + }(); + + const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); + const IR::U32 new_low_y = ir.Add(t, high_x); + + // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] + const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); + const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); + + x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); + y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); + } + + inst.ReplaceUsesWith(part1 ? 
x : y); +} + +template<size_t esize, bool is_signed> +static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 n = (IR::U128)inst.GetArg(0); + IR::U128 m = (IR::U128)inst.GetArg(1); + + const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); + const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); + + const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { + if (polyfill == PolyfillOptions{}) { + return; + } + + IR::IREmitter ir{block}; + + for (auto& inst : block) { + ir.SetInsertionPointBefore(&inst); + + switch (inst.GetOpcode()) { + case IR::Opcode::SHA256MessageSchedule0: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule0(ir, inst); + } + break; + case IR::Opcode::SHA256MessageSchedule1: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule1(ir, inst); + } + break; + case IR::Opcode::SHA256Hash: + if (polyfill.sha256) { + PolyfillSHA256Hash(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, false>(ir, inst); + } + break; + default: + break; + } + } +} + +static void VerificationPass(const IR::Block& block) { + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) { + const IR::Type t1 = inst.GetArg(i).GetType(); + const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); + ASSERT_MSG(IR::AreTypesCompatible(t1, t2), "Block failed:\n{}", IR::DumpBlock(block)); + } + } + ankerl::unordered_dense::map<IR::Inst*, size_t> actual_uses; + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) + if (IR::Value const arg = inst.GetArg(i); !arg.IsImmediate()) + actual_uses[arg.GetInst()]++; + } + for (auto const& pair : actual_uses) + ASSERT(pair.first->UseCount() == pair.second); +} + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) [[likely]] { + Optimization::A32GetSetElimination(block, {.convert_nzc_to_nz = true}); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::A32ConstantMemoryReads(block, conf.callbacks); + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + Optimization::IdentityRemovalPass(block); + Optimization::VerificationPass(block); +} + +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions&
polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::A64CallbackConfigPass(block, conf); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) [[likely]] { + Optimization::A64GetSetElimination(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] { + Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks); + } + Optimization::VerificationPass(block); +} + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h new file mode 100644 index 0000000000..1963fad0a0 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -0,0 +1,34 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +namespace Dynarmic::A32 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::A64 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::IR { +class Block; +} + +namespace Dynarmic::Optimization { + +struct PolyfillOptions { + bool sha256 = false; + bool vector_multiply_widen = false; + + bool operator==(const PolyfillOptions&) const = default; +}; + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index 4d14141bbf..ad01e5718b 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -29,7 +29,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; @@ -179,13 +179,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 85d86c7966..29a8f23478 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -6,33 +6,24 @@ add_executable(dynarmic_tests fp/mantissa_util_tests.cpp fp/unpacked_tests.cpp rand_int.h + # A32 + A32/test_arm_disassembler.cpp + A32/test_arm_instructions.cpp + A32/test_coprocessor.cpp + A32/test_svc.cpp + A32/test_thumb_instructions.cpp + A32/testenv.h + decoder_tests.cpp + # A64 + A64/a64.cpp + A64/fibonacci.cpp + A64/fp_min_max.cpp + A64/misaligned_page_table.cpp + A64/test_invalidation.cpp + A64/real_world.cpp + A64/testenv.h ) - -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/test_arm_disassembler.cpp - A32/test_arm_instructions.cpp - A32/test_coprocessor.cpp - A32/test_svc.cpp - A32/test_thumb_instructions.cpp - A32/testenv.h - decoder_tests.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) - - target_sources(dynarmic_tests PRIVATE - A64/a64.cpp - A64/fibonacci.cpp - A64/fp_min_max.cpp - A64/misaligned_page_table.cpp - A64/test_invalidation.cpp - 
A64/real_world.cpp - A64/testenv.h - ) -endif() +target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) if (DYNARMIC_TESTS_USE_UNICORN) target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn) @@ -40,25 +31,17 @@ if (DYNARMIC_TESTS_USE_UNICORN) target_sources(dynarmic_tests PRIVATE fuzz_util.cpp fuzz_util.h + # A32 + A32/fuzz_arm.cpp + A32/fuzz_thumb.cpp + unicorn_emu/a32_unicorn.cpp + unicorn_emu/a32_unicorn.h + # A64 + A64/fuzz_with_unicorn.cpp + A64/verify_unicorn.cpp + unicorn_emu/a64_unicorn.cpp + unicorn_emu/a64_unicorn.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/fuzz_arm.cpp - A32/fuzz_thumb.cpp - unicorn_emu/a32_unicorn.cpp - unicorn_emu/a32_unicorn.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A64/fuzz_with_unicorn.cpp - A64/verify_unicorn.cpp - unicorn_emu/a64_unicorn.cpp - unicorn_emu/a64_unicorn.h - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -69,9 +52,6 @@ if ("x86_64" IN_LIST ARCHITECTURE) target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak) target_architecture_specific_sources(dynarmic_tests "x86_64" x64_cpu_info.cpp - ) - - target_architecture_specific_sources(dynarmic_tests "x86_64" native/preserve_xmm.cpp ) @@ -85,47 +65,47 @@ endif() include(CreateDirectoryGroups) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_print_info - print_info.cpp - ) +# +# dynarmic_print_info +# +add_executable(dynarmic_print_info + print_info.cpp +) +create_target_directory_groups(dynarmic_print_info) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_print_info PRIVATE . ../src) +target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_print_info) +# +# dynarmic_test_generator +# +add_executable(dynarmic_test_generator + fuzz_util.cpp + fuzz_util.h + test_generator.cpp +) - target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_print_info PRIVATE . ../src) - target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +create_target_directory_groups(dynarmic_test_generator) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_generator - fuzz_util.cpp - fuzz_util.h - test_generator.cpp - ) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_generator PRIVATE . ../src) +target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_test_generator) - - target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_generator PRIVATE . 
../src) - target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() - -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_reader - test_reader.cpp - ) - - create_target_directory_groups(dynarmic_test_reader) - - target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_reader PRIVATE . ../src) - target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +# +# dynarmic_test_reader +# +add_executable(dynarmic_test_reader + test_reader.cpp +) +create_target_directory_groups(dynarmic_test_reader) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_reader PRIVATE . ../src) +target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) +# create_target_directory_groups(dynarmic_tests) target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index ef8b87bbd1..3263ca729a 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -34,7 +34,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; From da780c1db764968e0feb94209173028e6767cf95 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:57:48 +0000 Subject: [PATCH 02/56] Fix license headers --- src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index 8e48fa3687..01af361b27 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index 2b50ad9ea3..c4c1c42792 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. 
* Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h index 1963fad0a0..88b8020031 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.h +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD From 5eaac3c529dc0305b866104ccb454a335a284952 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 00:11:40 +0000 Subject: [PATCH 03/56] [dynarmic] use small vector experiment Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index bfacdcca52..66c75aead9 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -12,6 +12,7 @@ #include #include +#include "boost/container/small_vector.hpp" #include "dynarmic/common/common_types.h" #include #include @@ -83,7 +84,7 @@ public: void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; private: //non trivial - std::vector values; //24 + boost::container::small_vector values; //24 // Block state uint16_t total_uses = 0; //8 //sometimes zeroed @@ -96,7 +97,7 @@ private: uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - alignas(16) uint8_t lru_counter = 0; //1 + uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From ac38005abe79015bbdcb21cf43bf15e16ac45f74 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:07 +0000 Subject: [PATCH 04/56] [dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 29eab7908b..919e75b77b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -118,7 +118,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept { values.push_back(inst); ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits::max)()); total_uses += inst->UseCount(); - max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType())); + max_bit_width = std::max(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType()))); } void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index 66c75aead9..2bd274730a 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -78,7 +78,7 @@ public: return std::find(values.begin(), values.end(), inst) != values.end(); } inline size_t GetMaxBitWidth() const noexcept { - return max_bit_width; + return 1 << max_bit_width; } void AddValue(IR::Inst* inst) noexcept; void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t 
host_loc_index) const noexcept; @@ -94,10 +94,10 @@ private: uint16_t is_being_used_count = 0; //8 uint16_t current_references = 0; //8 // Value state - uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 + uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6) + uint8_t lru_counter : 2 = 0; //1 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From 254f953e0c997075c6e65a528446265852677deb Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:22 +0000 Subject: [PATCH 05/56] [dynarmic] fix tests_reader and tests_generator Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 2 +- src/dynarmic/tests/A32/testenv.h | 1 - src/dynarmic/tests/A64/testenv.h | 1 - src/dynarmic/tests/CMakeLists.txt | 28 ++++++++++++++++++++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index ee06ccf19a..d6be793dc6 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -375,7 +375,7 @@ target_link_libraries(dynarmic ) if (BOOST_NO_HEADERS) -target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) + target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) else() target_link_libraries(dynarmic PRIVATE Boost::headers) endif() diff --git a/src/dynarmic/tests/A32/testenv.h b/src/dynarmic/tests/A32/testenv.h index a6df2017ce..72eaafce14 100644 --- a/src/dynarmic/tests/A32/testenv.h +++ b/src/dynarmic/tests/A32/testenv.h @@ -17,7 +17,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A32/a32.h" -#include "../native/testenv.h" template class A32TestEnv : public Dynarmic::A32::UserCallbacks { diff --git a/src/dynarmic/tests/A64/testenv.h b/src/dynarmic/tests/A64/testenv.h index 31e338b138..fcdadb23e6 100644 --- a/src/dynarmic/tests/A64/testenv.h +++ b/src/dynarmic/tests/A64/testenv.h @@ -12,7 +12,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A64/a64.h" -#include "../native/testenv.h" using Vector = Dynarmic::A64::Vector; diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 29a8f23478..f8f420e2d6 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -72,7 +72,12 @@ add_executable(dynarmic_print_info print_info.cpp ) create_target_directory_groups(dynarmic_print_info) -target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_print_info PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_print_info PRIVATE . 
../src) target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -88,7 +93,12 @@ add_executable(dynarmic_test_generator create_target_directory_groups(dynarmic_test_generator) -target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_generator PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_generator PRIVATE . ../src) target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -100,7 +110,12 @@ add_executable(dynarmic_test_reader test_reader.cpp ) create_target_directory_groups(dynarmic_test_reader) -target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_reader PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_reader PRIVATE . ../src) target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -108,7 +123,12 @@ target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LIT # create_target_directory_groups(dynarmic_tests) -target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) +target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_tests PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_tests PRIVATE . ../src) target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) From fc642a505da3b0ed6d1d66af5f2a19f954639883 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 09:10:18 +0000 Subject: [PATCH 06/56] [docs] fastmem draft Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 5 +++++ src/dynarmic/docs/Fastmem.svg | 4 ++++ src/dynarmic/docs/HostToGuest.svg | 4 ++++ 3 files changed, 13 insertions(+) create mode 100644 src/dynarmic/docs/FastMemory.md create mode 100644 src/dynarmic/docs/Fastmem.svg create mode 100644 src/dynarmic/docs/HostToGuest.svg diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md new file mode 100644 index 0000000000..2a7c1a2583 --- /dev/null +++ b/src/dynarmic/docs/FastMemory.md @@ -0,0 +1,5 @@ +# Fast memory (Fastmem) + +![Host to guest translation](./HostToGuest.svg) + +![Fastmem translation](./Fastmem.svg) diff --git a/src/dynarmic/docs/Fastmem.svg b/src/dynarmic/docs/Fastmem.svg new file mode 100644 index 0000000000..a3ed0bb68b --- /dev/null +++ b/src/dynarmic/docs/Fastmem.svg @@ -0,0 +1,4 @@ + + + +
[Fastmem.svg diagram text (SVG markup lost in extraction): "Emulator Address Space", "Guest Address Space", "SIGSEGV Trap", "Fastmem", "Only needs to linearly offset from fastmem arena", "Less codegen (SIGSEGV traps)", "Is fast only if SIGSEGV handlers are sufficiently fast"]
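The Fastmem diagram makes a concrete claim: a guest access becomes a single host load at a fixed linear offset from the fastmem arena, and correctness for unbacked addresses is recovered lazily through the SIGSEGV trap. A minimal sketch of that fast path, assuming a hypothetical `fastmem_arena` base and `Read32` helper (illustrative names, not dynarmic's actual API):

```c++
#include <cstdint>
#include <cstring>

// Illustrative only: the host reserves one large arena and the JIT emits
// guest loads as a plain load at arena + vaddr. Unbacked guest pages stay
// unmapped in the arena, so touching them raises SIGSEGV and the handler
// patches that code site to call the slow memory callback instead.
static std::uint8_t* fastmem_arena = nullptr;

std::uint32_t Read32(std::uint64_t vaddr) {
    std::uint32_t value;
    std::memcpy(&value, fastmem_arena + vaddr, sizeof(value));  // the whole fast path
    return value;
}
```

This is also where the diagram's caveat bites: the hot path is one load, but every faulting access pays for signal delivery and backpatching, so fastmem only wins if SIGSEGV handling is sufficiently fast.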
\ No newline at end of file diff --git a/src/dynarmic/docs/HostToGuest.svg b/src/dynarmic/docs/HostToGuest.svg new file mode 100644 index 0000000000..6a15a44b46 --- /dev/null +++ b/src/dynarmic/docs/HostToGuest.svg @@ -0,0 +1,4 @@ + + + +
[HostToGuest.svg diagram text (SVG markup lost in extraction): "Emulator Address Space", "Guest Address Space", "Resolver", "Host to Guest translation", "Looks up correct PTE", "Translates each address", "Is slow"]
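For contrast, the HostToGuest diagram describes the slow path: a resolver that looks up the correct PTE and translates every address individually. A hedged sketch of that shape, assuming a flat one-pointer-per-page table (`PageTable`, `PAGE_BITS`, and `Resolve` are assumptions for illustration, not dynarmic's types):

```c++
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr std::size_t PAGE_BITS = 12;  // assumed 4 KiB guest pages

struct PageTable {
    std::vector<std::uint8_t*> pointers;  // one host pointer per guest page
};

// Every guest access performs this lookup plus a branch for the unmapped
// case; that per-access overhead is what the diagram labels "Is slow".
std::uint8_t* Resolve(const PageTable& table, std::uint64_t vaddr) {
    std::uint8_t* const page = table.pointers[vaddr >> PAGE_BITS];
    return page ? page + (vaddr & ((std::uint64_t{1} << PAGE_BITS) - 1)) : nullptr;
}
```

Fastmem removes this software table walk by delegating the translation to the hardware MMU; the resolver keeps it on every access.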
\ No newline at end of file From fa33ce477f2804b33969f9f5d4bf553d1e5e14b1 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 10:16:22 +0000 Subject: [PATCH 07/56] [dynarmic] (prolly makes MSVC crash) - use 128MiB code cache Signed-off-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 6 +- .../frontend/decoder/decoder_detail.h | 65 ++++++++----------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index d2035d0fe0..b74817a611 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -210,12 +210,8 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 438b7b691c..682b0cfbf6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -269,12 +269,8 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index bd76efda2a..4050033ef5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -70,11 +70,9 @@ struct detail { return std::make_tuple(mask, expect); } - /** - * Generates the masks and shifts for each argument. - * A '-' in a bitstring indicates that we don't care about that value. - * An argument is specified by a continuous string of the same character. - */ + /// @brief Generates the masks and shifts for each argument. + /// A '-' in a bitstring indicates that we don't care about that value. + /// An argument is specified by a continuous string of the same character. template static consteval auto GetArgInfo(std::array bitstring) { std::array masks = {}; @@ -98,7 +96,6 @@ struct detail { if constexpr (N > 0) { const size_t bit_position = opcode_bitsize - i - 1; - if (arg_index >= N) throw std::out_of_range("Unexpected field"); @@ -109,20 +106,16 @@ struct detail { } } } - #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) // Avoids a MSVC ICE, and avoids Android NDK issue. ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; })); #endif - return std::make_tuple(masks, shifts); } - /** - * This struct's Make member function generates a lambda which decodes an instruction based on - * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a - * template argument. 
- */ + /// @brief This struct's Make member function generates a lambda which decodes an instruction + /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is + /// provided as a template argument. template struct VisitorCaller; @@ -130,36 +123,36 @@ struct detail { # pragma warning(push) # pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning) #endif - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...), + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...), const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...) const, + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...) const, const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; @@ -167,22 +160,16 @@ struct detail { # pragma warning(pop) #endif - /** - * Creates a matcher that can match and parse instructions based on bitstring. - * See also: GetMaskAndExpect and GetArgInfo for format of bitstring. - */ - template - static auto GetMatcher(FnT fn, const char* const name) { - constexpr size_t args_count = mcl::parameter_count_v; - + /// @brief Creates a matcher that can match and parse instructions based on bitstring. + /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. 
+ template + static constexpr auto GetMatcher(F fn, const char* const name) { + constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); - - using Iota = std::make_index_sequence; - - const auto proxy_fn = VisitorCaller::Make(Iota(), fn, arg_masks, arg_shifts); + const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); return MatcherT(name, mask, expect, proxy_fn); } }; From 64f1c7c3e3da5d09857a16b5a7b9e2f3da319bad Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 4 Sep 2025 07:24:49 +0000 Subject: [PATCH 08/56] [dynarmic] reduce matcher table noise and cache misses Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/a32_types.h | 22 +++++---------- .../src/dynarmic/frontend/A32/decoder/asimd.h | 27 +++++++++---------- .../src/dynarmic/frontend/A32/decoder/vfp.h | 12 +++------ .../src/dynarmic/frontend/A64/decoder/a64.h | 23 +++++++--------- .../frontend/decoder/decoder_detail.h | 6 ++--- .../src/dynarmic/frontend/decoder/matcher.h | 11 ++------ 6 files changed, 38 insertions(+), 63 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h index 3f4501a528..2a0cc25751 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h @@ -9,12 +9,9 @@ #pragma once #include -#include - #include #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" - #include "dynarmic/interface/A32/coprocessor_util.h" #include "dynarmic/ir/cond.h" @@ -89,24 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) { inline size_t RegNumber(Reg reg) { ASSERT(reg != Reg::INVALID_REG); - return static_cast(reg); + return size_t(reg); } inline size_t RegNumber(ExtReg reg) { if (IsSingleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::S0); + return size_t(reg) - size_t(ExtReg::S0); + } else if (IsDoubleExtReg(reg)) { + return size_t(reg) - size_t(ExtReg::D0); } - - if (IsDoubleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::D0); - } - - if (IsQuadExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::Q0); - } - - ASSERT_MSG(false, "Invalid extended register"); - return 0; + ASSERT(IsQuadExtReg(reg)); + return size_t(reg) - size_t(ExtReg::Q0); } inline Reg operator+(Reg reg, size_t number) { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f2e206695b..f63816d2b1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher; template std::vector> GetASIMDDecodeTable() { - std::vector> table = { - -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> table = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" #undef INST - }; - // Exceptions to the rule of thumb. 
const std::set comes_first{ "VBIC, VMOV, VMVN, VORR (immediate)", @@ -53,19 +50,21 @@ std::vector> GetASIMDDecodeTable() { "VQDMULH (scalar)", "VQRDMULH (scalar)", }; - const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_first.count(matcher.GetName()) > 0; + const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_first.count(e.first) > 0; }); - const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_last.count(matcher.GetName()) == 0; + const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_last.count(e.first) == 0; }); - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) { - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - - return table; + std::vector> final_table; + std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + return e.second; + }); + return final_table; } template diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index 5fcacd2bda..a969570547 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -30,22 +30,18 @@ std::optional>> DecodeVFP(u32 instruc static const struct Tables { Table unconditional; Table conditional; - } tables = [] { + } tables = []() { Table list = { - #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./vfp.inc" #undef INST - }; - - const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { return (matcher.GetMask() & 0xF0000000) == 0xF0000000; }); - return Tables{ - Table{list.begin(), division}, - Table{division, list.end()}, + Table{list.begin(), it}, + Table{it, list.end()}, }; }(); diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c651dd7cde..c7bc981ac6 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) { template constexpr DecodeTable GetDecodeTable() { - std::vector> list = { -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./a64.inc" #undef INST }; - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { + std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { // If a matcher has more bits in its mask it is more specific, so it should come first. 
- return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - // Exceptions to the above rule of thumb. - std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + std::stable_partition(list.begin(), list.end(), [&](const auto& e) { return std::set{ "MOVI, MVNI, ORR, BIC (vector, immediate)", "FMOV (vector, immediate)", "Unallocated SIMD modified immediate", - }.count(matcher.GetName()) > 0; + }.count(e.first) > 0; }); - DecodeTable table{}; for (size_t i = 0; i < table.size(); ++i) { - for (auto matcher : list) { - const auto expect = detail::ToFastLookupIndex(matcher.GetExpected()); - const auto mask = detail::ToFastLookupIndex(matcher.GetMask()); + for (auto const& e : list) { + const auto expect = detail::ToFastLookupIndex(e.second.GetExpected()); + const auto mask = detail::ToFastLookupIndex(e.second.GetMask()); if ((i & mask) == expect) { - table[i].push_back(matcher); + table[i].push_back(e.second); } } } diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index 4050033ef5..0d9da6169c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -163,18 +163,18 @@ struct detail { /// @brief Creates a matcher that can match and parse instructions based on bitstring. /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. template - static constexpr auto GetMatcher(F fn, const char* const name) { + static constexpr auto GetMatcher(F fn) { constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); - return MatcherT(name, mask, expect, proxy_fn); + return MatcherT(mask, expect, proxy_fn); } }; -#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn, name) +#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn) } // namespace detail } // namespace Dynarmic::Decoder diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 7e5c9c5a8f..5a2afe57c5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -31,14 +31,8 @@ public: using visitor_type = Visitor; using handler_return_type = typename Visitor::instruction_return_type; using handler_function = std::function; - - Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func) - : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {} - - /// Gets the name of this type of instruction. - const char* GetName() const { - return name; - } + Matcher(opcode_type mask, opcode_type expected, handler_function func) + : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. 
opcode_type GetMask() const { @@ -70,7 +64,6 @@ public: } private: - const char* name; opcode_type mask; opcode_type expected; handler_function fn; From cff1ede8fb36761d9c13cee3e4d8c6724ea04ec9 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 20:59:23 +0000 Subject: [PATCH 09/56] [dynarmic] fix tests Signed-off-by: lizzie --- .../src/dynarmic/frontend/decoder/matcher.h | 40 +++++++----------- src/dynarmic/tests/A32/fuzz_arm.cpp | 1 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 1 + .../tests/A32/test_arm_instructions.cpp | 1 + src/dynarmic/tests/A32/test_coprocessor.cpp | 1 + src/dynarmic/tests/A32/test_svc.cpp | 1 + .../tests/A32/test_thumb_instructions.cpp | 1 + src/dynarmic/tests/A64/a64.cpp | 1 + src/dynarmic/tests/A64/fp_min_max.cpp | 1 + src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 1 + .../tests/A64/misaligned_page_table.cpp | 1 + src/dynarmic/tests/A64/real_world.cpp | 1 + src/dynarmic/tests/A64/test_invalidation.cpp | 1 + src/dynarmic/tests/decoder_tests.cpp | 2 + src/dynarmic/tests/native/preserve_xmm.cpp | 1 + src/dynarmic/tests/print_info.cpp | 42 ++++--------------- 16 files changed, 39 insertions(+), 58 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 5a2afe57c5..f7e2884e0c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -14,16 +14,12 @@ namespace Dynarmic::Decoder { -/** - * Generic instruction handling construct. - * - * @tparam Visitor An arbitrary visitor type that will be passed through - * to the function being handled. This type must be the - * type of the first parameter in a handler function. - * - * @tparam OpcodeType Type representing an opcode. This must be the - * type of the second parameter in a handler function. - */ +/// Generic instruction handling construct. +/// @tparam Visitor An arbitrary visitor type that will be passed through +/// to the function being handled. This type must be the +/// type of the first parameter in a handler function. +/// @tparam OpcodeType Type representing an opcode. This must be the +/// type of the second parameter in a handler function. template class Matcher { public: @@ -35,30 +31,26 @@ public: : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. - opcode_type GetMask() const { + inline opcode_type GetMask() const noexcept { return mask; } /// Gets the expected value after masking for this instruction. - opcode_type GetExpected() const { + inline opcode_type GetExpected() const noexcept { return expected; } - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - bool Matches(opcode_type instruction) const { + /// Tests to see if the given instruction is the instruction this matcher represents. + /// @param instruction The instruction to test + /// @returns true if the given instruction matches. + inline bool Matches(opcode_type instruction) const noexcept { return (instruction & mask) == expected; } - /** - * Calls the corresponding instruction handler on visitor for this type of instruction. - * @param v The visitor to use - * @param instruction The instruction to decode. - */ - handler_return_type call(Visitor& v, opcode_type instruction) const { + /// Calls the corresponding instruction handler on visitor for this type of instruction. 
+ /// @param v The visitor to use + /// @param instruction The instruction to decode. + inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept { ASSERT(Matches(instruction)); return fn(v, instruction); } diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp index 087ce54813..5ee6d2bf02 100644 --- a/src/dynarmic/tests/A32/fuzz_arm.cpp +++ b/src/dynarmic/tests/A32/fuzz_arm.cpp @@ -24,6 +24,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a32_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index ad01e5718b..7f64cb0ccb 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -22,6 +22,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a32_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/FPSCR.h" #include "dynarmic/frontend/A32/PSR.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" diff --git a/src/dynarmic/tests/A32/test_arm_instructions.cpp b/src/dynarmic/tests/A32/test_arm_instructions.cpp index 0411877823..c007a18299 100644 --- a/src/dynarmic/tests/A32/test_arm_instructions.cpp +++ b/src/dynarmic/tests/A32/test_arm_instructions.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" diff --git a/src/dynarmic/tests/A32/test_coprocessor.cpp b/src/dynarmic/tests/A32/test_coprocessor.cpp index 98da0e5d34..49cb42bdf3 100644 --- a/src/dynarmic/tests/A32/test_coprocessor.cpp +++ b/src/dynarmic/tests/A32/test_coprocessor.cpp @@ -8,6 +8,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/tests/A32/test_svc.cpp b/src/dynarmic/tests/A32/test_svc.cpp index 8b55d6537c..998566130e 100644 --- a/src/dynarmic/tests/A32/test_svc.cpp +++ b/src/dynarmic/tests/A32/test_svc.cpp @@ -8,6 +8,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A32/test_thumb_instructions.cpp b/src/dynarmic/tests/A32/test_thumb_instructions.cpp index 3501d5419f..d509acdd8d 100644 --- a/src/dynarmic/tests/A32/test_thumb_instructions.cpp +++ b/src/dynarmic/tests/A32/test_thumb_instructions.cpp @@ -10,6 +10,7 @@ #include "dynarmic/common/common_types.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A32/a32.h" static Dynarmic::A32::UserConfig GetUserConfig(ThumbTestEnv* testenv) { diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index 40eff1f071..24e92d1210 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -7,6 +7,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/A64/fp_min_max.cpp b/src/dynarmic/tests/A64/fp_min_max.cpp index d8b45db807..1669b63071 100644 --- a/src/dynarmic/tests/A64/fp_min_max.cpp +++ b/src/dynarmic/tests/A64/fp_min_max.cpp @@ -12,6 +12,7 @@ #include "dynarmic/common/common_types.h" #include "./testenv.h" 
+#include "../native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp index 3322af06e7..45ba728917 100644 --- a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp +++ b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp @@ -19,6 +19,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a64_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/A64/misaligned_page_table.cpp b/src/dynarmic/tests/A64/misaligned_page_table.cpp index 8235e14a67..ecba5a3efa 100644 --- a/src/dynarmic/tests/A64/misaligned_page_table.cpp +++ b/src/dynarmic/tests/A64/misaligned_page_table.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" TEST_CASE("misaligned load/store do not use page_table when detect_misaligned_access_via_page_table is set", "[a64]") { diff --git a/src/dynarmic/tests/A64/real_world.cpp b/src/dynarmic/tests/A64/real_world.cpp index 07532d95af..a083f16d61 100644 --- a/src/dynarmic/tests/A64/real_world.cpp +++ b/src/dynarmic/tests/A64/real_world.cpp @@ -5,6 +5,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/test_invalidation.cpp b/src/dynarmic/tests/A64/test_invalidation.cpp index 168043c1cb..8a63776fa4 100644 --- a/src/dynarmic/tests/A64/test_invalidation.cpp +++ b/src/dynarmic/tests/A64/test_invalidation.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index e545309960..777ea8b510 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,6 +20,7 @@ using namespace Dynarmic; +/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -76,3 +77,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { } while (x != 0); } } +*/ diff --git a/src/dynarmic/tests/native/preserve_xmm.cpp b/src/dynarmic/tests/native/preserve_xmm.cpp index 0f69697b7a..7421252063 100644 --- a/src/dynarmic/tests/native/preserve_xmm.cpp +++ b/src/dynarmic/tests/native/preserve_xmm.cpp @@ -6,6 +6,7 @@ #include #include "../A64/testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 3263ca729a..0c89e780ad 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -32,6 +32,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/frontend/A64/translate/impl/impl.h" #include "dynarmic/interface/A32/a32.h" +#include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/opt_passes.h" @@ -39,20 +40,20 @@ using namespace Dynarmic; const char* GetNameOfA32Instruction(u32 instruction) { - if (auto vfp_decoder = A32::DecodeVFP(instruction)) { + /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { return vfp_decoder->get().GetName(); } else if (auto asimd_decoder = 
A32::DecodeASIMD(instruction)) { return asimd_decoder->get().GetName(); } else if (auto decoder = A32::DecodeArm(instruction)) { return decoder->get().GetName(); - } + }*/ return ""; } const char* GetNameOfA64Instruction(u32 instruction) { - if (auto decoder = A64::Decode(instruction)) { + /*if (auto decoder = A64::Decode(instruction)) { return decoder->get().GetName(); - } + }*/ return ""; } @@ -64,18 +65,9 @@ void PrintA32Instruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A32GetSetElimination(ir_block, {}); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A32::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } @@ -88,18 +80,9 @@ void PrintA64Instruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A64::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A64::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } @@ -115,18 +98,9 @@ void PrintThumbInstruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A32GetSetElimination(ir_block, {}); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A32::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } From d282f639153c1a0b04e011f270f07d8a895713b7 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:07:18 +0000 Subject: [PATCH 10/56] [dynarmic] fix ASIMD execution Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f63816d2b1..2cea7a14c1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -61,7 +61,7 @@ std::vector> GetASIMDDecodeTable() { return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); std::vector> final_table; - std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) { 
return e.second; }); return final_table; From 57e76e433ef6eee0ce8a289f5071ed7ab3b1932f Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:40:13 +0000 Subject: [PATCH 11/56] [dynarmic] add back encoding names (for print_info) Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/decoder/arm.h | 20 ++++++++++--- .../src/dynarmic/frontend/A32/decoder/asimd.h | 26 ++++++++++++----- .../dynarmic/frontend/A32/decoder/thumb16.h | 24 +++++++++++----- .../dynarmic/frontend/A32/decoder/thumb32.h | 24 +++++++++++----- .../src/dynarmic/frontend/A32/decoder/vfp.h | 24 +++++++++++----- .../A32/translate/translate_thumb.cpp | 2 +- .../src/dynarmic/frontend/A64/decoder/a64.h | 20 ++++++++++--- src/dynarmic/tests/decoder_tests.cpp | 28 ++++++------------- src/dynarmic/tests/print_info.cpp | 22 +++++++-------- 9 files changed, 122 insertions(+), 68 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h index 0257c28ddb..c6f034ae21 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h @@ -30,13 +30,13 @@ template using ArmDecodeTable = std::array>, 0x1000>; namespace detail { -inline size_t ToFastLookupIndexArm(u32 instruction) { +inline size_t ToFastLookupIndexArm(u32 instruction) noexcept { return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0); } } // namespace detail template -constexpr ArmDecodeTable GetArmDecodeTable() { +constexpr ArmDecodeTable GetArmDecodeTable() noexcept { std::vector> list = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./arm.inc" @@ -62,15 +62,27 @@ constexpr ArmDecodeTable GetArmDecodeTable() { } template -std::optional>> DecodeArm(u32 instruction) { +std::optional>> DecodeArm(u32 instruction) noexcept { alignas(64) static const auto table = GetArmDecodeTable(); const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)]; auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameARM(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./arm.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index 2cea7a14c1..57e1392871 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -26,7 +26,7 @@ template using ASIMDMatcher = Decoder::Matcher; template -std::vector> GetASIMDDecodeTable() { +std::vector> GetASIMDDecodeTable() noexcept { std::vector>> table = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" @@ -68,13 +68,25 @@ std::vector> GetASIMDDecodeTable() { } template -std::optional>> DecodeASIMD(u32 instruction) { - static const auto table = GetASIMDDecodeTable(); - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); +std::optional>> DecodeASIMD(u32 instruction) noexcept { + alignas(64) static const auto table = GetASIMDDecodeTable(); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameASIMD(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./asimd.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h index 8073ee5d47..16b99ba5aa 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h @@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher; template std::optional>> DecodeThumb16(u16 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)), #include "./thumb16.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb16(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) }, +#include "./thumb16.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h index 86a4d767a7..19418de67c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h @@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher; template std::optional>> DecodeThumb32(u32 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./thumb32.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb32(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./thumb32.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index a969570547..a346304a9a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -26,8 +26,7 @@ using VFPMatcher = Decoder::Matcher; template std::optional>> DecodeVFP(u32 instruction) { using Table = std::vector>; - - static const struct Tables { + alignas(64) static const struct Tables { Table unconditional; Table conditional; } tables = []() { @@ -44,14 +43,25 @@ std::optional>> DecodeVFP(u32 instruc Table{it, list.end()}, }; }(); - const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000; const Table& table = is_unconditional ? tables.unconditional : tables.conditional; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameVFP(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./vfp.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp index 5bb516ccfd..0381c984cc 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp @@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) { return 0xF7F0A000; // UDF } -bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) { +inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept { return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000; } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c7bc981ac6..7e6cdc3935 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -71,12 +71,24 @@ constexpr DecodeTable GetDecodeTable() { template std::optional>> Decode(u32 instruction) { alignas(64) static const auto table = GetDecodeTable(); - const auto matches_instruction = [instruction](const auto& matcher) { - return matcher.Matches(instruction); - }; const auto& subtable = table[detail::ToFastLookupIndex(instruction)]; - auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); + auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetName(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./a64.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A64 diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index 777ea8b510..4ad9d90833 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,7 +20,6 @@ using namespace Dynarmic; -/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -37,22 +36,12 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto is_decode_error = [&get_ir](const A32::ASIMDMatcher& matcher, u32 instruction) { const auto block = get_ir(matcher, instruction); - - for (const auto& ir_inst : block) { - if (ir_inst.GetOpcode() == IR::Opcode::A32ExceptionRaised) { - if (static_cast(ir_inst.GetArg(1).GetU64()) == A32::Exception::DecodeError) { - return true; - } - } - } - return false; + return std::find_if(block.cbegin(), block.cend(), [](auto const& e) { + return e.GetOpcode() == IR::Opcode::A32ExceptionRaised && A32::Exception(e.GetArg(1).GetU64()) == A32::Exception::DecodeError; + }) != block.cend(); }; for (auto iter = table.cbegin(); iter != table.cend(); ++iter) { - if (std::strncmp(iter->GetName(), "UNALLOCATED", 11) == 0) { - continue; - } - const u32 expect = iter->GetExpected(); const u32 mask = iter->GetMask(); u32 x = 0; @@ -60,15 +49,17 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const u32 instruction = expect | x; const bool iserr = is_decode_error(*iter, instruction); - const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { return m.Matches(instruction); }); + const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { + return m.Matches(instruction); + }); const bool altiserr = is_decode_error(*alternative, instruction); INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction); INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect); INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x); - INFO("Name: " << iter->GetName()); + INFO("Name: " << *A32::GetNameASIMD(instruction)); INFO("iserr: " << iserr); - INFO("alternative: " << alternative->GetName()); + //INFO("alternative: " << alternative->GetName()); INFO("altiserr: " << altiserr); REQUIRE(((!iserr && alternative == iter) || (iserr && alternative != iter && !altiserr))); @@ -76,5 +67,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { x = ((x | mask) + 1) & ~mask; } while (x != 0); } -} -*/ +} \ No newline at end of file diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 0c89e780ad..8936f32bd3 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -39,21 +39,19 @@ using namespace Dynarmic; -const char* GetNameOfA32Instruction(u32 instruction) { - /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { - return vfp_decoder->get().GetName(); - } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { - return asimd_decoder->get().GetName(); - } else if (auto decoder = A32::DecodeArm(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA32Instruction(u32 instruction) { + if (auto const vfp_decoder = A32::DecodeVFP(instruction)) + return *A32::GetNameVFP(instruction); + else if (auto const asimd_decoder = A32::DecodeASIMD(instruction)) + return *A32::GetNameASIMD(instruction); + 
else if (auto const decoder = A32::DecodeArm(instruction)) + return *A32::GetNameARM(instruction); return ""; } -const char* GetNameOfA64Instruction(u32 instruction) { - /*if (auto decoder = A64::Decode(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA64Instruction(u32 instruction) { + if (auto const decoder = A64::Decode(instruction)) + return *A64::GetName(instruction); return ""; } From d2665904d2939ba92da935305bd8354766480cda Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 14:08:17 +0000 Subject: [PATCH 12/56] [dynarmic] use ARCHITECTURE_ macros instead of MCL ones Signed-off-by: lizzie --- .../src/dynarmic/backend/exception_handler.h | 18 +- .../backend/exception_handler_generic.cpp | 6 +- .../backend/exception_handler_macos.cpp | 12 +- .../backend/exception_handler_macos_mig.c | 4 +- .../backend/exception_handler_posix.cpp | 184 ++++++++---------- .../backend/exception_handler_windows.cpp | 4 +- src/dynarmic/src/dynarmic/common/context.h | 120 ++++++++++++ 7 files changed, 220 insertions(+), 128 deletions(-) create mode 100644 src/dynarmic/src/dynarmic/common/context.h diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler.h b/src/dynarmic/src/dynarmic/backend/exception_handler.h index 173949628c..cd274b111f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler.h +++ b/src/dynarmic/src/dynarmic/backend/exception_handler.h @@ -15,15 +15,15 @@ #include #include "dynarmic/common/common_types.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) namespace Dynarmic::Backend::X64 { class BlockOfCode; } // namespace Dynarmic::Backend::X64 -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) namespace oaknut { class CodeBlock; } // namespace oaknut -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) namespace Dynarmic::Backend::RV64 { class CodeBlock; } // namespace Dynarmic::Backend::RV64 @@ -33,16 +33,16 @@ class CodeBlock; namespace Dynarmic::Backend { -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) struct FakeCall { u64 call_rip; u64 ret_rip; }; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) struct FakeCall { u64 call_pc; }; -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) struct FakeCall { }; #else @@ -54,11 +54,11 @@ public: ExceptionHandler(); ~ExceptionHandler(); -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void Register(X64::BlockOfCode& code); -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void Register(oaknut::CodeBlock& mem, std::size_t mem_size); -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void Register(RV64::CodeBlock& mem, std::size_t mem_size); #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp index ad7df25ca6..985536d9f0 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp @@ -13,15 +13,15 @@ struct ExceptionHandler::Impl final { ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode&) { // Do nothing } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void 
ExceptionHandler::Register(oaknut::CodeBlock&, std::size_t) { // Do nothing } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock&, std::size_t) { // Do nothing } diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp index 52bcf5972f..76e517f05b 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp @@ -25,7 +25,7 @@ #include "dynarmic/backend/exception_handler.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/block_of_code.h" # define mig_external extern "C" @@ -36,7 +36,7 @@ using dynarmic_thread_state_t = x86_thread_state64_t; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include # define mig_external extern "C" @@ -133,7 +133,7 @@ void MachHandler::MessagePump() { } } -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { std::lock_guard guard(code_block_infos_mutex); @@ -151,7 +151,7 @@ kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { return KERN_SUCCESS; } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) kern_return_t MachHandler::HandleRequest(arm_thread_state64_t* ts) { std::lock_guard guard(code_block_infos_mutex); @@ -269,13 +269,13 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { const u64 code_begin = mcl::bit_cast(code.getCode()); const u64 code_end = code_begin + code.GetTotalCodeSize(); impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c index 762a80ca42..25678ab115 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c @@ -5,9 +5,9 @@ #include -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/mig/mach_exc_server.c" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include "dynarmic/backend/arm64/mig/mach_exc_server.c" #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index d0653eceab..3f0643c087 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -7,36 +7,20 @@ */ #include -#include -#include #include -#include #include -#include -#ifdef __APPLE__ -# include -# include -#else -# include -# ifndef __OpenBSD__ -# include -# endif -# ifdef __sun__ -# include -# endif -#endif - -#include - #include "dynarmic/backend/exception_handler.h" +#include "dynarmic/common/assert.h" +#include "dynarmic/common/context.h" +#include #include "dynarmic/common/common_types.h" -#if 
defined(MCL_ARCHITECTURE_X86_64) -# include "dynarmic/backend/x64/block_of_code.h" -#elif defined(MCL_ARCHITECTURE_ARM64) -# include +#if defined(ARCHITECTURE_x86_64) +# include "dynarmic/backend/x64/block_of_code.h" +#elif defined(ARCHITECTURE_arm64) +# include # include "dynarmic/backend/arm64/abi.h" -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) # include "dynarmic/backend/riscv64/code_block.h" #else # error "Invalid architecture" @@ -127,41 +111,68 @@ void RegisterHandler() { } } -void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { - DEBUG_ASSERT(sig == SIGSEGV || sig == SIGBUS); -#ifndef MCL_ARCHITECTURE_RISCV - ucontext_t* ucontext = reinterpret_cast(raw_context); -#ifndef __OpenBSD__ - auto& mctx = ucontext->uc_mcontext; -#endif -#endif +SigHandler::SigHandler() { + const size_t signal_stack_size = std::max(SIGSTKSZ, 2 * 1024 * 1024); -#if defined(MCL_ARCHITECTURE_X86_64) -# if defined(__APPLE__) -# define CTX_RIP (mctx->__ss.__rip) -# define CTX_RSP (mctx->__ss.__rsp) -# elif defined(__linux__) -# define CTX_RIP (mctx.gregs[REG_RIP]) -# define CTX_RSP (mctx.gregs[REG_RSP]) -# elif defined(__FreeBSD__) -# define CTX_RIP (mctx.mc_rip) -# define CTX_RSP (mctx.mc_rsp) -# elif defined(__NetBSD__) -# define CTX_RIP (mctx.__gregs[_REG_RIP]) -# define CTX_RSP (mctx.__gregs[_REG_RSP]) -# elif defined(__OpenBSD__) -# define CTX_RIP (ucontext->sc_rip) -# define CTX_RSP (ucontext->sc_rsp) -# elif defined(__sun__) -# define CTX_RIP (mctx.gregs[REG_RIP]) -# define CTX_RSP (mctx.gregs[REG_RSP]) -# else -# error "Unknown platform" -# endif + signal_stack_memory = std::malloc(signal_stack_size); + + stack_t signal_stack; + signal_stack.ss_sp = signal_stack_memory; + signal_stack.ss_size = signal_stack_size; + signal_stack.ss_flags = 0; + if (sigaltstack(&signal_stack, nullptr) != 0) { + fmt::print(stderr, "dynarmic: POSIX SigHandler: init failure at sigaltstack\n"); + supports_fast_mem = false; + return; + } + + struct sigaction sa; + sa.sa_handler = nullptr; + sa.sa_sigaction = &SigHandler::SigAction; + sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGSEGV, &sa, &old_sa_segv) != 0) { + fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGSEGV handler\n"); + supports_fast_mem = false; + return; + } +#ifdef __APPLE__ + if (sigaction(SIGBUS, &sa, &old_sa_bus) != 0) { + fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGBUS handler\n"); + supports_fast_mem = false; + return; + } +#endif +} + +SigHandler::~SigHandler() { + std::free(signal_stack_memory); +} + +void SigHandler::AddCodeBlock(CodeBlockInfo cbi) { + std::lock_guard guard(code_block_infos_mutex); + if (auto const iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) + code_block_infos.erase(iter); + code_block_infos.push_back(cbi); +} + +void SigHandler::RemoveCodeBlock(u64 host_pc) { + std::lock_guard guard(code_block_infos_mutex); + const auto iter = FindCodeBlockInfo(host_pc); + if (iter != code_block_infos.end()) + code_block_infos.erase(iter); +} + +void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { + ASSERT(sig == SIGSEGV || sig == SIGBUS); + CTX_DECLARE(raw_context); +#if defined(ARCHITECTURE_x86_64) { - std::shared_lock guard(sig_handler->code_block_infos_mutex); - if (auto const iter = sig_handler->FindCodeBlockInfo(CTX_RIP); iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->second.cb(CTX_RIP); + std::lock_guard 
guard(sig_handler->code_block_infos_mutex); + + const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP); + if (iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->cb(CTX_RIP); CTX_RSP -= sizeof(u64); *mcl::bit_cast(CTX_RSP) = fc.ret_rip; CTX_RIP = fc.call_rip; @@ -169,58 +180,19 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { } } fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); -#elif defined(MCL_ARCHITECTURE_ARM64) -# if defined(__APPLE__) -# define CTX_PC (mctx->__ss.__pc) -# define CTX_SP (mctx->__ss.__sp) -# define CTX_LR (mctx->__ss.__lr) -# define CTX_X(i) (mctx->__ss.__x[i]) -# define CTX_Q(i) (mctx->__ns.__v[i]) -# elif defined(__linux__) -# define CTX_PC (mctx.pc) -# define CTX_SP (mctx.sp) -# define CTX_LR (mctx.regs[30]) -# define CTX_X(i) (mctx.regs[i]) -# define CTX_Q(i) (fpctx->vregs[i]) - [[maybe_unused]] const auto fpctx = [&mctx] { - _aarch64_ctx* header = (_aarch64_ctx*)&mctx.__reserved; - while (header->magic != FPSIMD_MAGIC) { - ASSERT(header->magic && header->size); - header = (_aarch64_ctx*)((char*)header + header->size); - } - return (fpsimd_context*)header; - }(); -# elif defined(__FreeBSD__) -# define CTX_PC (mctx.mc_gpregs.gp_elr) -# define CTX_SP (mctx.mc_gpregs.gp_sp) -# define CTX_LR (mctx.mc_gpregs.gp_lr) -# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) -# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) -# elif defined(__NetBSD__) -# define CTX_PC (mctx.mc_gpregs.gp_elr) -# define CTX_SP (mctx.mc_gpregs.gp_sp) -# define CTX_LR (mctx.mc_gpregs.gp_lr) -# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) -# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) -# elif defined(__OpenBSD__) -# define CTX_PC (ucontext->sc_elr) -# define CTX_SP (ucontext->sc_sp) -# define CTX_LR (ucontext->sc_lr) -# define CTX_X(i) (ucontext->sc_x[i]) -# define CTX_Q(i) (ucontext->sc_q[i]) -# else -# error "Unknown platform" -# endif +#elif defined(ARCHITECTURE_arm64) + CTX_DECLARE(raw_context); { - std::shared_lock guard(sig_handler->code_block_infos_mutex); - if (const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->second.cb(CTX_PC); + std::lock_guard guard(sig_handler->code_block_infos_mutex); + const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); + if (iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->cb(CTX_PC); CTX_PC = fc.call_pc; return; } } fmt::print(stderr, "Unhandled {} at pc {:#018x}\n", sig == SIGSEGV ? 
"SIGSEGV" : "SIGBUS", CTX_PC); -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) ASSERT_FALSE("Unimplemented"); #else # error "Invalid architecture" @@ -269,15 +241,15 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { impl = std::make_unique(mcl::bit_cast(code.getCode()), code.GetTotalCodeSize()); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp index 719c007594..9e76cae912 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp @@ -5,9 +5,9 @@ #include -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/exception_handler_windows.cpp" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include "dynarmic/backend/exception_handler_generic.cpp" #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/common/context.h b/src/dynarmic/src/dynarmic/common/context.h new file mode 100644 index 0000000000..0eb128449c --- /dev/null +++ b/src/dynarmic/src/dynarmic/common/context.h @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#ifdef __APPLE__ +# include +# include +#else +# include +# ifndef __OpenBSD__ +# include +# endif +# ifdef __sun__ +# include +# endif +# ifdef __linux__ +# include +# endif +#endif + +#ifdef ARCHITECTURE_x86_64 +# ifdef __OpenBSD__ +# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast(raw_context); +# else +# define CTX_DECLARE(raw_context) \ + ucontext_t* ucontext = reinterpret_cast(raw_context); \ + [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; +# endif +#elif defined(ARCHITECTURE_arm64) +# ifdef __OpenBSD__ +# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast(raw_context); +# else +# define CTX_DECLARE(raw_context) \ + ucontext_t* ucontext = reinterpret_cast(raw_context); \ + [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; \ + [[maybe_unused]] const auto fpctx = GetFloatingPointState(mctx); +# endif +#endif + +#if defined(ARCHITECTURE_x86_64) +# if defined(__APPLE__) +# define CTX_RIP (mctx->__ss.__rip) +# define CTX_RSP (mctx->__ss.__rsp) +# elif defined(__linux__) +# define CTX_RIP (mctx.gregs[REG_RIP]) +# define CTX_RSP (mctx.gregs[REG_RSP]) +# elif defined(__FreeBSD__) +# define CTX_RIP (mctx.mc_rip) +# define CTX_RSP (mctx.mc_rsp) +# elif defined(__NetBSD__) +# define CTX_RIP (mctx.__gregs[_REG_RIP]) +# define CTX_RSP (mctx.__gregs[_REG_RSP]) +# elif defined(__OpenBSD__) +# define CTX_RIP (ucontext->sc_rip) +# define CTX_RSP (ucontext->sc_rsp) +# elif defined(__sun__) +# define CTX_RIP (mctx.gregs[REG_RIP]) +# define CTX_RSP (mctx.gregs[REG_RSP]) +# else +# error "Unknown platform" +# endif +#elif 
defined(ARCHITECTURE_arm64)
+# if defined(__APPLE__)
+# define CTX_PC (mctx->__ss.__pc)
+# define CTX_SP (mctx->__ss.__sp)
+# define CTX_LR (mctx->__ss.__lr)
+# define CTX_PSTATE (mctx->__ss.__cpsr)
+# define CTX_X(i) (mctx->__ss.__x[i])
+# define CTX_Q(i) (mctx->__ns.__v[i])
+# define CTX_FPSR (mctx->__ns.__fpsr)
+# define CTX_FPCR (mctx->__ns.__fpcr)
+# elif defined(__linux__)
+# define CTX_PC (mctx.pc)
+# define CTX_SP (mctx.sp)
+# define CTX_LR (mctx.regs[30])
+# define CTX_PSTATE (mctx.pstate)
+# define CTX_X(i) (mctx.regs[i])
+# define CTX_Q(i) (fpctx->vregs[i])
+# define CTX_FPSR (fpctx->fpsr)
+# define CTX_FPCR (fpctx->fpcr)
+# elif defined(__FreeBSD__)
+# define CTX_PC (mctx.mc_gpregs.gp_elr)
+# define CTX_SP (mctx.mc_gpregs.gp_sp)
+# define CTX_LR (mctx.mc_gpregs.gp_lr)
+# define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
+# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
+# elif defined(__NetBSD__)
+# define CTX_PC (mctx.mc_gpregs.gp_elr)
+# define CTX_SP (mctx.mc_gpregs.gp_sp)
+# define CTX_LR (mctx.mc_gpregs.gp_lr)
+# define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
+# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
+# elif defined(__OpenBSD__)
+# define CTX_PC (ucontext->sc_elr)
+# define CTX_SP (ucontext->sc_sp)
+# define CTX_LR (ucontext->sc_lr)
+# define CTX_X(i) (ucontext->sc_x[i])
+# define CTX_Q(i) (ucontext->sc_q[i])
+# else
+# error "Unknown platform"
+# endif
+#else
+# error "unimplemented"
+#endif
+
+#ifdef ARCHITECTURE_arm64
+#ifdef __APPLE__
+inline _STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) {
+    return &(host_ctx->__ns);
+}
+#elif defined(__linux__)
+inline fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
+    _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved);
+    while (header->magic != FPSIMD_MAGIC)
+        header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast<char*>(header) + header->size);
+    return reinterpret_cast<fpsimd_context*>(header);
+}
+#endif
+#endif

From d0c3f730106c06f23ae1bebf0d0f5fc741ab7473 Mon Sep 17 00:00:00 2001
From: lizzie
Date: Sun, 7 Sep 2025 17:03:36 +0000
Subject: [PATCH 13/56] [dynarmic] fix android

Signed-off-by: lizzie
---
 src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp
index 3f0643c087..dea7a0c4b0 100644
--- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp
+++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp
@@ -181,7 +181,6 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
     }
     fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP);
 #elif defined(ARCHITECTURE_arm64)
-    CTX_DECLARE(raw_context);
     {
         std::lock_guard guard(sig_handler->code_block_infos_mutex);
         const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC);

From abe52cde407ef8f6a0d324a6ef0c6501ab465b36 Mon Sep 17 00:00:00 2001
From: lizzie
Date: Mon, 8 Sep 2025 02:47:04 +0000
Subject: [PATCH 14/56] [dynarmic, cmake] remove unused frontends var

Signed-off-by: lizzie
---
 src/dynarmic/CMakeLists.txt | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt
index 38457deb50..5fbb03b80e 100644
--- a/src/dynarmic/CMakeLists.txt
+++ b/src/dynarmic/CMakeLists.txt
@@ -39,9 +39,6 @@ option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF)
 option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. cross-compiling)" OFF)
 option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT})
 option(DYNARMIC_ENABLE_LTO "Enable LTO" OFF)
-if (NOT DEFINED DYNARMIC_FRONTENDS)
-    set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable")
-endif()
 
 # Default to a Release build
 if (NOT CMAKE_BUILD_TYPE)

From c697dd8478a99b7ad560e9c3b15c0b3a4cc16e97 Mon Sep 17 00:00:00 2001
From: lizzie
Date: Mon, 8 Sep 2025 03:36:35 +0000
Subject: [PATCH 15/56] [dynarmic, docs] fastmem docs

Signed-off-by: lizzie
---
 src/dynarmic/docs/FastMemory.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md
index 2a7c1a2583..c4f57996ba 100644
--- a/src/dynarmic/docs/FastMemory.md
+++ b/src/dynarmic/docs/FastMemory.md
@@ -1,5 +1,19 @@
 # Fast memory (Fastmem)
 
+The main way of accessing memory in JITed programs is via invoked functions, say "Read()" and "Write()". In our translator, such calls usually take a sizable amount of code space (push + call + pop), trash the i-cache (due to the indirect call) and overall make code emission more bloated.
+
+The solution? Delegate invalid accesses to a dedicated arena, similar to a swap. The main idea behind this mechanism is to allow the OS to transmit page faults from invalid accesses directly into the JIT translator, bypassing address space calls; while this sacrifices i-cache coherency, it allows for smaller code size and "faster" throughput.
+
+Many kernels, however, do not support fast signal dispatching (Solaris, OpenBSD, FreeBSD). Only Linux and Windows support relatively "fast" signal dispatching, hence this feature is best suited for them only.
+
 ![Host to guest translation](./HostToGuest.svg)
 
 ![Fastmem translation](./Fastmem.svg)
+
+In x86_64 for example, when a page fault occurs, the CPU will transmit via control registers and the stack (see `IRETQ`) the appropriate arguments for a page fault handler; the OS will then transform that into something that can be sent to userspace.
+
+Most modern OSes implement kernel page-table isolation, which means a set of system calls will incur a context switch (the rarely used syscalls), whereas others are handled within the same process address space (the smaller kernel portion, the frequently used syscalls) without needing one. This effect can be negated on systems with PCID (up to 4096 unique IDs).
+
+Signal dispatching takes a performance hit from reloading `%cr3` - but Linux does something more clever to avoid reloads: the vDSO takes care of the entire thing in the same address space, making dispatching roughly as costly as an indirect call - without the hazards of increased code size.
+
+The main downside is the constant i-cache thrashing and the pipeline hazards introduced by the vDSO signal handlers. However, on most benchmarks fastmem does perform faster than going without it (Linux only). This also exploits the fact that a contiguous address space is emulated through an arena - which can then potentially be transparently backed by hugepages, reducing TLB walk times.
From ae7b864c9232e725ed843e6adb09d768be2f05af Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:28:29 +0000 Subject: [PATCH 16/56] [dynarmic] remove use of mcl reverse iterator Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_emit_x64.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/abi.cpp | 8 ++- src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 65 ++++++++----------- 3 files changed, 33 insertions(+), 42 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 1e673338a8..1691bbb3b7 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -764,7 +764,7 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) { target_code_ptr = code.GetReturnFromRunCodeAddress(); } const CodePtr patch_location = code.getCurr(); - code.mov(code.rcx, reinterpret_cast(target_code_ptr)); + code.mov(code.rcx, u64(target_code_ptr)); code.EnsurePatchLocationSize(patch_location, 10); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp index a9bbab3d10..299bf1d1d6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp @@ -10,7 +10,6 @@ #include -#include #include "dynarmic/common/common_types.h" #include @@ -76,7 +75,8 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size); size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE); - for (auto const xmm : mcl::iterator::reverse(regs)) { + for (auto it = regs.rbegin(); it != regs.rend(); ++it) { + auto const xmm = *it; if (HostLocIsXMM(xmm)) { xmm_offset -= XMM_SIZE; if (code.HasHostFeature(HostFeature::AVX)) { @@ -88,9 +88,11 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, } if (frame_info.stack_subtraction != 0) code.add(rsp, u32(frame_info.stack_subtraction)); - for (auto const gpr : mcl::iterator::reverse(regs)) + for (auto it = regs.rbegin(); it != regs.rend(); ++it) { + auto const gpr = *it; if (HostLocIsGPR(gpr)) code.pop(HostLocToReg64(gpr)); + } } void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) { diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 383b915839..88fc34fa10 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -28,7 +28,6 @@ #include "dynarmic/ir/type.h" #include "mcl/bit/swap.hpp" #include "mcl/bit/rotate.hpp" -#include "mcl/iterator/reverse.hpp" namespace Dynarmic::Optimization { @@ -36,50 +35,42 @@ static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { for (auto& inst : block) { switch (inst.GetOpcode()) { case IR::Opcode::A32ReadMemory8: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u8 value_from_memory = cb->MemoryRead8(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u8 value_from_memory = cb->MemoryRead8(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } case IR::Opcode::A32ReadMemory16: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 
vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u16 value_from_memory = cb->MemoryRead16(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u16 value_from_memory = cb->MemoryRead16(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } case IR::Opcode::A32ReadMemory32: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u32 value_from_memory = cb->MemoryRead32(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u32 value_from_memory = cb->MemoryRead32(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } case IR::Opcode::A32ReadMemory64: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u64 value_from_memory = cb->MemoryRead64(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u64 value_from_memory = cb->MemoryRead64(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } @@ -1205,11 +1196,9 @@ static void ConstantPropagation(IR::Block& block) { static void DeadCodeElimination(IR::Block& block) { // We iterate over the instructions in reverse order. // This is because removing an instruction reduces the number of uses for earlier instructions. - for (auto& inst : mcl::iterator::reverse(block)) { - if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { - inst.Invalidate(); - } - } + for (auto it = block.rbegin(); it != block.rend(); ++it) + if (!it->HasUses() && !MayHaveSideEffects(it->GetOpcode())) + it->Invalidate(); } static void IdentityRemovalPass(IR::Block& block) { From 6a8431da643e96c0783c55516a5916ad5209cd1b Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:28:41 +0000 Subject: [PATCH 17/56] [dynarmic] checked code alignment Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_interface.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index c65b582982..33acc46dfa 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -80,16 +80,16 @@ public: }; // TODO: Check code alignment - - const CodePtr current_code_ptr = [this] { + const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15)); + const CodePtr current_code_ptr = [this, aligned_code_ptr] { // RSB optimization const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask; if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { jit_state.rsb_ptr = new_rsb_ptr; - return reinterpret_cast(jit_state.rsb_codeptrs[new_rsb_ptr]); + return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]); } - - return GetCurrentBlock(); + return aligned_code_ptr; + //return GetCurrentBlock(); }(); const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr); From df23a4aec471da4b5f1457886a5ba5a679c13e95 Mon Sep 17 00:00:00 2001 From: lizzie 
Date: Tue, 9 Sep 2025 03:31:12 +0000 Subject: [PATCH 18/56] [dynarmic] fix hardcoded AVX512 registers, use xmm0 instead of xmm16 to align with spec Signed-off-by: lizzie --- .../backend/x64/emit_x64_floating_point.cpp | 7 ++----- .../src/dynarmic/backend/x64/emit_x64_vector.cpp | 16 ++++++++-------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 47e51acb03..c7d1f8aa1c 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -92,13 +92,10 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list t FpFixup::Norm_Src, FpFixup::Norm_Src, FpFixup::Norm_Src); - - const Xbyak::Xmm tmp = xmm16; + const Xbyak::Xmm tmp = xmm0; FCODE(vmovap)(tmp, code.BConst(xword, denormal_to_zero)); - - for (const Xbyak::Xmm& xmm : to_daz) { + for (const Xbyak::Xmm& xmm : to_daz) FCODE(vfixupimms)(xmm, xmm, tmp, u8(0)); - } return; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index 99000c2a57..c6b0e3b864 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -609,8 +609,8 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -674,8 +674,8 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF)); code.vpxorq(right_shift, right_shift, right_shift); @@ -1955,8 +1955,8 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -2737,7 +2737,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm c = xmm16; + const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); code.vpsraq(c, a, 32); code.vpsllq(a, a, 32); code.vpsraq(a, a, 32); @@ -5461,7 +5461,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | 
HostFeature::AVX512BW)) {
        const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]);
        const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm masked = xmm16;
+        const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm();
 
        code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));

From 5c5c64bbccd321fc44139a778d67432ed0383e09 Mon Sep 17 00:00:00 2001
From: lizzie
Date: Tue, 9 Sep 2025 10:33:12 +0000
Subject: [PATCH 19/56] [dynarmic] unconditional branches always take

Signed-off-by: lizzie
---
 src/dynarmic/docs/Design.md                   | 49 +++++++++++++------
 src/dynarmic/docs/RegisterAllocator.md        |  3 ++
 .../dynarmic/backend/x64/block_of_code.cpp    |  4 +-
 .../src/dynarmic/backend/x64/reg_alloc.cpp    |  7 +--
 .../A64/translate/impl/a64_branch.cpp         |  9 ++--
 5 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/src/dynarmic/docs/Design.md b/src/dynarmic/docs/Design.md
index 3c0deb5972..ffa8ccecdb 100644
--- a/src/dynarmic/docs/Design.md
+++ b/src/dynarmic/docs/Design.md
@@ -273,52 +273,73 @@ Exclusive OR (i.e.: XOR)
 
 ### Callback: {Read,Write}Memory{8,16,32,64}
 
-    <u8> ReadMemory8(<u32> vaddr)
-    <u16> ReadMemory16(<u32> vaddr)
-    <u32> ReadMemory32(<u32> vaddr)
-    <u64> ReadMemory64(<u32> vaddr)
-    <void> WriteMemory8(<u32> vaddr, <u8> value_to_store)
-    <void> WriteMemory16(<u32> vaddr, <u16> value_to_store)
-    <void> WriteMemory32(<u32> vaddr, <u32> value_to_store)
-    <void> WriteMemory64(<u32> vaddr, <u64> value_to_store)
+```c++
+    <u8> ReadMemory8(<u32> vaddr)
+    <u16> ReadMemory16(<u32> vaddr)
+    <u32> ReadMemory32(<u32> vaddr)
+    <u64> ReadMemory64(<u32> vaddr)
+    <void> WriteMemory8(<u32> vaddr, <u8> value_to_store)
+    <void> WriteMemory16(<u32> vaddr, <u16> value_to_store)
+    <void> WriteMemory32(<u32> vaddr, <u32> value_to_store)
+    <void> WriteMemory64(<u32> vaddr, <u64> value_to_store)
+```
 
 Memory access.
 
 ### Terminal: Interpret
 
-    SetTerm(IR::Term::Interpret{next})
+```c++
+SetTerm(IR::Term::Interpret{next})
+```
 
 This terminal instruction calls the interpreter, starting at `next`.
 The interpreter must interpret exactly one instruction.
 
 ### Terminal: ReturnToDispatch
 
-    SetTerm(IR::Term::ReturnToDispatch{})
+```c++
+SetTerm(IR::Term::ReturnToDispatch{})
+```
 
 This terminal instruction returns control to the dispatcher.
 The dispatcher will use the value in R15 to determine what comes next.
 
 ### Terminal: LinkBlock
 
-    SetTerm(IR::Term::LinkBlock{next})
+```c++
+SetTerm(IR::Term::LinkBlock{next})
+```
 
 This terminal instruction jumps to the basic block described by `next` if we have enough
 cycles remaining. If we do not have enough cycles remaining, we return to the
 dispatcher, which will return control to the host.
 
+### Terminal: LinkBlockFast
+
+```c++
+SetTerm(IR::Term::LinkBlockFast{next})
+```
+
+This terminal instruction jumps to the basic block described by `next` unconditionally.
+This promises guarantees that must be upheld at runtime - i.e. that the program won't hang.
+
 ### Terminal: PopRSBHint
 
-    SetTerm(IR::Term::PopRSBHint{})
+```c++
+SetTerm(IR::Term::PopRSBHint{})
+```
 
 This terminal instruction checks the top of the Return Stack Buffer against R15.
 If RSB lookup fails, control is returned to the dispatcher.
 This is an optimization for faster function calls. A backend that doesn't support
-this optimization or doesn't have a RSB may choose to implement this exactly as
-ReturnToDispatch.
+this optimization or doesn't have a RSB may choose to implement this exactly as
+`ReturnToDispatch`.
 
 ### Terminal: If
 
-    SetTerm(IR::Term::If{cond, term_then, term_else})
+```c++
+SetTerm(IR::Term::If{cond, term_then, term_else})
+```
 
 This terminal instruction conditionally executes one terminal or another depending
 on the run-time state of the ARM flags.
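+
+For example (an illustrative sketch following the `SetTerm` notation above; `taken_location` and `not_taken_location` are assumed placeholder location descriptors), a conditional branch pairs `If` with two `LinkBlock` terminals:
+
+```c++
+SetTerm(IR::Term::If{cond,
+                     IR::Term::LinkBlock{taken_location},
+                     IR::Term::LinkBlock{not_taken_location}});
+```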
diff --git a/src/dynarmic/docs/RegisterAllocator.md b/src/dynarmic/docs/RegisterAllocator.md index 5a7210c791..f5bbaaf168 100644 --- a/src/dynarmic/docs/RegisterAllocator.md +++ b/src/dynarmic/docs/RegisterAllocator.md @@ -19,6 +19,9 @@ The member functions on `RegAlloc` are just a combination of the above concepts. The following registers are reserved for internal use and should NOT participate in register allocation: - `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access. - `%rsp`: Stack pointer. +- `%r15`: JIT pointer +- `%r14`: Page table pointer. +- `%r13`: Fastmem pointer. The layout convenes `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate. diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index d5d5f089ff..31b10ec6d5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -364,8 +364,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited, T_NEAR); - lock(); - or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); + lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); SwitchMxcsrOnEntry(); jmp(ABI_PARAM2); @@ -415,7 +414,6 @@ void BlockOfCode::GenRunCode(std::function rcp) { } xor_(eax, eax); - lock(); xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax); ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 919e75b77b..502a093d08 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -9,6 +9,7 @@ #include "dynarmic/backend/x64/reg_alloc.h" #include +#include #include #include @@ -152,19 +153,19 @@ bool Argument::GetImmediateU1() const noexcept { u8 Argument::GetImmediateU8() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x100); + ASSERT(imm <= u64(std::numeric_limits::max())); return u8(imm); } u16 Argument::GetImmediateU16() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x10000); + ASSERT(imm <= u64(std::numeric_limits::max())); return u16(imm); } u32 Argument::GetImmediateU32() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x100000000); + ASSERT(imm <= u64(std::numeric_limits::max())); return u32(imm); } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp index faf0686231..1bb0be823a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp @@ -20,9 +20,12 @@ bool TranslatorVisitor::B_cond(Imm<19> imm19, Cond cond) { bool TranslatorVisitor::B_uncond(Imm<26> imm26) { const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend(); const u64 target = ir.PC() + offset; - - //ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); - ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); + // Pattern to halt execution (B .) 
+    if (target == ir.PC()) {
+        ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
+        return false;
+    }
+    ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
     return false;
 }

From a410bc43d15b3775fafad1761a1f5164d2fabbb0 Mon Sep 17 00:00:00 2001
From: lizzie
Date: Tue, 9 Sep 2025 11:04:34 +0000
Subject: [PATCH 20/56] [dynarmic] Implement constant folding for
 CountLeadingZeros, add readXX constant folding for A64

Signed-off-by: lizzie
---
 src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 48 +++++++++++++++------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp
index 88fc34fa10..750a8a496b 100644
--- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp
+++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp
@@ -31,10 +31,11 @@

 namespace Dynarmic::Optimization {

-static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) {
+static void ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) {
     for (auto& inst : block) {
         switch (inst.GetOpcode()) {
-        case IR::Opcode::A32ReadMemory8: {
+        case IR::Opcode::A32ReadMemory8:
+        case IR::Opcode::A64ReadMemory8: {
             if (inst.AreAllArgsImmediates()) {
                 const u32 vaddr = inst.GetArg(1).GetU32();
                 if (cb->IsReadOnlyMemory(vaddr)) {
@@ -44,7 +45,8 @@
             }
             break;
         }
-        case IR::Opcode::A32ReadMemory16: {
+        case IR::Opcode::A32ReadMemory16:
+        case IR::Opcode::A64ReadMemory16: {
             if (inst.AreAllArgsImmediates()) {
                 const u32 vaddr = inst.GetArg(1).GetU32();
                 if (cb->IsReadOnlyMemory(vaddr)) {
@@ -54,7 +56,8 @@
             }
             break;
         }
-        case IR::Opcode::A32ReadMemory32: {
+        case IR::Opcode::A32ReadMemory32:
+        case IR::Opcode::A64ReadMemory32: {
             if (inst.AreAllArgsImmediates()) {
                 const u32 vaddr = inst.GetArg(1).GetU32();
                 if (cb->IsReadOnlyMemory(vaddr)) {
@@ -64,7 +67,8 @@
             }
             break;
         }
-        case IR::Opcode::A32ReadMemory64: {
+        case IR::Opcode::A32ReadMemory64:
+        case IR::Opcode::A64ReadMemory64: {
             if (inst.AreAllArgsImmediates()) {
                 const u32 vaddr = inst.GetArg(1).GetU32();
                 if (cb->IsReadOnlyMemory(vaddr)) {
@@ -667,14 +671,14 @@ using Op = Dynarmic::IR::Opcode;
 // bit size all over the place within folding functions.
 static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
     if (is_32_bit) {
-        inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
+        inst.ReplaceUsesWith(IR::Value{u32(value)});
     } else {
         inst.ReplaceUsesWith(IR::Value{value});
     }
 }

 static IR::Value Value(bool is_32_bit, u64 value) {
-    return is_32_bit ? IR::Value{static_cast<u32>(value)} : IR::Value{value};
+    return is_32_bit ? IR::Value{u32(value)} : IR::Value{value};
 }

 template<typename ImmFn>
@@ -800,6 +804,23 @@ static void FoldByteReverse(IR::Inst& inst, Op op) {
     }
 }

+/// Folds leading-zero count operations
+///
+/// 1. imm -> countl_zero(imm)
+///
+static void FoldCountLeadingZeros(IR::Inst& inst, bool is_32_bit) {
+    const auto operand = inst.GetArg(0);
+    if (operand.IsImmediate()) {
+        if (is_32_bit) {
+            const u32 result = std::countl_zero(u32(operand.GetImmediateAsU64()));
+            inst.ReplaceUsesWith(IR::Value{result});
+        } else {
+            const u64 result = std::countl_zero(operand.GetImmediateAsU64());
+            inst.ReplaceUsesWith(IR::Value{result});
+        }
+    }
+}
+
 /// Folds division operations based on the following:
 ///
 /// 1.
x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual)
@@ -1020,8 +1041,7 @@ static void FoldZeroExtendXToLong(IR::Inst& inst) {

 static void ConstantPropagation(IR::Block& block) {
     for (auto& inst : block) {
-        const auto opcode = inst.GetOpcode();
-
+        auto const opcode = inst.GetOpcode();
         switch (opcode) {
         case Op::LeastSignificantWord:
             FoldLeastSignificantWord(inst);
             break;
@@ -1110,12 +1130,12 @@ static void ConstantPropagation(IR::Block& block) {
             break;
         case Op::ArithmeticShiftRightMasked32:
             if (inst.AreAllArgsImmediates()) {
-                ReplaceUsesWith(inst, true, static_cast<s32>(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f));
+                ReplaceUsesWith(inst, true, s32(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f));
             }
             break;
         case Op::ArithmeticShiftRightMasked64:
             if (inst.AreAllArgsImmediates()) {
-                ReplaceUsesWith(inst, false, static_cast<s64>(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f));
+                ReplaceUsesWith(inst, false, s64(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f));
             }
             break;
         case Op::RotateRightMasked32:
@@ -1187,6 +1207,10 @@ static void ConstantPropagation(IR::Block& block) {
         case Op::ByteReverseDual:
             FoldByteReverse(inst, opcode);
             break;
+        case Op::CountLeadingZeros32:
+        case Op::CountLeadingZeros64:
+            FoldCountLeadingZeros(inst, opcode == Op::CountLeadingZeros32);
+            break;
         default:
             break;
         }
@@ -1462,7 +1486,7 @@ void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization:
         Optimization::DeadCodeElimination(block);
     }
     if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] {
-        Optimization::A32ConstantMemoryReads(block, conf.callbacks);
+        Optimization::ConstantMemoryReads(block, conf.callbacks);
         Optimization::ConstantPropagation(block);
         Optimization::DeadCodeElimination(block);
     }

From 4836522d575a7ccdd2421ec1f017f09477dd01c3 Mon Sep 17 00:00:00 2001
From: lizzie
Date: Tue, 9 Sep 2025 14:46:35 +0000
Subject: [PATCH 21/56] [dynarmic] regalloc: use ScratchImpl that uses all
 registers instead of iterating

Signed-off-by: lizzie
---
 .../src/dynarmic/backend/x64/a64_emit_x64.cpp |  4 +--
 .../src/dynarmic/backend/x64/reg_alloc.cpp    | 30 ++++++++++---------
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
index 1691bbb3b7..fa1f071fbf 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
@@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
         auto const opcode = inst.GetOpcode();
         // Call the relevant Emit* member function.
         switch (opcode) {
-#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch;
+#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch;
 #define A32OPC(name, type, ...)
-#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch;
+#define A64OPC(name, type, ...)
case IR::Opcode::A64##name: goto a64_branch;
 #include "dynarmic/ir/opcodes.inc"
 #undef OPCODE
 #undef A32OPC

diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
index 502a093d08..2db817a90f 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
@@ -367,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def,
     if (result_def) {
         DefineValueImpl(result_def, ABI_RETURN);
     }
-
+    for (size_t i = 0; i < args.size(); i++) {
+        if (args[i]) {
+            UseScratch(*args[i], args_hostloc[i]);
+        } else {
+            ScratchGpr(args_hostloc[i]); // TODO: Force spill
+        }
+    }
+    // Must match with ScratchImpl
+    for (auto const gpr : other_caller_save) {
+        MoveOutOfTheWay(gpr);
+        LocInfo(gpr).WriteLock();
+    }
     for (size_t i = 0; i < args.size(); i++) {
         if (args[i] && !args[i]->get().IsVoid()) {
-            UseScratch(*args[i], args_hostloc[i]);
             // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
             const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
             switch (args[i]->get().GetType()) {
@@ -390,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def,
             }
         }
     }
-
-    for (size_t i = 0; i < args.size(); i++)
-        if (!args[i]) {
-            // TODO: Force spill
-            ScratchGpr(args_hostloc[i]);
-        }
-    for (auto const caller_saved : other_caller_save)
-        ScratchImpl({caller_saved});
 }

 void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
@@ -560,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept {
 }

 HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
-#if 0
     // TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows
     // but it's fine anyways. We can find other ways to cheat it later - but which?!?!
     // we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end?
     // TODO(lizzie): This needs to be investigated further later.
     // Do not spill XMM into other XMM silly
-    if (!is_xmm) {
+    /*if (!is_xmm) {
         // TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY
         // Intel recommends to spill GPR onto XMM registers IF POSSIBLE
        // TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call?
         for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i)
             if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
                 return loc;
-    }
-#endif
+    }*/
+    // TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits
+    // of spilling on XMM versus the potential cost of using XMM registers.
// Otherwise go to stack spilling for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) From 3ee12deb712539684f9b9e97de9b1a63fc0ec747 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 18:29:39 +0000 Subject: [PATCH 22/56] [dynarmic] use better boost::visitor Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 1 - .../src/dynarmic/backend/x64/a32_emit_x64.cpp | 1 - .../src/dynarmic/backend/x64/emit_x64.cpp | 6 ++-- .../src/dynarmic/common/variant_util.h | 29 ------------------- src/dynarmic/tests/A32/fuzz_arm.cpp | 4 +-- 5 files changed, 5 insertions(+), 36 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/variant_util.h diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index d6be793dc6..3d85f1de29 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -61,7 +61,6 @@ add_library(dynarmic common/string_util.h common/u128.cpp common/u128.h - common/variant_util.h frontend/A32/a32_types.cpp frontend/A32/a32_types.h frontend/A64/a64_types.cpp diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index fb306336cf..3186758380 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -28,7 +28,6 @@ #include "dynarmic/backend/x64/nzcv_util.h" #include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/stack_layout.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index 3bc93e6fd5..0bb2604025 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -11,6 +11,7 @@ #include #include "dynarmic/common/assert.h" +#include #include #include #include "dynarmic/common/common_types.h" @@ -21,7 +22,6 @@ #include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/verbose_debugging_output.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -347,14 +347,14 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de } void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - Common::VisitVariant(terminal, [this, initial_location, is_single_step](auto x) { + boost::apply_visitor([this, initial_location, is_single_step](auto x) { using T = std::decay_t; if constexpr (!std::is_same_v) { this->EmitTerminalImpl(x, initial_location, is_single_step); } else { ASSERT_MSG(false, "Invalid terminal"); } - }); + }, terminal); } void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { diff --git a/src/dynarmic/src/dynarmic/common/variant_util.h b/src/dynarmic/src/dynarmic/common/variant_util.h deleted file mode 100644 index 4dd7f67167..0000000000 --- a/src/dynarmic/src/dynarmic/common/variant_util.h +++ /dev/null @@ -1,29 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include - -namespace Dynarmic::Common { -namespace detail { - -template -struct VariantVisitor : boost::static_visitor - , Lambda { - VariantVisitor(Lambda&& lambda) - : Lambda(std::move(lambda)) {} - - using Lambda::operator(); -}; - -} // namespace detail - -template -inline ReturnT VisitVariant(Variant&& variant, Lambda&& lambda) { - return boost::apply_visitor(detail::VariantVisitor(std::move(lambda)), variant); -} - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp index 5ee6d2bf02..bef473a491 100644 --- a/src/dynarmic/tests/A32/fuzz_arm.cpp +++ b/src/dynarmic/tests/A32/fuzz_arm.cpp @@ -47,7 +47,7 @@ using namespace Dynarmic; template bool AnyLocationDescriptorForTerminalHas(IR::Terminal terminal, Fn fn) { - return Common::VisitVariant(terminal, [&](auto t) -> bool { + return boost::apply_visitor([&](auto t) -> bool { using T = std::decay_t; if constexpr (std::is_same_v) { return false; @@ -73,7 +73,7 @@ bool AnyLocationDescriptorForTerminalHas(IR::Terminal terminal, Fn fn) { ASSERT_MSG(false, "Invalid terminal type"); return false; } - }); + }, terminal); } bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) { From d3663971d86afcf28e6ca715fbcffd49f1e29bc6 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 16 Sep 2025 20:20:21 +0000 Subject: [PATCH 23/56] [dynarmic] fix exception posix handler Signed-off-by: lizzie --- .../backend/exception_handler_posix.cpp | 81 ++++--------------- 1 file changed, 14 insertions(+), 67 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index dea7a0c4b0..f1f208179f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -7,14 +7,16 @@ */ #include +#include +#include #include +#include #include +#include #include "dynarmic/backend/exception_handler.h" #include "dynarmic/common/assert.h" #include "dynarmic/common/context.h" -#include #include "dynarmic/common/common_types.h" - #if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/block_of_code.h" #elif defined(ARCHITECTURE_arm64) @@ -111,68 +113,14 @@ void RegisterHandler() { } } -SigHandler::SigHandler() { - const size_t signal_stack_size = std::max(SIGSTKSZ, 2 * 1024 * 1024); - - signal_stack_memory = std::malloc(signal_stack_size); - - stack_t signal_stack; - signal_stack.ss_sp = signal_stack_memory; - signal_stack.ss_size = signal_stack_size; - signal_stack.ss_flags = 0; - if (sigaltstack(&signal_stack, nullptr) != 0) { - fmt::print(stderr, "dynarmic: POSIX SigHandler: init failure at sigaltstack\n"); - supports_fast_mem = false; - return; - } - - struct sigaction sa; - sa.sa_handler = nullptr; - sa.sa_sigaction = &SigHandler::SigAction; - sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART; - sigemptyset(&sa.sa_mask); - if (sigaction(SIGSEGV, &sa, &old_sa_segv) != 0) { - fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGSEGV handler\n"); - supports_fast_mem = false; - return; - } -#ifdef __APPLE__ - if (sigaction(SIGBUS, &sa, &old_sa_bus) != 0) { - fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGBUS handler\n"); - supports_fast_mem = false; - return; - } -#endif -} - -SigHandler::~SigHandler() { - std::free(signal_stack_memory); -} - -void 
SigHandler::AddCodeBlock(CodeBlockInfo cbi) { - std::lock_guard guard(code_block_infos_mutex); - if (auto const iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) - code_block_infos.erase(iter); - code_block_infos.push_back(cbi); -} - -void SigHandler::RemoveCodeBlock(u64 host_pc) { - std::lock_guard guard(code_block_infos_mutex); - const auto iter = FindCodeBlockInfo(host_pc); - if (iter != code_block_infos.end()) - code_block_infos.erase(iter); -} - void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { - ASSERT(sig == SIGSEGV || sig == SIGBUS); + DEBUG_ASSERT(sig == SIGSEGV || sig == SIGBUS); CTX_DECLARE(raw_context); #if defined(ARCHITECTURE_x86_64) { - std::lock_guard guard(sig_handler->code_block_infos_mutex); - - const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP); - if (iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->cb(CTX_RIP); + std::shared_lock guard(sig_handler->code_block_infos_mutex); + if (auto const iter = sig_handler->FindCodeBlockInfo(CTX_RIP); iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->second.cb(CTX_RIP); CTX_RSP -= sizeof(u64); *mcl::bit_cast(CTX_RSP) = fc.ret_rip; CTX_RIP = fc.call_rip; @@ -182,10 +130,9 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); #elif defined(ARCHITECTURE_arm64) { - std::lock_guard guard(sig_handler->code_block_infos_mutex); - const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); - if (iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->cb(CTX_PC); + std::shared_lock guard(sig_handler->code_block_infos_mutex); + if (const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->second.cb(CTX_PC); CTX_PC = fc.call_pc; return; } @@ -240,15 +187,15 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(ARCHITECTURE_x86_64) +#if defined(MCL_ARCHITECTURE_X86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { impl = std::make_unique(mcl::bit_cast(code.getCode()), code.GetTotalCodeSize()); } -#elif defined(ARCHITECTURE_arm64) +#elif defined(MCL_ARCHITECTURE_ARM64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } -#elif defined(ARCHITECTURE_riscv64) +#elif defined(MCL_ARCHITECTURE_RISCV) void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } From e72d668be3068faa19c8dd65052155c76d0acfd9 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 17 Sep 2025 20:51:37 +0000 Subject: [PATCH 24/56] [dynarmic] Allow to skip verification pass Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/interface/optimization_flags.h | 2 ++ src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/src/dynarmic/interface/optimization_flags.h b/src/dynarmic/src/dynarmic/interface/optimization_flags.h index 743d902767..2e8822b27a 100644 --- a/src/dynarmic/src/dynarmic/interface/optimization_flags.h +++ b/src/dynarmic/src/dynarmic/interface/optimization_flags.h @@ -34,6 +34,8 @@ enum class OptimizationFlag : std::uint32_t { MiscIROpt = 0x00000020, /// Optimize for code speed rather than for code size (this serves well for tight loops) CodeSpeed = 0x00000040, + /// 
Disable verification passes
+    DisableVerification = 0x00000080,

     /// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations.
     /// This unfuses fused instructions to improve performance on host CPUs without FMA support.
diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp
index 750a8a496b..e9175f0e6b 100644
--- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp
+++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp
@@ -1491,7 +1491,9 @@ void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization:
         Optimization::DeadCodeElimination(block);
     }
     Optimization::IdentityRemovalPass(block);
-    Optimization::VerificationPass(block);
+    if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) {
+        Optimization::VerificationPass(block);
+    }
 }

 void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) {
@@ -1509,7 +1511,9 @@ void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization:
     if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] {
         Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks);
     }
-    Optimization::VerificationPass(block);
+    if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) {
+        Optimization::VerificationPass(block);
+    }
 }

 } // namespace Dynarmic::Optimization

From 2eebedcb172dacbb126b3a30ee09e57e91c48820 Mon Sep 17 00:00:00 2001
From: lizzie
Date: Wed, 17 Sep 2025 21:27:23 +0000
Subject: [PATCH 25/56] [dynarmic] inlined pool in block + slab-like for each
 block

Signed-off-by: lizzie
---
 src/dynarmic/src/dynarmic/ir/basic_block.cpp | 16 +++++++++++++++-
 src/dynarmic/src/dynarmic/ir/basic_block.h   |  8 ++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp
index a13a7ebfc9..612a3d28e9 100644
--- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp
+++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp
@@ -37,7 +37,21 @@ Block::Block(const LocationDescriptor& location)
 /// @param args A sequence of Value instances used as arguments for the instruction.
 /// @returns Iterator to the newly created instruction.
 Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
-    IR::Inst* inst = new IR::Inst(opcode);
+    // First try using the "inline" buffer, otherwise fall back to a slower slab-like allocation scheme.
+    // The purpose is to avoid many calls to new/delete, which invoke malloc, which invokes mmap - just pool it!
+    // The inline buffer exists because many small blocks with only a few instructions are created
+    // (due to subpar optimisations in other passes), and branch-heavy code benefits
+    // hugely from the locality of pooled allocations...
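+    // Sketch of the scheme: the first few instructions of a block land in the
+    // fixed-capacity `inlined_inst` buffer (no heap traffic at all); once that is
+    // full, instructions go into fixed-capacity chunks of `pooled_inst`, so the
+    // allocator is hit once per chunk rather than once per instruction.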
+ IR::Inst* inst; + if (inlined_inst.size() < inlined_inst.max_size()) { + inst = &inlined_inst[inlined_inst.size()]; + inlined_inst.emplace_back(opcode); + } else { + if (pooled_inst.empty() || pooled_inst.back().size() == pooled_inst.back().max_size()) + pooled_inst.emplace_back(); + inst = &pooled_inst.back()[pooled_inst.back().size()]; + pooled_inst.back().emplace_back(opcode); + } DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index 2f2d9ab6de..166a5e4d1b 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -13,6 +13,9 @@ #include #include +#include +#include +#include #include #include "dynarmic/common/common_types.h" @@ -163,8 +166,12 @@ public: return cycle_count; } private: + /// "Hot cache" for small blocks so we don't call global allocator + boost::container::static_vector inlined_inst; /// List of instructions in this block. instruction_list_type instructions; + /// "Long/far" memory pool + boost::container::stable_vector> pooled_inst; /// Block to execute next if `cond` did not pass. std::optional cond_failed = {}; /// Description of the starting location of this block @@ -180,6 +187,7 @@ private: /// Number of cycles this block takes to execute. size_t cycle_count = 0; }; +static_assert(sizeof(Block) == 2048); /// Returns a string representation of the contents of block. Intended for debugging. std::string DumpBlock(const IR::Block& block) noexcept; From a3c0d59dc9cbc8648d779e81df79b4f5e7ba6e16 Mon Sep 17 00:00:00 2001 From: lizzie Date: Fri, 19 Sep 2025 16:57:34 +0200 Subject: [PATCH 26/56] [android] update translations for dynarmic to say it's jit, remove "(slow)" from paranoid (#2527) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2527 Reviewed-by: MaranBr Reviewed-by: crueter Co-authored-by: lizzie Co-committed-by: lizzie --- src/android/app/src/main/res/values-ar/strings.xml | 4 ++-- src/android/app/src/main/res/values-es/strings.xml | 4 ++-- src/android/app/src/main/res/values-fa/strings.xml | 4 ++-- src/android/app/src/main/res/values-fr/strings.xml | 4 ++-- src/android/app/src/main/res/values-he/strings.xml | 4 ++-- src/android/app/src/main/res/values-hu/strings.xml | 4 ++-- src/android/app/src/main/res/values-id/strings.xml | 4 ++-- src/android/app/src/main/res/values-ja/strings.xml | 4 ++-- src/android/app/src/main/res/values-ko/strings.xml | 4 ++-- src/android/app/src/main/res/values-pt-rBR/strings.xml | 4 ++-- src/android/app/src/main/res/values-pt-rPT/strings.xml | 4 ++-- src/android/app/src/main/res/values-ru/strings.xml | 4 ++-- src/android/app/src/main/res/values-sr/strings.xml | 4 ++-- src/android/app/src/main/res/values-zh-rCN/strings.xml | 4 ++-- src/android/app/src/main/res/values-zh-rTW/strings.xml | 4 ++-- src/android/app/src/main/res/values/strings.xml | 4 ++-- 16 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/android/app/src/main/res/values-ar/strings.xml b/src/android/app/src/main/res/values-ar/strings.xml index a758e1c7cd..35f7d20198 100644 --- a/src/android/app/src/main/res/values-ar/strings.xml +++ b/src/android/app/src/main/res/values-ar/strings.xml @@ -836,13 +836,13 @@ تمتد إلى النافذة - Dynarmic (بطيء) + Dynarmic (JIT) تنفيذ التعليمات البرمجية الأصلية (NCE) دقه غير آمن - Paranoid (بطيء) + Paranoid الأسهم diff --git 
a/src/android/app/src/main/res/values-es/strings.xml b/src/android/app/src/main/res/values-es/strings.xml index 2ee0e1783a..1f73df7385 100644 --- a/src/android/app/src/main/res/values-es/strings.xml +++ b/src/android/app/src/main/res/values-es/strings.xml @@ -919,13 +919,13 @@ Ajustar a la ventana - DynARMic (lento) + Dynarmic (JIT) Ejecución nativa de código (NCE) Preciso Impreciso - Paranoico (Lento) + Paranoico Cruceta diff --git a/src/android/app/src/main/res/values-fa/strings.xml b/src/android/app/src/main/res/values-fa/strings.xml index 79cf5f49e6..fd5c6ae6ca 100644 --- a/src/android/app/src/main/res/values-fa/strings.xml +++ b/src/android/app/src/main/res/values-fa/strings.xml @@ -918,13 +918,13 @@ کشش تا پر کردن پنجره - دینارمیک (کند) + Dynarmic (JIT) اجرای کد اصلی (NCE) دقیق ناامن - بدگمان (کند) + بدگمان کلیدهای جهتی diff --git a/src/android/app/src/main/res/values-fr/strings.xml b/src/android/app/src/main/res/values-fr/strings.xml index e9df08a4de..97496ef33d 100644 --- a/src/android/app/src/main/res/values-fr/strings.xml +++ b/src/android/app/src/main/res/values-fr/strings.xml @@ -967,13 +967,13 @@ Étirer à la fenêtre - Dynarmic (Lent) + Dynarmic (JIT) Exécution de code natif (NCE) Précis Risqué - Paranoïaque (Lent) + Paranoïaque Pavé directionnel diff --git a/src/android/app/src/main/res/values-he/strings.xml b/src/android/app/src/main/res/values-he/strings.xml index 4e1624a556..359e8dff9a 100644 --- a/src/android/app/src/main/res/values-he/strings.xml +++ b/src/android/app/src/main/res/values-he/strings.xml @@ -844,13 +844,13 @@ הרחב לגודל המסך - דינמי (איטי) + דינמי ביצוע קוד מקורי (NCE) מדויק לא בטוח - פראנואידי (איטי) + פראנואידי D-pad diff --git a/src/android/app/src/main/res/values-hu/strings.xml b/src/android/app/src/main/res/values-hu/strings.xml index 061ac07388..36157d1578 100644 --- a/src/android/app/src/main/res/values-hu/strings.xml +++ b/src/android/app/src/main/res/values-hu/strings.xml @@ -954,13 +954,13 @@ Ablakhoz nyújtás - Dinamikus (lassú) + Dynarmic (JIT) Natív kód végrehajtás (Native code execution (NCE)) Pontos Nem biztonságos - Paranoid (Lassú) + Paranoid D-pad diff --git a/src/android/app/src/main/res/values-id/strings.xml b/src/android/app/src/main/res/values-id/strings.xml index 6e3b64953f..18e881a97b 100644 --- a/src/android/app/src/main/res/values-id/strings.xml +++ b/src/android/app/src/main/res/values-id/strings.xml @@ -911,13 +911,13 @@ Merentangkan - Dynamic (Lambat) + Dynarmic (JIT) Native code execution (NCE) Akurat Berbahaya - Paranoid (Slow) + Paranoid D Pad diff --git a/src/android/app/src/main/res/values-ja/strings.xml b/src/android/app/src/main/res/values-ja/strings.xml index 179601f182..0a873b04eb 100644 --- a/src/android/app/src/main/res/values-ja/strings.xml +++ b/src/android/app/src/main/res/values-ja/strings.xml @@ -834,13 +834,13 @@ 画面に合わせる - Dynarmic (低速) + Dynarmic (JIT) ネイティブコード実行 (NCE) 正確 不安定 - パラノイド (低速) + パラノイド 方向ボタン diff --git a/src/android/app/src/main/res/values-ko/strings.xml b/src/android/app/src/main/res/values-ko/strings.xml index 6f4dd42af2..e598bb1120 100644 --- a/src/android/app/src/main/res/values-ko/strings.xml +++ b/src/android/app/src/main/res/values-ko/strings.xml @@ -910,13 +910,13 @@ 화면에 맞춤 - Dynarmic (느림) + Dynarmic (JIT) 네이티브 코드 실행 (NCE) 정확함 최적화 (안전하지 않음) - 최적화하지 않음 (느림) + 최적화하지 않음 십자키 diff --git a/src/android/app/src/main/res/values-pt-rBR/strings.xml b/src/android/app/src/main/res/values-pt-rBR/strings.xml index eec3fdf715..5571c2aea4 100644 --- a/src/android/app/src/main/res/values-pt-rBR/strings.xml +++ 
b/src/android/app/src/main/res/values-pt-rBR/strings.xml @@ -968,13 +968,13 @@ uma tentativa de mapeamento automático Esticar à janela - Dynarmic (Lenta) + Dynarmic (JIT) Execução de código nativo (NCE) Precisa Não segura - Paranoica (Lenta) + Paranoica Botões Direcionais diff --git a/src/android/app/src/main/res/values-pt-rPT/strings.xml b/src/android/app/src/main/res/values-pt-rPT/strings.xml index d45bf4bfc9..81aa7e92c0 100644 --- a/src/android/app/src/main/res/values-pt-rPT/strings.xml +++ b/src/android/app/src/main/res/values-pt-rPT/strings.xml @@ -968,13 +968,13 @@ uma tentativa de mapeamento automático Esticar à janela - Dynarmic (Lento) + Dynarmic (JIT) Native code execution (NCE) Preciso Inseguro - Paranoid (Lento) + Paranoid Botões Direcionais diff --git a/src/android/app/src/main/res/values-ru/strings.xml b/src/android/app/src/main/res/values-ru/strings.xml index 2f7714257f..eee249c44a 100644 --- a/src/android/app/src/main/res/values-ru/strings.xml +++ b/src/android/app/src/main/res/values-ru/strings.xml @@ -969,13 +969,13 @@ Растянуть до окна - Dynarmic (Медленно) + Dynarmic (JIT) Нативное выполнение (NCE) Точно Небезопасно - Параноик (медленно) + Параноик Крестовина diff --git a/src/android/app/src/main/res/values-sr/strings.xml b/src/android/app/src/main/res/values-sr/strings.xml index e261772fc4..b123757f5a 100644 --- a/src/android/app/src/main/res/values-sr/strings.xml +++ b/src/android/app/src/main/res/values-sr/strings.xml @@ -988,13 +988,13 @@ Протезање до прозора - Динамина (споро) + Dynarmic (JIT) Извођење изворног кода (НЦЕ) Тачан Несигуран - Параноичан (споро) + Параноичан Д-пад diff --git a/src/android/app/src/main/res/values-zh-rCN/strings.xml b/src/android/app/src/main/res/values-zh-rCN/strings.xml index bfdc3af3d3..5cf657ef3d 100644 --- a/src/android/app/src/main/res/values-zh-rCN/strings.xml +++ b/src/android/app/src/main/res/values-zh-rCN/strings.xml @@ -961,13 +961,13 @@ 拉伸窗口 - 动态编译 (慢速) + Dynarmic (JIT) 本机代码执行 (NCE) 高精度 低精度 - 偏执模式 (慢速) + 偏执模式 十字方向键 diff --git a/src/android/app/src/main/res/values-zh-rTW/strings.xml b/src/android/app/src/main/res/values-zh-rTW/strings.xml index e64aaa9a54..f4d690dbaa 100644 --- a/src/android/app/src/main/res/values-zh-rTW/strings.xml +++ b/src/android/app/src/main/res/values-zh-rTW/strings.xml @@ -966,13 +966,13 @@ 延展視窗 - 動態 (慢) + Dynarmic (JIT) 機器碼執行 (NCE) 高精度 低精度 - 不合理 (慢) + 不合理 方向鍵 diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index 2cef5903cb..3b76c0ba79 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -1014,13 +1014,13 @@ Stretch to window - Dynarmic (Slow) + Dynarmic (JIT) Native code execution (NCE) Accurate Unsafe - Paranoid (Slow) + Paranoid D-pad From a487cea683ab4a43c25989637a8c7a9c39a7e695 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Fri, 19 Sep 2025 17:02:53 +0200 Subject: [PATCH 27/56] [core] Fix buiding with fmt 10 (#2524) Signed-off-by: Caio Oliveira Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2524 Reviewed-by: MaranBr Reviewed-by: crueter Co-authored-by: Caio Oliveira Co-committed-by: Caio Oliveira --- src/core/perf_stats.cpp | 12 +++++++++++- src/core/reporter.cpp | 7 ++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp index 4439611d2e..35e76624f4 100644 --- a/src/core/perf_stats.cpp +++ b/src/core/perf_stats.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator 
Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2017 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -41,7 +44,14 @@ PerfStats::~PerfStats() { const auto path = Common::FS::GetEdenPath(Common::FS::EdenPath::LogDir); // %F Date format expanded is "%Y-%m-%d" - const auto filename = fmt::format("{:%F-%H-%M}_{:016X}.csv", *std::localtime(&t), title_id); + const auto filename = fmt::format("{}_{:016X}.csv", + [&] { + std::ostringstream oss; + oss << std::put_time(std::localtime(&t), "%F-%H-%M"); + return oss.str(); + }(), + title_id); + const auto filepath = path / filename; if (Common::FS::CreateParentDir(filepath)) { diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index 4bac8142c3..1723636811 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -33,7 +36,9 @@ std::filesystem::path GetPath(std::string_view type, u64 title_id, std::string_v std::string GetTimestamp() { const auto time = std::time(nullptr); - return fmt::format("{:%FT%H-%M-%S}", *std::localtime(&time)); + std::ostringstream oss; + oss << std::put_time(std::localtime(&time), "%FT%H-%M-%S"); + return oss.str(); } using namespace nlohmann; From 6510818fca962ebee78089e79e972221dfbe3ef4 Mon Sep 17 00:00:00 2001 From: crueter Date: Fri, 19 Sep 2025 18:20:51 +0200 Subject: [PATCH 28/56] [docs] fixup codeowners (#2529) Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2529 Reviewed-by: MaranBr Reviewed-by: Shinmegumi --- docs/CODEOWNERS | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/CODEOWNERS b/docs/CODEOWNERS index 503f9ec1fe..dcb97a5e83 100644 --- a/docs/CODEOWNERS +++ b/docs/CODEOWNERS @@ -10,17 +10,18 @@ /.ci @crueter # cmake -*.cmake @crueter -*/CMakeLists.txt @crueter -*.in @crueter +*.cmake @crueter +*CMakeLists.txt @crueter +*.in @crueter # individual stuff -/src/web_service @AleksandrPopovich -/src/dynarmic @Lizzie -/src/core @Lizzie @Maufeat @PavelBARABANOV @MrPurple666 -/src/core/hle @Maufeat @PavelBARABANOV @SDK-Chan -/src/*_room @AleksandrPopovich -/src/video_core @CamilleLaVey @MaranBr @Wildcard @weakboson +src/web_service @AleksandrPopovich +src/dynarmic @Lizzie +src/core @Lizzie @Maufeat @PavelBARABANOV @MrPurple666 @JPikachu +src/core/hle @Maufeat @PavelBARABANOV @SDK-Chan +src/core/arm @Lizzie @MrPurple666 +src/*_room @AleksandrPopovich +src/video_core @CamilleLaVey @MaranBr @Wildcard @weakboson # Global owners/triage * @CamilleLaVey @Maufeat @crueter @MrPurple666 @MaranBr @Lizzie \ No newline at end of file From 214637ced866d66bfa24bb46c8d9a8cfb4f23907 Mon Sep 17 00:00:00 2001 From: lizzie Date: Fri, 19 Sep 2025 16:38:17 +0000 Subject: [PATCH 29/56] Fix license headers Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 2 ++ src/dynarmic/tests/CMakeLists.txt | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index 3d85f1de29..3155a1cda1 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -1,3 +1,5 @@ +# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later include(TargetArchitectureSpecificSources) add_library(dynarmic diff --git 
a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index f8f420e2d6..4ace6c2afd 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -1,3 +1,5 @@ +# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later include(TargetArchitectureSpecificSources) add_executable(dynarmic_tests From 725407b989b5f9021ae7f1aae664f7204cc94a47 Mon Sep 17 00:00:00 2001 From: MaranBr Date: Sat, 20 Sep 2025 14:17:07 +0200 Subject: [PATCH 30/56] [video_core] Add ability for integrated devices to control the amount of memory used by the emulator (#2528) This adds the ability for integrated devices to control the amount of memory used by the emulator. Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2528 Reviewed-by: Lizzie Reviewed-by: Shinmegumi Co-authored-by: MaranBr Co-committed-by: MaranBr --- src/qt_common/shared_translation.cpp | 5 +---- src/video_core/vulkan_common/vulkan_device.cpp | 15 ++++++--------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/qt_common/shared_translation.cpp b/src/qt_common/shared_translation.cpp index cdc05e60e0..eb413f28e9 100644 --- a/src/qt_common/shared_translation.cpp +++ b/src/qt_common/shared_translation.cpp @@ -246,10 +246,7 @@ std::unique_ptr InitializeTranslations(QObject* parent) INSERT(Settings, vram_usage_mode, tr("VRAM Usage Mode:"), - tr("Selects whether the emulator should prefer to conserve memory or make maximum usage " - "of available video memory for performance.\nHas no effect on integrated graphics. " - "Aggressive mode may severely impact the performance of other applications such as " - "recording software.")); + tr("Selects whether the emulator should prefer to conserve memory or make maximum usage of available video memory for performance.\nAggressive mode may severely impact the performance of other applications such as recording software.")); INSERT(Settings, skip_cpu_inner_invalidation, tr("Skip CPU Inner Invalidation"), diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 6d7c33099b..41917a1b90 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1395,23 +1395,20 @@ void Device::CollectPhysicalMemoryInfo() { } device_access_memory += mem_properties.memoryHeaps[element].size; } - if (!is_integrated) { + if (is_integrated) { + const s64 available_memory = static_cast(device_access_memory - device_initial_usage); + const u64 memory_size = Settings::values.vram_usage_mode.GetValue() == Settings::VramUsageMode::Aggressive ? 
6_GiB : 4_GiB; + device_access_memory = static_cast(std::max(std::min(available_memory - 8_GiB, memory_size), std::min(local_memory, memory_size))); + } else { const u64 reserve_memory = std::min(device_access_memory / 8, 1_GiB); device_access_memory -= reserve_memory; - if (Settings::values.vram_usage_mode.GetValue() != Settings::VramUsageMode::Aggressive) { // Account for resolution scaling in memory limits const size_t normal_memory = 6_GiB; const size_t scaler_memory = 1_GiB * Settings::values.resolution_info.ScaleUp(1); - device_access_memory = - std::min(device_access_memory, normal_memory + scaler_memory); + device_access_memory = std::min(device_access_memory, normal_memory + scaler_memory); } - - return; } - const s64 available_memory = static_cast(device_access_memory - device_initial_usage); - device_access_memory = static_cast(std::max( - std::min(available_memory - 8_GiB, 6_GiB), std::min(local_memory, 6_GiB))); } void Device::CollectToolingInfo() { From a0e47b816be2c0eb09c0fc854eb3b58d7a740de5 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:54:53 +0000 Subject: [PATCH 31/56] [dynarmic] reduce opt pass latency Signed-off-by: lizzie --- src/dynarmic/docs/RegisterAllocator.md | 40 +- .../docs/ReturnStackBufferOptimization.md | 162 +- src/dynarmic/src/dynarmic/CMakeLists.txt | 199 +-- .../backend/arm64/a32_address_space.cpp | 18 +- .../backend/arm64/a64_address_space.cpp | 19 +- .../backend/riscv64/a32_address_space.cpp | 16 +- .../dynarmic/backend/x64/a32_interface.cpp | 16 +- .../dynarmic/backend/x64/a64_interface.cpp | 18 +- .../src/dynarmic/common/memory_pool.cpp | 13 - .../src/dynarmic/common/memory_pool.h | 61 - src/dynarmic/src/dynarmic/ir/basic_block.cpp | 7 +- src/dynarmic/src/dynarmic/ir/basic_block.h | 3 - src/dynarmic/src/dynarmic/ir/ir_emitter.cpp | 21 - .../ir/opt/a32_constant_memory_reads_pass.cpp | 70 - .../ir/opt/a32_get_set_elimination_pass.cpp | 382 ----- .../ir/opt/a64_callback_config_pass.cpp | 57 - .../ir/opt/a64_get_set_elimination_pass.cpp | 165 -- .../ir/opt/a64_merge_interpret_blocks.cpp | 57 - .../ir/opt/constant_propagation_pass.cpp | 559 ------ .../ir/opt/dead_code_elimination_pass.cpp | 23 - .../dynarmic/ir/opt/identity_removal_pass.cpp | 44 - src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h | 127 -- .../src/dynarmic/ir/opt/naming_pass.cpp | 18 - src/dynarmic/src/dynarmic/ir/opt/passes.h | 47 - .../src/dynarmic/ir/opt/polyfill_pass.cpp | 218 --- .../src/dynarmic/ir/opt/verification_pass.cpp | 51 - src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 1502 +++++++++++++++++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 34 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 10 +- src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 2 +- src/dynarmic/tests/CMakeLists.txt | 146 +- src/dynarmic/tests/print_info.cpp | 2 +- 32 files changed, 1808 insertions(+), 2299 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.cpp delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.h delete mode 100644 src/dynarmic/src/dynarmic/ir/ir_emitter.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp 
delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/passes.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.h diff --git a/src/dynarmic/docs/RegisterAllocator.md b/src/dynarmic/docs/RegisterAllocator.md index fea6f19e6a..5a7210c791 100644 --- a/src/dynarmic/docs/RegisterAllocator.md +++ b/src/dynarmic/docs/RegisterAllocator.md @@ -16,19 +16,31 @@ Note that `Use`ing a value decrements its `use_count` by one. When the `use_coun The member functions on `RegAlloc` are just a combination of the above concepts. +The following registers are reserved for internal use and should NOT participate in register allocation: +- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access. +- `%rsp`: Stack pointer. + +The layout convenes `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate. + +Do NEVER modify `%r15`, we must make it clear that this register is "immutable" for the entirety of the JIT block duration. + ### `Scratch` - Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr) - Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm) +```c++ +Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr); +Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm); +``` At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope. ### Pure `Use` - Xbyak::Reg64 UseGpr(Argument& arg); - Xbyak::Xmm UseXmm(Argument& arg); - OpArg UseOpArg(Argument& arg); - void Use(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseGpr(Argument& arg); +Xbyak::Xmm UseXmm(Argument& arg); +OpArg UseOpArg(Argument& arg); +void Use(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it @@ -39,9 +51,11 @@ This register **must not** have it's value changed. ### `UseScratch` - Xbyak::Reg64 UseScratchGpr(Argument& arg); - Xbyak::Xmm UseScratchXmm(Argument& arg); - void UseScratch(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseScratchGpr(Argument& arg); +Xbyak::Xmm UseScratchXmm(Argument& arg); +void UseScratch(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it @@ -55,7 +69,9 @@ You are free to modify the value in the register. The register is discarded at t A `Define` is the defintion of a value. This is the only time when a value may be set. 
- void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +```c++ +void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +``` By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the value to the specified register `reg`. @@ -64,7 +80,9 @@ value to the specified register `reg`. Adding a `Define` to an existing value. - void DefineValue(IR::Inst* inst, Argument& arg); +```c++ +void DefineValue(IR::Inst* inst, Argument& arg); +``` You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are emitted. diff --git a/src/dynarmic/docs/ReturnStackBufferOptimization.md b/src/dynarmic/docs/ReturnStackBufferOptimization.md index 6ffe41bcc6..0e72c3bce8 100644 --- a/src/dynarmic/docs/ReturnStackBufferOptimization.md +++ b/src/dynarmic/docs/ReturnStackBufferOptimization.md @@ -23,15 +23,17 @@ One complication dynarmic has is that a compiled block is not uniquely identifia the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block. - u64 LocationDescriptor::UniqueHash() const { - // This value MUST BE UNIQUE. - // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint - u64 pc_u64 = u64(arm_pc) << 32; - u64 fpscr_u64 = u64(fpscr.Value()); - u64 t_u64 = cpsr.T() ? 1 : 0; - u64 e_u64 = cpsr.E() ? 2 : 0; - return pc_u64 | fpscr_u64 | t_u64 | e_u64; - } +```c++ +u64 LocationDescriptor::UniqueHash() const { + // This value MUST BE UNIQUE. + // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint + u64 pc_u64 = u64(arm_pc) << 32; + u64 fpscr_u64 = u64(fpscr.Value()); + u64 t_u64 = cpsr.T() ? 1 : 0; + u64 e_u64 = cpsr.E() ? 2 : 0; + return pc_u64 | fpscr_u64 | t_u64 | e_u64; +} +``` ## Our implementation isn't actually a stack @@ -49,97 +51,107 @@ host addresses for the corresponding the compiled blocks. size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8 showed degraded performance. - struct JitState { - // ... +```c++ +struct JitState { + // ... - static constexpr size_t RSBSize = 8; // MUST be a power of 2. - u32 rsb_ptr = 0; - std::array rsb_location_descriptors; - std::array rsb_codeptrs; - void ResetRSB(); + static constexpr size_t RSBSize = 8; // MUST be a power of 2. + u32 rsb_ptr = 0; + std::array rsb_location_descriptors; + std::array rsb_codeptrs; + void ResetRSB(); - // ... - }; + // ... +}; +``` ### RSB Push We insert our prediction at the insertion point iff the RSB doesn't already contain a prediction with the same `UniqueHash`. - void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { + using namespace Xbyak::util; - ASSERT(inst->GetArg(0).IsImmediate()); - u64 imm64 = inst->GetArg(0).GetU64(); + ASSERT(inst->GetArg(0).IsImmediate()); + u64 imm64 = inst->GetArg(0).GetU64(); - Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); - Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); - Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); - u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() - ? 
u64(unique_hash_to_code_ptr[imm64]) - : u64(code->GetReturnFromRunCodeAddress()); + Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); + Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); + Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); + u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() + ? u64(unique_hash_to_code_ptr[imm64]) + : u64(code->GetReturnFromRunCodeAddress()); - code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); - code->add(index_reg, 1); - code->and_(index_reg, u32(JitState::RSBSize - 1)); + code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); + code->add(index_reg, 1); + code->and_(index_reg, u32(JitState::RSBSize - 1)); - code->mov(loc_desc_reg, u64(imm64)); - CodePtr patch_location = code->getCurr(); - patch_unique_hash_locations[imm64].emplace_back(patch_location); - code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. - code->EnsurePatchLocationSize(patch_location, 10); + code->mov(loc_desc_reg, u64(imm64)); + CodePtr patch_location = code->getCurr(); + patch_unique_hash_locations[imm64].emplace_back(patch_location); + code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. + code->EnsurePatchLocationSize(patch_location, 10); - Xbyak::Label label; - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->je(label, code->T_SHORT); - } - - code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); - code->L(label); + Xbyak::Label label; + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->je(label, code->T_SHORT); } + code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); + code->L(label); +} +``` + In pseudocode: - for (i := 0 .. RSBSize-1) - if (rsb_location_descriptors[i] == imm64) - goto label; - rsb_ptr++; - rsb_ptr %= RSBSize; - rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash - rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; - label: +```c++ + for (i := 0 .. RSBSize-1) + if (rsb_location_descriptors[i] == imm64) + goto label; + rsb_ptr++; + rsb_ptr %= RSBSize; + rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash + rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; +label: +``` ## RSB Pop To check if a predicition is in the RSB, we linearly scan the RSB. 
- void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { + using namespace Xbyak::util; - // This calculation has to match up with IREmitter::PushRSB - code->mov(ecx, MJitStateReg(Arm::Reg::PC)); - code->shl(rcx, 32); - code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); - code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); - code->or_(rbx, rcx); + // This calculation has to match up with IREmitter::PushRSB + code->mov(ecx, MJitStateReg(Arm::Reg::PC)); + code->shl(rcx, 32); + code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); + code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); + code->or_(rbx, rcx); - code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); - } - - code->jmp(rax); + code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); } + code->jmp(rax); +} +``` + In pseudocode: - rbx := ComputeUniqueHash() - rax := ReturnToDispatch - for (i := 0 .. RSBSize-1) - if (rbx == rsb_location_descriptors[i]) - rax = rsb_codeptrs[i] - goto rax \ No newline at end of file +```c++ +rbx := ComputeUniqueHash() +rax := ReturnToDispatch +for (i := 0 .. 
RSBSize-1) + if (rbx == rsb_location_descriptors[i]) + rax = rsb_codeptrs[i] +goto rax +``` diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index b74626bcd5..ee06ccf19a 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -56,8 +56,6 @@ add_library(dynarmic common/lut_from_list.h common/math_util.cpp common/math_util.h - common/memory_pool.cpp - common/memory_pool.h common/safe_ops.h common/spin_lock.h common/string_util.h @@ -78,7 +76,6 @@ add_library(dynarmic ir/basic_block.cpp ir/basic_block.h ir/cond.h - ir/ir_emitter.cpp ir/ir_emitter.h ir/location_descriptor.cpp ir/location_descriptor.h @@ -87,78 +84,59 @@ add_library(dynarmic ir/opcodes.cpp ir/opcodes.h ir/opcodes.inc - ir/opt/constant_propagation_pass.cpp - ir/opt/dead_code_elimination_pass.cpp - ir/opt/identity_removal_pass.cpp - ir/opt/ir_matcher.h - ir/opt/naming_pass.cpp - ir/opt/passes.h - ir/opt/polyfill_pass.cpp - ir/opt/verification_pass.cpp + ir/opt_passes.cpp + ir/opt_passes.h ir/terminal.h ir/type.cpp ir/type.h ir/value.cpp ir/value.h + # A32 + frontend/A32/a32_ir_emitter.cpp + frontend/A32/a32_ir_emitter.h + frontend/A32/a32_location_descriptor.cpp + frontend/A32/a32_location_descriptor.h + frontend/A32/decoder/arm.h + frontend/A32/decoder/arm.inc + frontend/A32/decoder/asimd.h + frontend/A32/decoder/asimd.inc + frontend/A32/decoder/thumb16.h + frontend/A32/decoder/thumb16.inc + frontend/A32/decoder/thumb32.h + frontend/A32/decoder/thumb32.inc + frontend/A32/decoder/vfp.h + frontend/A32/decoder/vfp.inc + frontend/A32/disassembler/disassembler.h + frontend/A32/disassembler/disassembler_arm.cpp + frontend/A32/disassembler/disassembler_thumb.cpp + frontend/A32/FPSCR.h + frontend/A32/ITState.h + frontend/A32/PSR.h + frontend/A32/translate/a32_translate.cpp + frontend/A32/translate/a32_translate.h + frontend/A32/translate/conditional_state.cpp + frontend/A32/translate/conditional_state.h + frontend/A32/translate/translate_arm.cpp + frontend/A32/translate/translate_thumb.cpp + interface/A32/a32.h + interface/A32/arch_version.h + interface/A32/config.h + interface/A32/coprocessor.h + interface/A32/coprocessor_util.h + interface/A32/disassembler.h + # A64 + frontend/A64/a64_ir_emitter.cpp + frontend/A64/a64_ir_emitter.h + frontend/A64/a64_location_descriptor.cpp + frontend/A64/a64_location_descriptor.h + frontend/A64/decoder/a64.h + frontend/A64/decoder/a64.inc + frontend/A64/translate/a64_translate.cpp + frontend/A64/translate/a64_translate.h + interface/A64/a64.h + interface/A64/config.h ) -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A32/a32_ir_emitter.cpp - frontend/A32/a32_ir_emitter.h - frontend/A32/a32_location_descriptor.cpp - frontend/A32/a32_location_descriptor.h - frontend/A32/decoder/arm.h - frontend/A32/decoder/arm.inc - frontend/A32/decoder/asimd.h - frontend/A32/decoder/asimd.inc - frontend/A32/decoder/thumb16.h - frontend/A32/decoder/thumb16.inc - frontend/A32/decoder/thumb32.h - frontend/A32/decoder/thumb32.inc - frontend/A32/decoder/vfp.h - frontend/A32/decoder/vfp.inc - frontend/A32/disassembler/disassembler.h - frontend/A32/disassembler/disassembler_arm.cpp - frontend/A32/disassembler/disassembler_thumb.cpp - frontend/A32/FPSCR.h - frontend/A32/ITState.h - frontend/A32/PSR.h - frontend/A32/translate/a32_translate.cpp - frontend/A32/translate/a32_translate.h - frontend/A32/translate/conditional_state.cpp - frontend/A32/translate/conditional_state.h - 
frontend/A32/translate/translate_arm.cpp - frontend/A32/translate/translate_thumb.cpp - interface/A32/a32.h - interface/A32/arch_version.h - interface/A32/config.h - interface/A32/coprocessor.h - interface/A32/coprocessor_util.h - interface/A32/disassembler.h - ir/opt/a32_constant_memory_reads_pass.cpp - ir/opt/a32_get_set_elimination_pass.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A64/a64_ir_emitter.cpp - frontend/A64/a64_ir_emitter.h - frontend/A64/a64_location_descriptor.cpp - frontend/A64/a64_location_descriptor.h - frontend/A64/decoder/a64.h - frontend/A64/decoder/a64.inc - frontend/A64/translate/a64_translate.cpp - frontend/A64/translate/a64_translate.h - interface/A64/a64.h - interface/A64/config.h - ir/opt/a64_callback_config_pass.cpp - ir/opt/a64_get_set_elimination_pass.cpp - ir/opt/a64_merge_interpret_blocks.cpp - ) -endif() - if ("x86_64" IN_LIST ARCHITECTURE) # Newer versions of xbyak (>= 7.25.0) have stricter checks that currently # fail in dynarmic @@ -215,29 +193,21 @@ if ("x86_64" IN_LIST ARCHITECTURE) common/spin_lock_x64.h common/x64_disassemble.cpp common/x64_disassemble.h + # A32 + backend/x64/a32_emit_x64.cpp + backend/x64/a32_emit_x64.h + backend/x64/a32_emit_x64_memory.cpp + backend/x64/a32_interface.cpp + backend/x64/a32_jitstate.cpp + backend/x64/a32_jitstate.h + # A64 + backend/x64/a64_emit_x64.cpp + backend/x64/a64_emit_x64.h + backend/x64/a64_emit_x64_memory.cpp + backend/x64/a64_interface.cpp + backend/x64/a64_jitstate.cpp + backend/x64/a64_jitstate.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a32_emit_x64.cpp - backend/x64/a32_emit_x64.h - backend/x64/a32_emit_x64_memory.cpp - backend/x64/a32_interface.cpp - backend/x64/a32_jitstate.cpp - backend/x64/a32_jitstate.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a64_emit_x64.cpp - backend/x64/a64_emit_x64.h - backend/x64/a64_emit_x64_memory.cpp - backend/x64/a64_interface.cpp - backend/x64/a64_jitstate.cpp - backend/x64/a64_jitstate.h - ) - endif() endif() if ("arm64" IN_LIST ARCHITECTURE) @@ -281,25 +251,17 @@ if ("arm64" IN_LIST ARCHITECTURE) backend/arm64/verbose_debugging_output.h common/spin_lock_arm64.cpp common/spin_lock_arm64.h + # A32 + backend/arm64/a32_address_space.cpp + backend/arm64/a32_address_space.h + backend/arm64/a32_core.h + backend/arm64/a32_interface.cpp + # A64 + backend/arm64/a64_address_space.cpp + backend/arm64/a64_address_space.h + backend/arm64/a64_core.h + backend/arm64/a64_interface.cpp ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a32_address_space.cpp - backend/arm64/a32_address_space.h - backend/arm64/a32_core.h - backend/arm64/a32_interface.cpp - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a64_address_space.cpp - backend/arm64/a64_address_space.h - backend/arm64/a64_core.h - backend/arm64/a64_interface.cpp - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -328,21 +290,14 @@ if ("riscv" IN_LIST ARCHITECTURE) backend/riscv64/reg_alloc.cpp backend/riscv64/reg_alloc.h backend/riscv64/stack_layout.h + # A32 + backend/riscv64/a32_address_space.cpp + backend/riscv64/a32_address_space.h + backend/riscv64/a32_core.h + backend/riscv64/a32_interface.cpp + backend/riscv64/code_block.h ) - - if ("A32" IN_LIST 
DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - backend/riscv64/a32_address_space.cpp - backend/riscv64/a32_address_space.h - backend/riscv64/a32_core.h - backend/riscv64/a32_interface.cpp - backend/riscv64/code_block.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") - endif() + message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") endif() if (WIN32) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index bbdf925ff4..8e48fa3687 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -16,7 +16,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -163,21 +163,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index d4fe9a9cb7..2b50ad9ea3 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -331,22 +331,7 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - 
Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp index efa211618b..0bb9591411 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/backend/riscv64/stack_layout.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::RV64 { @@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp index b116ec180e..382eb70f3f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp @@ -29,7 +29,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/location_descriptor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A32 { @@ -217,19 +217,7 @@ private: block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE); IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index ddd2327395..c65b582982 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -25,7 +25,7 @@ 
#include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/a64.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A64 { @@ -275,21 +275,7 @@ private: const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block).entrypoint; } diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.cpp b/src/dynarmic/src/dynarmic/common/memory_pool.cpp deleted file mode 100644 index f41dd92af5..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.cpp +++ /dev/null @@ -1,13 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/common/memory_pool.h" - -#include - -namespace Dynarmic::Common { - - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.h b/src/dynarmic/src/dynarmic/common/memory_pool.h deleted file mode 100644 index c99316e107..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -namespace Dynarmic::Common { - -/// @tparam object_size Byte-size of objects to construct -/// @tparam slab_size Number of objects to have per slab -template -class Pool { -public: - inline Pool() noexcept { - AllocateNewSlab(); - } - inline ~Pool() noexcept { - std::free(current_slab); - for (char* slab : slabs) { - std::free(slab); - } - } - - Pool(const Pool&) = delete; - Pool(Pool&&) = delete; - - Pool& operator=(const Pool&) = delete; - Pool& operator=(Pool&&) = delete; - - /// @brief Returns a pointer to an `object_size`-bytes block of memory. - [[nodiscard]] void* Alloc() noexcept { - if (remaining == 0) { - slabs.push_back(current_slab); - AllocateNewSlab(); - } - void* ret = static_cast(current_ptr); - current_ptr += object_size; - remaining--; - return ret; - } -private: - /// @brief Allocates a completely new memory slab. - /// Used when an entirely new slab is needed - /// due the current one running out of usable space. 
- void AllocateNewSlab() noexcept { - current_slab = static_cast(std::malloc(object_size * slab_size)); - current_ptr = current_slab; - remaining = slab_size; - } - - std::vector slabs; - char* current_slab = nullptr; - char* current_ptr = nullptr; - size_t remaining = 0; -}; - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index b00ab3cb20..a13a7ebfc9 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -15,8 +15,6 @@ #include #include "dynarmic/common/assert.h" - -#include "dynarmic/common/memory_pool.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A64/a64_types.h" #include "dynarmic/ir/cond.h" @@ -27,8 +25,7 @@ namespace Dynarmic::IR { Block::Block(const LocationDescriptor& location) : location{location}, end_location{location}, - cond{Cond::AL}, - instruction_alloc_pool{std::make_unique>()} + cond{Cond::AL} { } @@ -40,7 +37,7 @@ Block::Block(const LocationDescriptor& location) /// @param args A sequence of Value instances used as arguments for the instruction. /// @returns Iterator to the newly created instruction. Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) noexcept { - IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode); + IR::Inst* inst = new IR::Inst(opcode); DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index e88dc92fc4..2f2d9ab6de 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -21,7 +21,6 @@ #include "dynarmic/ir/terminal.h" #include "dynarmic/ir/value.h" #include "dynarmic/ir/dense_list.h" -#include "dynarmic/common/memory_pool.h" namespace Dynarmic::IR { @@ -174,8 +173,6 @@ private: LocationDescriptor end_location; /// Conditional to pass in order to execute this block Cond cond; - /// Memory pool for instruction list - std::unique_ptr> instruction_alloc_pool; /// Terminal instruction of this block. Terminal terminal = Term::Invalid{}; /// Number of cycles this block takes to execute if the conditional fails. diff --git a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp deleted file mode 100644 index a8ef7e2989..0000000000 --- a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/ir_emitter.h" - -#include - -#include "dynarmic/common/assert.h" -#include - -#include "dynarmic/ir/opcodes.h" - -namespace Dynarmic::IR { - - -} // namespace Dynarmic::IR diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp deleted file mode 100644 index 9699f18345..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/interface/A32/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { - for (auto& inst : block) { - switch (inst.GetOpcode()) { - case IR::Opcode::A32ReadMemory8: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u8 value_from_memory = cb->MemoryRead8(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory16: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u16 value_from_memory = cb->MemoryRead16(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory32: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u32 value_from_memory = cb->MemoryRead32(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory64: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u64 value_from_memory = cb->MemoryRead64(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp deleted file mode 100644 index 499b38b120..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp +++ /dev/null @@ -1,382 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A32/a32_ir_emitter.h" -#include "dynarmic/frontend/A32/a32_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -namespace { - -void FlagsPass(IR::Block& block) { - using Iterator = std::reverse_iterator; - - struct FlagInfo { - bool set_not_required = false; - bool has_value_request = false; - Iterator value_request = {}; - }; - struct ValuelessFlagInfo { - bool set_not_required = false; - }; - ValuelessFlagInfo nzcvq; - ValuelessFlagInfo nzcv; - ValuelessFlagInfo nz; - FlagInfo c_flag; - FlagInfo ge; - - auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(value); - } - info.has_value_request = false; - - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_get = [](FlagInfo& info, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(IR::Value{&*inst}); - } - info.has_value_request = true; - info.value_request = inst; - }; - - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetCFlag: { - do_get(c_flag, inst); - break; - } - case IR::Opcode::A32SetCpsrNZCV: { - if (c_flag.has_value_request) { - ir.SetInsertionPointBefore(inst.base()); // base is one ahead - IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); - c_flag.value_request->ReplaceUsesWith(c); - c_flag.has_value_request = false; - break; // This case will be executed again because of the above - } - - do_set_valueless(nzcv, inst); - - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVRaw: { - if (c_flag.has_value_request) { - nzcv.set_not_required = false; - } - - do_set_valueless(nzcv, inst); - - nzcvq = {}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVQ: { - if (c_flag.has_value_request) { - nzcvq.set_not_required = false; - } - - do_set_valueless(nzcvq, inst); - - nzcv = {.set_not_required = true}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZ: { - do_set_valueless(nz, inst); - - nzcvq = {}; - nzcv = {}; - break; - } - case IR::Opcode::A32SetCpsrNZC: { - if (c_flag.has_value_request) { - c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); - c_flag.has_value_request = false; - } - - if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { - const auto nz_value = inst->GetArg(0); - - inst->Invalidate(); - - ir.SetInsertionPointBefore(inst.base()); - ir.SetCpsrNZ(IR::NZCV{nz_value}); - - nzcvq = {}; - nzcv = {}; - nz = {.set_not_required = true}; - break; - } - - if (nz.set_not_required && c_flag.set_not_required) { - inst->Invalidate(); - } else if (nz.set_not_required) { - inst->SetArg(0, 
IR::Value::EmptyNZCVImmediateMarker()); - } - nz.set_not_required = true; - c_flag.set_not_required = true; - - nzcv = {}; - nzcvq = {}; - break; - } - case IR::Opcode::A32SetGEFlags: { - do_set(ge, inst->GetArg(0), inst); - break; - } - case IR::Opcode::A32GetGEFlags: { - do_get(ge, inst); - break; - } - case IR::Opcode::A32SetGEFlagsCompressed: { - ge = {.set_not_required = true}; - break; - } - case IR::Opcode::A32OrQFlag: { - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcvq = {}; - nzcv = {}; - nz = {}; - c_flag = {}; - ge = {}; - } - break; - } - } - } -} - -void RegisterPass(IR::Block& block) { - using Iterator = IR::Block::iterator; - - struct RegInfo { - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array reg_info; - - const auto do_get = [](RegInfo& info, Iterator get_inst) { - if (info.register_value.IsEmpty()) { - info.register_value = IR::Value(&*get_inst); - return; - } - get_inst->ReplaceUsesWith(info.register_value); - }; - - const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { - if (info.last_set_instruction) { - (*info.last_set_instruction)->Invalidate(); - } - info = { - .register_value = value, - .last_set_instruction = set_inst, - }; - }; - - enum class ExtValueType { - Empty, - Single, - Double, - VectorDouble, - VectorQuad, - }; - struct ExtRegInfo { - ExtValueType value_type = {}; - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array ext_reg_info; - - const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { - if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = IR::Value(&*get_inst), - .last_set_instruction = std::nullopt, - }; - } - return; - } - get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); - }; - - const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { - if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - if (std::data(infos)[0].get().last_set_instruction) { - (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); - } - } - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = value, - .last_set_instruction = set_inst, - }; - } - }; - - // Location and version don't matter here. 
- A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - ASSERT(reg != A32::Reg::PC); - const size_t reg_index = static_cast(reg); - do_get(reg_info[reg_index], inst); - break; - } - case IR::Opcode::A32SetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - if (reg == A32::Reg::PC) { - break; - } - const auto reg_index = static_cast(reg); - do_set(reg_info[reg_index], inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); - break; - } - case IR::Opcode::A32SetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - break; - } - case IR::Opcode::A32SetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst->GetArg(1), - inst); - break; - } - case IR::Opcode::A32GetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - do_ext_get(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_get(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst); - } - break; - } - case IR::Opcode::A32SetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - ir.SetInsertionPointAfter(inst); - const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); - do_ext_set(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - stored_value, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_set(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst->GetArg(1), - inst); - } - break; - } - default: { - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - ext_reg_info = {}; - } - break; - } - } - } -} - -} // namespace - -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { - FlagsPass(block); - RegisterPass(block); -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp deleted file mode 100644 
index 79d9769520..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/frontend/A64/a64_ir_emitter.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { - if (conf.hook_data_cache_operations) { - return; - } - - for (auto& inst : block) { - if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { - continue; - } - - const auto op = static_cast(inst.GetArg(1).GetU64()); - if (op == A64::DataCacheOperation::ZeroByVA) { - A64::IREmitter ir{block}; - ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; - ir.SetInsertionPointBefore(&inst); - - size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); - IR::U64 addr{inst.GetArg(2)}; - - const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); - while (bytes >= 16) { - ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(16)); - bytes -= 16; - } - - while (bytes >= 8) { - ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(8)); - bytes -= 8; - } - - while (bytes >= 4) { - ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(4)); - bytes -= 4; - } - } - inst.Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp deleted file mode 100644 index 53e3b27176..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -void A64GetSetElimination(IR::Block& block) { - using Iterator = IR::Block::iterator; - - enum class TrackingType { - W, - X, - S, - D, - Q, - SP, - NZCV, - NZCVRaw, - }; - struct RegisterInfo { - IR::Value register_value; - TrackingType tracking_type; - bool set_instruction_present = false; - Iterator last_set_instruction; - }; - std::array reg_info; - std::array vec_info; - RegisterInfo sp_info; - RegisterInfo nzcv_info; - - const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { - if (info.set_instruction_present) { - info.last_set_instruction->Invalidate(); - block.Instructions().erase(info.last_set_instruction); - } - - info.register_value = value; - info.tracking_type = tracking_type; - info.set_instruction_present = true; - info.last_set_instruction = set_inst; - }; - - const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { - const auto do_nothing = [&] { - info = {}; - info.register_value = IR::Value(&*get_inst); - info.tracking_type = tracking_type; - }; - - if (info.register_value.IsEmpty()) { - do_nothing(); - return; - } - - if (info.tracking_type == tracking_type) { - get_inst->ReplaceUsesWith(info.register_value); - return; - } - - do_nothing(); - }; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A64GetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::W); - break; - } - case IR::Opcode::A64GetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::X); - break; - } - case IR::Opcode::A64GetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::S); - break; - } - case IR::Opcode::A64GetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::D); - break; - } - case IR::Opcode::A64GetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64GetSP: { - do_get(sp_info, inst, TrackingType::SP); - break; - } - case IR::Opcode::A64GetNZCVRaw: { - do_get(nzcv_info, inst, TrackingType::NZCVRaw); - break; - } - case IR::Opcode::A64SetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); - break; - } - case IR::Opcode::A64SetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); - break; - } - case IR::Opcode::A64SetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); - break; - } - case IR::Opcode::A64SetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); - break; - } - case IR::Opcode::A64SetQ: { - 
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64SetSP: { - do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); - break; - } - case IR::Opcode::A64SetNZCV: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); - break; - } - case IR::Opcode::A64SetNZCVRaw: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcv_info = {}; - } - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - vec_info = {}; - sp_info = {}; - } - break; - } - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp deleted file mode 100644 index 25b7ef0ff1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_location_descriptor.h" -#include "dynarmic/frontend/A64/translate/a64_translate.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { - const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { - const auto instruction = cb->MemoryReadCode(location.PC()); - if (!instruction) - return false; - - IR::Block new_block{location}; - A64::TranslateSingleInstruction(new_block, location, *instruction); - - if (!new_block.Instructions().empty()) - return false; - - const IR::Terminal terminal = new_block.GetTerminal(); - if (auto term = boost::get(&terminal)) { - return term->next == location; - } - - return false; - }; - - IR::Terminal terminal = block.GetTerminal(); - auto term = boost::get(&terminal); - if (!term) - return; - - A64::LocationDescriptor location{term->next}; - size_t num_instructions = 1; - - while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { - num_instructions++; - } - - term->num_instructions = num_instructions; - block.ReplaceTerminal(terminal); - block.CycleCount() += num_instructions - 1; -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp deleted file mode 100644 index 86ebca87d2..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp +++ /dev/null @@ -1,559 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/assert.h" -#include -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/common/safe_ops.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -using Op = Dynarmic::IR::Opcode; - -namespace { - -// Tiny helper to avoid the need to store based off the opcode -// bit size all over the place within folding functions. -void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { - if (is_32_bit) { - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); - } else { - inst.ReplaceUsesWith(IR::Value{value}); - } -} - -IR::Value Value(bool is_32_bit, u64 value) { - return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; -} - -template -bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - - const bool is_lhs_immediate = lhs.IsImmediate(); - const bool is_rhs_immediate = rhs.IsImmediate(); - - if (is_lhs_immediate && is_rhs_immediate) { - const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); - ReplaceUsesWith(inst, is_32_bit, result); - return false; - } - - if (is_lhs_immediate && !is_rhs_immediate) { - const IR::Inst* rhs_inst = rhs.GetInstRecursive(); - if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, rhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } else { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - } - } - - if (!is_lhs_immediate && is_rhs_immediate) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } - } - - return true; -} - -void FoldAdd(IR::Inst& inst, bool is_32_bit) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - if (lhs.IsImmediate() && !rhs.IsImmediate()) { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - FoldAdd(inst, is_32_bit); - return; - } - - if (inst.HasAssociatedPseudoOperation()) { - return; - } - - if (!lhs.IsImmediate() && rhs.IsImmediate()) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { - const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); - if (combined == 0) { - inst.ReplaceUsesWith(lhs_inst->GetArg(0)); - return; - } - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - return; - } - if (rhs.IsZero() && carry.IsZero()) { - inst.ReplaceUsesWith(lhs); - return; - } - } - - if (inst.AreAllArgsImmediates()) { - const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); - return; - } -} - -/// Folds AND operations based on the following: -/// -/// 1. imm_x & imm_y -> result -/// 2. x & 0 -> 0 -/// 3. 0 & y -> 0 -/// 4. 
x & y -> y (where x has all bits set to 1) -/// 5. x & y -> x (where y has all bits set to 1) -/// -void FoldAND(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.HasAllBitsSet()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -/// Folds byte reversal opcodes based on the following: -/// -/// 1. imm -> swap(imm) -/// -void FoldByteReverse(IR::Inst& inst, Op op) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - if (op == Op::ByteReverseWord) { - const u32 result = mcl::bit::swap_bytes_32(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else if (op == Op::ByteReverseHalf) { - const u16 result = mcl::bit::swap_bytes_16(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else { - const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); - inst.ReplaceUsesWith(IR::Value{result}); - } -} - -/// Folds division operations based on the following: -/// -/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) -/// 2. imm_x / imm_y -> result -/// 3. x / 1 -> x -/// -void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { - const auto rhs = inst.GetArg(1); - - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - return; - } - - const auto lhs = inst.GetArg(0); - if (lhs.IsImmediate() && rhs.IsImmediate()) { - if (is_signed) { - const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); - ReplaceUsesWith(inst, is_32_bit, static_cast(result)); - } else { - const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); - } - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(IR::Value{lhs}); - } -} - -// Folds EOR operations based on the following: -// -// 1. imm_x ^ imm_y -> result -// 2. x ^ 0 -> x -// 3. 
0 ^ y -> y -// -void FoldEOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -void FoldLeastSignificantByte(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantHalf(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldMostSignificantBit(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); -} - -void FoldMostSignificantWord(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - if (carry_inst) { - carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); - } - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); -} - -// Folds multiplication operations based on the following: -// -// 1. imm_x * imm_y -> result -// 2. x * 0 -> 0 -// 3. 0 * y -> 0 -// 4. x * 1 -> x -// 5. 1 * y -> y -// -void FoldMultiply(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -// Folds NOT operations if the contained value is an immediate. -void FoldNOT(IR::Inst& inst, bool is_32_bit) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - const u64 result = ~operand.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -// Folds OR operations based on the following: -// -// 1. imm_x | imm_y -> result -// 2. x | 0 -> x -// 3. 0 | y -> y -// -void FoldOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -bool FoldShifts(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - // The 32-bit variants can contain 3 arguments, while the - // 64-bit variants only contain 2. 
- if (inst.NumArgs() == 3 && !carry_inst) { - inst.SetArg(2, IR::Value(false)); - } - - const auto shift_amount = inst.GetArg(1); - - if (shift_amount.IsZero()) { - if (carry_inst) { - carry_inst->ReplaceUsesWith(inst.GetArg(2)); - } - inst.ReplaceUsesWith(inst.GetArg(0)); - return false; - } - - if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { - inst.SetArg(2, IR::Value(false)); - } - - if (!inst.AreAllArgsImmediates() || carry_inst) { - return false; - } - - return true; -} - -void FoldSignExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSignExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSub(IR::Inst& inst, bool is_32_bit) { - if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { - return; - } - - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -void FoldZeroExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldZeroExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{value}); -} -} // Anonymous namespace - -void ConstantPropagation(IR::Block& block) { - for (auto& inst : block) { - const auto opcode = inst.GetOpcode(); - - switch (opcode) { - case Op::LeastSignificantWord: - FoldLeastSignificantWord(inst); - break; - case Op::MostSignificantWord: - FoldMostSignificantWord(inst); - break; - case Op::LeastSignificantHalf: - FoldLeastSignificantHalf(inst); - break; - case Op::LeastSignificantByte: - FoldLeastSignificantByte(inst); - break; - case Op::MostSignificantBit: - FoldMostSignificantBit(inst); - break; - case Op::IsZero32: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); - } - break; - case Op::IsZero64: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); - } - break; - case Op::LogicalShiftLeft32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeft64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, 
Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeftMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftLeftMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::LogicalShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::ArithmeticShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::ArithmeticShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::RotateRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); - } - break; - case Op::RotateRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); - } - break; - case Op::Add32: - case Op::Add64: - FoldAdd(inst, opcode == Op::Add32); - break; - case Op::Sub32: - case Op::Sub64: - FoldSub(inst, opcode == Op::Sub32); - break; - case Op::Mul32: - case Op::Mul64: - FoldMultiply(inst, opcode == Op::Mul32); - break; - case Op::SignedDiv32: - case Op::SignedDiv64: - FoldDivide(inst, opcode == Op::SignedDiv32, true); - break; - case Op::UnsignedDiv32: - case Op::UnsignedDiv64: - FoldDivide(inst, opcode == Op::UnsignedDiv32, false); - break; - case Op::And32: - case Op::And64: - FoldAND(inst, opcode == Op::And32); - break; - case Op::Eor32: - case Op::Eor64: - FoldEOR(inst, opcode == Op::Eor32); - break; - case Op::Or32: - case Op::Or64: - FoldOR(inst, opcode == Op::Or32); - break; - case Op::Not32: - case Op::Not64: - FoldNOT(inst, opcode == Op::Not32); - break; - case Op::SignExtendByteToWord: - case Op::SignExtendHalfToWord: - FoldSignExtendXToWord(inst); - break; - case Op::SignExtendByteToLong: - case Op::SignExtendHalfToLong: - case Op::SignExtendWordToLong: - FoldSignExtendXToLong(inst); - break; - case Op::ZeroExtendByteToWord: - case Op::ZeroExtendHalfToWord: - FoldZeroExtendXToWord(inst); - break; - case Op::ZeroExtendByteToLong: - case Op::ZeroExtendHalfToLong: - case Op::ZeroExtendWordToLong: - FoldZeroExtendXToLong(inst); - break; - case Op::ByteReverseWord: - case Op::ByteReverseHalf: - case Op::ByteReverseDual: - FoldByteReverse(inst, opcode); - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp 
b/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp deleted file mode 100644 index bda9f6efd1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void DeadCodeElimination(IR::Block& block) { - // We iterate over the instructions in reverse order. - // This is because removing an instruction reduces the number of uses for earlier instructions. - for (auto& inst : mcl::iterator::reverse(block)) { - if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { - inst.Invalidate(); - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp deleted file mode 100644 index e87fcc335b..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void IdentityRemovalPass(IR::Block& block) { - std::vector to_invalidate; - - auto iter = block.begin(); - while (iter != block.end()) { - IR::Inst& inst = *iter; - - const size_t num_args = inst.NumArgs(); - for (size_t i = 0; i < num_args; i++) { - while (true) { - IR::Value arg = inst.GetArg(i); - if (!arg.IsIdentity()) - break; - inst.SetArg(i, arg.GetInst()->GetArg(0)); - } - } - - if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { - iter = block.Instructions().erase(inst); - to_invalidate.push_back(&inst); - } else { - ++iter; - } - } - - for (IR::Inst* inst : to_invalidate) { - inst->Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h b/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h deleted file mode 100644 index 5eb1a55100..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h +++ /dev/null @@ -1,127 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization::IRMatcher { - -struct CaptureValue { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value); - } -}; - -struct CaptureInst { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return std::tuple(value.GetInstRecursive()); - } -}; - -struct CaptureUImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsU64()); - } -}; - -struct CaptureSImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsS64()); - } -}; - -template -struct UImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsU64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct SImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsS64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct Inst { -public: - using ReturnType = mp::concat, typename Args::ReturnType...>; - - static std::optional Match(const IR::Inst& inst) { - if (inst.GetOpcode() != Opcode) - return std::nullopt; - if (inst.HasAssociatedPseudoOperation()) - return std::nullopt; - return MatchArgs<0>(inst); - } - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return Match(*value.GetInstRecursive()); - } - -private: - template - static auto MatchArgs(const IR::Inst& inst) -> std::optional>, std::tuple<>>>> { - if constexpr (I >= sizeof...(Args)) { - return std::tuple(); - } else { - using Arg = mp::get>; - - if (const auto arg = Arg::Match(inst.GetArg(I))) { - if (const auto rest = MatchArgs(inst)) { - return std::tuple_cat(*arg, *rest); - } - } - - return std::nullopt; - } - } -}; - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t); -} - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t); -} - -} // namespace Dynarmic::Optimization::IRMatcher diff --git a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp deleted file mode 100644 index a766bdc83f..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2023 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" - -namespace Dynarmic::Optimization { - -void NamingPass(IR::Block& block) { - unsigned name = 1; - for (auto& inst : block) { - inst.SetName(name++); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/passes.h b/src/dynarmic/src/dynarmic/ir/opt/passes.h deleted file mode 100644 index 703145b556..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/passes.h +++ /dev/null @@ -1,47 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -namespace Dynarmic::A32 { -struct UserCallbacks; -} - -namespace Dynarmic::A64 { -struct UserCallbacks; -struct UserConfig; -} // namespace Dynarmic::A64 - -namespace Dynarmic::IR { -class Block; -} - -namespace Dynarmic::Optimization { - -struct PolyfillOptions { - bool sha256 = false; - bool vector_multiply_widen = false; - - bool operator==(const PolyfillOptions&) const = default; -}; - -struct A32GetSetEliminationOptions { - bool convert_nzc_to_nz = false; - bool convert_nz_to_nzc = false; -}; - -void PolyfillPass(IR::Block& block, const PolyfillOptions& opt); -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb); -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt); -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf); -void A64GetSetElimination(IR::Block& block); -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb); -void ConstantPropagation(IR::Block& block); -void DeadCodeElimination(IR::Block& block); -void IdentityRemovalPass(IR::Block& block); -void VerificationPass(const IR::Block& block); -void NamingPass(IR::Block& block); - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp deleted file mode 100644 index 1aa3aea91e..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2022 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -namespace { - -void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - - const IR::U128 t = ir.VectorExtract(x, y, 32); - - IR::U128 result = ir.ZeroVector(); - for (size_t i = 0; i < 4; i++) { - const IR::U32 modified_element = [&] { - const IR::U32 element = ir.VectorGetElement(32, t, i); - const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); - const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); - const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); - }(); - - result = ir.VectorSetElement(32, result, i, modified_element); - } - result = ir.VectorAdd(32, result, x); - - inst.ReplaceUsesWith(result); -} - -void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 z = (IR::U128)inst.GetArg(2); - - const IR::U128 T0 = ir.VectorExtract(y, z, 32); - - const IR::U128 lower_half = [&] { - const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); - const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); - return ir.VectorZeroUpper(tmp5); - }(); - - const IR::U64 upper_half = [&] { - const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); - const IR::U128 tmp2 = 
ir.VectorRotateRight(32, lower_half, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - - // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] - const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); - const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); - - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); - return ir.VectorGetElement(64, tmp5, 0); - }(); - - const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); - - inst.ReplaceUsesWith(result); -} - -IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Eor(ir.And(ir.Eor(y, z), x), z); -} - -IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); -} - -IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 x = (IR::U128)inst.GetArg(0); - IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 w = (IR::U128)inst.GetArg(2); - const bool part1 = inst.GetArg(3).GetU1(); - - for (size_t i = 0; i < 4; i++) { - const IR::U32 low_x = ir.VectorGetElement(32, x, 0); - const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); - const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); - const IR::U32 high_x = ir.VectorGetElement(32, x, 3); - - const IR::U32 low_y = ir.VectorGetElement(32, y, 0); - const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); - const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); - const IR::U32 high_y = ir.VectorGetElement(32, y, 3); - - const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); - const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); - - const IR::U32 t = [&] { - const IR::U32 w_element = ir.VectorGetElement(32, w, i); - const IR::U32 sig = SHAhashSIGMA1(ir, low_y); - - return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); - }(); - - const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); - const IR::U32 new_low_y = ir.Add(t, high_x); - - // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] - const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); - const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); - - x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); - y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); - } - - inst.ReplaceUsesWith(part1 ? x : y); -} - -template -void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 n = (IR::U128)inst.GetArg(0); - IR::U128 m = (IR::U128)inst.GetArg(1); - - const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); - const IR::U128 wide_m = is_signed ? 
ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); - - const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); - - inst.ReplaceUsesWith(result); -} - -} // namespace - -void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { - if (polyfill == PolyfillOptions{}) { - return; - } - - IR::IREmitter ir{block}; - - for (auto& inst : block) { - ir.SetInsertionPointBefore(&inst); - - switch (inst.GetOpcode()) { - case IR::Opcode::SHA256MessageSchedule0: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule0(ir, inst); - } - break; - case IR::Opcode::SHA256MessageSchedule1: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule1(ir, inst); - } - break; - case IR::Opcode::SHA256Hash: - if (polyfill.sha256) { - PolyfillSHA256Hash(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, false>(ir, inst); - } - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp deleted file mode 100644 index c6c2cff231..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/type.h" - -namespace Dynarmic::Optimization { - -void VerificationPass(const IR::Block& block) { - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const IR::Type t1 = inst.GetArg(i).GetType(); - const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); - if (!IR::AreTypesCompatible(t1, t2)) { - std::puts(IR::DumpBlock(block).c_str()); - ASSERT_FALSE("above block failed validation"); - } - } - } - - ankerl::unordered_dense::map actual_uses; - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const auto arg = inst.GetArg(i); - if (!arg.IsImmediate()) { - actual_uses[arg.GetInst()]++; - } - } - } - - for (const auto& pair : actual_uses) { - ASSERT(pair.first->UseCount() == pair.second); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp new file mode 100644 index 0000000000..383b915839 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -0,0 +1,1502 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include +#include + +#include +#include "boost/container/small_vector.hpp" +#include "dynarmic/frontend/A32/a32_ir_emitter.h" +#include "dynarmic/frontend/A32/a32_location_descriptor.h" +#include "dynarmic/frontend/A32/a32_types.h" +#include "dynarmic/frontend/A64/a64_ir_emitter.h" +#include "dynarmic/frontend/A64/a64_location_descriptor.h" +#include "dynarmic/frontend/A64/translate/a64_translate.h" +#include "dynarmic/interface/A32/config.h" +#include "dynarmic/interface/A64/config.h" +#include "dynarmic/interface/optimization_flags.h" +#include "dynarmic/common/safe_ops.h" +#include "dynarmic/ir/basic_block.h" +#include "dynarmic/ir/microinstruction.h" +#include "dynarmic/ir/opcodes.h" +#include "dynarmic/ir/opt_passes.h" +#include "dynarmic/ir/type.h" +#include "mcl/bit/swap.hpp" +#include "mcl/bit/rotate.hpp" +#include "mcl/iterator/reverse.hpp" + +namespace Dynarmic::Optimization { + +static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { + for (auto& inst : block) { + switch (inst.GetOpcode()) { + case IR::Opcode::A32ReadMemory8: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u8 value_from_memory = cb->MemoryRead8(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory16: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u16 value_from_memory = cb->MemoryRead16(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory32: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u32 value_from_memory = cb->MemoryRead32(vaddr); + 
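+ // The address was an immediate and the page is read-only, so every use of the load can be replaced with the fetched constant.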
inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory64: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u64 value_from_memory = cb->MemoryRead64(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + default: + break; + } + } +} + +static void FlagsPass(IR::Block& block) { + using Iterator = std::reverse_iterator; + + struct FlagInfo { + bool set_not_required = false; + bool has_value_request = false; + Iterator value_request = {}; + }; + struct ValuelessFlagInfo { + bool set_not_required = false; + }; + ValuelessFlagInfo nzcvq; + ValuelessFlagInfo nzcv; + ValuelessFlagInfo nz; + FlagInfo c_flag; + FlagInfo ge; + + auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(value); + } + info.has_value_request = false; + + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_get = [](FlagInfo& info, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(IR::Value{&*inst}); + } + info.has_value_request = true; + info.value_request = inst; + }; + + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetCFlag: { + do_get(c_flag, inst); + break; + } + case IR::Opcode::A32SetCpsrNZCV: { + if (c_flag.has_value_request) { + ir.SetInsertionPointBefore(inst.base()); // base is one ahead + IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); + c_flag.value_request->ReplaceUsesWith(c); + c_flag.has_value_request = false; + break; // This case will be executed again because of the above + } + + do_set_valueless(nzcv, inst); + + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVRaw: { + if (c_flag.has_value_request) { + nzcv.set_not_required = false; + } + + do_set_valueless(nzcv, inst); + + nzcvq = {}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVQ: { + if (c_flag.has_value_request) { + nzcvq.set_not_required = false; + } + + do_set_valueless(nzcvq, inst); + + nzcv = {.set_not_required = true}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZ: { + do_set_valueless(nz, inst); + + nzcvq = {}; + nzcv = {}; + break; + } + case IR::Opcode::A32SetCpsrNZC: { + if (c_flag.has_value_request) { + c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); + c_flag.has_value_request = false; + } + + if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { + const auto nz_value = inst->GetArg(0); + + inst->Invalidate(); + + ir.SetInsertionPointBefore(inst.base()); + ir.SetCpsrNZ(IR::NZCV{nz_value}); + + nzcvq = {}; + nzcv = {}; + nz = {.set_not_required = true}; + break; + } + + if (nz.set_not_required && c_flag.set_not_required) { + inst->Invalidate(); + } else if (nz.set_not_required) { + inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); + } + nz.set_not_required = true; + 
c_flag.set_not_required = true; + + nzcv = {}; + nzcvq = {}; + break; + } + case IR::Opcode::A32SetGEFlags: { + do_set(ge, inst->GetArg(0), inst); + break; + } + case IR::Opcode::A32GetGEFlags: { + do_get(ge, inst); + break; + } + case IR::Opcode::A32SetGEFlagsCompressed: { + ge = {.set_not_required = true}; + break; + } + case IR::Opcode::A32OrQFlag: { + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcvq = {}; + nzcv = {}; + nz = {}; + c_flag = {}; + ge = {}; + } + break; + } + } + } +} + +static void RegisterPass(IR::Block& block) { + using Iterator = IR::Block::iterator; + + struct RegInfo { + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array reg_info; + + const auto do_get = [](RegInfo& info, Iterator get_inst) { + if (info.register_value.IsEmpty()) { + info.register_value = IR::Value(&*get_inst); + return; + } + get_inst->ReplaceUsesWith(info.register_value); + }; + + const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { + if (info.last_set_instruction) { + (*info.last_set_instruction)->Invalidate(); + } + info = { + .register_value = value, + .last_set_instruction = set_inst, + }; + }; + + enum class ExtValueType { + Empty, + Single, + Double, + VectorDouble, + VectorQuad, + }; + struct ExtRegInfo { + ExtValueType value_type = {}; + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array ext_reg_info; + + const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { + if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = IR::Value(&*get_inst), + .last_set_instruction = std::nullopt, + }; + } + return; + } + get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); + }; + + const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { + if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + if (std::data(infos)[0].get().last_set_instruction) { + (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); + } + } + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = value, + .last_set_instruction = set_inst, + }; + } + }; + + // Location and version don't matter here. 
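+ // Note: this emitter is only used further down, by A32SetVector, to zero the upper lane of doubleword stores.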
+ A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + ASSERT(reg != A32::Reg::PC); + const size_t reg_index = size_t(reg); + do_get(reg_info[reg_index], inst); + break; + } + case IR::Opcode::A32SetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + if (reg == A32::Reg::PC) { + break; + } + const auto reg_index = size_t(reg); + do_set(reg_info[reg_index], inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); + break; + } + case IR::Opcode::A32SetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + break; + } + case IR::Opcode::A32SetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst->GetArg(1), + inst); + break; + } + case IR::Opcode::A32GetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + do_ext_get(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_get(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst); + } + break; + } + case IR::Opcode::A32SetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + ir.SetInsertionPointAfter(inst); + const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); + do_ext_set(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + stored_value, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_set(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst->GetArg(1), + inst); + } + break; + } + default: { + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + ext_reg_info = {}; + } + break; + } + } + } +} + +struct A32GetSetEliminationOptions { + bool convert_nzc_to_nz = false; + bool convert_nz_to_nzc = false; +}; + +static void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { + FlagsPass(block); + RegisterPass(block); +} + +static void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { + if 
(conf.hook_data_cache_operations) { + return; + } + + for (auto& inst : block) { + if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { + continue; + } + + const auto op = static_cast(inst.GetArg(1).GetU64()); + if (op == A64::DataCacheOperation::ZeroByVA) { + A64::IREmitter ir{block}; + ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; + ir.SetInsertionPointBefore(&inst); + + size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); + IR::U64 addr{inst.GetArg(2)}; + + const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); + while (bytes >= 16) { + ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(16)); + bytes -= 16; + } + + while (bytes >= 8) { + ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(8)); + bytes -= 8; + } + + while (bytes >= 4) { + ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(4)); + bytes -= 4; + } + } + inst.Invalidate(); + } +} + +static void A64GetSetElimination(IR::Block& block) { + using Iterator = IR::Block::iterator; + + enum class TrackingType { + W, + X, + S, + D, + Q, + SP, + NZCV, + NZCVRaw, + }; + struct RegisterInfo { + IR::Value register_value; + TrackingType tracking_type; + bool set_instruction_present = false; + Iterator last_set_instruction; + }; + std::array reg_info; + std::array vec_info; + RegisterInfo sp_info; + RegisterInfo nzcv_info; + + const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { + if (info.set_instruction_present) { + info.last_set_instruction->Invalidate(); + block.Instructions().erase(info.last_set_instruction); + } + + info.register_value = value; + info.tracking_type = tracking_type; + info.set_instruction_present = true; + info.last_set_instruction = set_inst; + }; + + const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { + const auto do_nothing = [&] { + info = {}; + info.register_value = IR::Value(&*get_inst); + info.tracking_type = tracking_type; + }; + + if (info.register_value.IsEmpty()) { + do_nothing(); + return; + } + + if (info.tracking_type == tracking_type) { + get_inst->ReplaceUsesWith(info.register_value); + return; + } + + do_nothing(); + }; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A64GetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::W); + break; + } + case IR::Opcode::A64GetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::X); + break; + } + case IR::Opcode::A64GetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::S); + break; + } + case IR::Opcode::A64GetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::D); + break; + } + case IR::Opcode::A64GetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64GetSP: { + do_get(sp_info, inst, TrackingType::SP); + break; + } + case IR::Opcode::A64GetNZCVRaw: { + do_get(nzcv_info, inst, TrackingType::NZCVRaw); + break; + } + case IR::Opcode::A64SetW: { + const size_t index = 
A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); + break; + } + case IR::Opcode::A64SetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); + break; + } + case IR::Opcode::A64SetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); + break; + } + case IR::Opcode::A64SetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); + break; + } + case IR::Opcode::A64SetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64SetSP: { + do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); + break; + } + case IR::Opcode::A64SetNZCV: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); + break; + } + case IR::Opcode::A64SetNZCVRaw: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcv_info = {}; + } + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + vec_info = {}; + sp_info = {}; + } + break; + } + } + } +} + +static void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { + const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { + const auto instruction = cb->MemoryReadCode(location.PC()); + if (!instruction) + return false; + + IR::Block new_block{location}; + A64::TranslateSingleInstruction(new_block, location, *instruction); + + if (!new_block.Instructions().empty()) + return false; + + const IR::Terminal terminal = new_block.GetTerminal(); + if (auto term = boost::get(&terminal)) { + return term->next == location; + } + + return false; + }; + + IR::Terminal terminal = block.GetTerminal(); + auto term = boost::get(&terminal); + if (!term) + return; + + A64::LocationDescriptor location{term->next}; + size_t num_instructions = 1; + + while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { + num_instructions++; + } + + term->num_instructions = num_instructions; + block.ReplaceTerminal(terminal); + block.CycleCount() += num_instructions - 1; +} + +using Op = Dynarmic::IR::Opcode; + +// Tiny helper to avoid the need to store based off the opcode +// bit size all over the place within folding functions. +static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { + if (is_32_bit) { + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); + } else { + inst.ReplaceUsesWith(IR::Value{value}); + } +} + +static IR::Value Value(bool is_32_bit, u64 value) { + return is_32_bit ? 
IR::Value{static_cast(value)} : IR::Value{value}; +} + +template +static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + + const bool is_lhs_immediate = lhs.IsImmediate(); + const bool is_rhs_immediate = rhs.IsImmediate(); + + if (is_lhs_immediate && is_rhs_immediate) { + const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); + ReplaceUsesWith(inst, is_32_bit, result); + return false; + } + + if (is_lhs_immediate && !is_rhs_immediate) { + const IR::Inst* rhs_inst = rhs.GetInstRecursive(); + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, rhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } else { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + } + } + + if (!is_lhs_immediate && is_rhs_immediate) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } + } + + return true; +} + +static void FoldAdd(IR::Inst& inst, bool is_32_bit) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + if (lhs.IsImmediate() && !rhs.IsImmediate()) { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + FoldAdd(inst, is_32_bit); + return; + } + + if (inst.HasAssociatedPseudoOperation()) { + return; + } + + if (!lhs.IsImmediate() && rhs.IsImmediate()) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { + const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); + if (combined == 0) { + inst.ReplaceUsesWith(lhs_inst->GetArg(0)); + return; + } + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + return; + } + if (rhs.IsZero() && carry.IsZero()) { + inst.ReplaceUsesWith(lhs); + return; + } + } + + if (inst.AreAllArgsImmediates()) { + const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); + return; + } +} + +/// Folds AND operations based on the following: +/// +/// 1. imm_x & imm_y -> result +/// 2. x & 0 -> 0 +/// 3. 0 & y -> 0 +/// 4. x & y -> y (where x has all bits set to 1) +/// 5. x & y -> x (where y has all bits set to 1) +/// +static void FoldAND(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.HasAllBitsSet()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +/// Folds byte reversal opcodes based on the following: +/// +/// 1. 
imm -> swap(imm) +/// +static void FoldByteReverse(IR::Inst& inst, Op op) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + if (op == Op::ByteReverseWord) { + const u32 result = mcl::bit::swap_bytes_32(u32(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else if (op == Op::ByteReverseHalf) { + const u16 result = mcl::bit::swap_bytes_16(u16(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else { + const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); + inst.ReplaceUsesWith(IR::Value{result}); + } +} + +/// Folds division operations based on the following: +/// +/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) +/// 2. imm_x / imm_y -> result +/// 3. x / 1 -> x +/// +static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { + const auto rhs = inst.GetArg(1); + + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + return; + } + + const auto lhs = inst.GetArg(0); + if (lhs.IsImmediate() && rhs.IsImmediate()) { + if (is_signed) { + const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); + ReplaceUsesWith(inst, is_32_bit, static_cast(result)); + } else { + const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); + } + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(IR::Value{lhs}); + } +} + +// Folds EOR operations based on the following: +// +// 1. imm_x ^ imm_y -> result +// 2. x ^ 0 -> x +// 3. 0 ^ y -> y +// +static void FoldEOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static void FoldLeastSignificantByte(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantHalf(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldMostSignificantBit(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); +} + +static void FoldMostSignificantWord(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + if (carry_inst) { + carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); + } + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); +} + +// Folds multiplication operations based on the following: +// +// 1. imm_x * imm_y -> result +// 2. x * 0 -> 0 +// 3. 0 * y -> 0 +// 4. x * 1 -> x +// 5. 
1 * y -> y +// +static void FoldMultiply(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +// Folds NOT operations if the contained value is an immediate. +static void FoldNOT(IR::Inst& inst, bool is_32_bit) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + const u64 result = ~operand.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +// Folds OR operations based on the following: +// +// 1. imm_x | imm_y -> result +// 2. x | 0 -> x +// 3. 0 | y -> y +// +static void FoldOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static bool FoldShifts(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + // The 32-bit variants can contain 3 arguments, while the + // 64-bit variants only contain 2. + if (inst.NumArgs() == 3 && !carry_inst) { + inst.SetArg(2, IR::Value(false)); + } + + const auto shift_amount = inst.GetArg(1); + + if (shift_amount.IsZero()) { + if (carry_inst) { + carry_inst->ReplaceUsesWith(inst.GetArg(2)); + } + inst.ReplaceUsesWith(inst.GetArg(0)); + return false; + } + + if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { + inst.SetArg(2, IR::Value(false)); + } + + if (!inst.AreAllArgsImmediates() || carry_inst) { + return false; + } + + return true; +} + +static void FoldSignExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSignExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSub(IR::Inst& inst, bool is_32_bit) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return; + } + + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +static void FoldZeroExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldZeroExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{value}); +} + +static void ConstantPropagation(IR::Block& block) { + for (auto& inst : block) { + const auto opcode = inst.GetOpcode(); + + switch (opcode) { + case Op::LeastSignificantWord: + FoldLeastSignificantWord(inst); + break; + case Op::MostSignificantWord: + FoldMostSignificantWord(inst); + break; + case Op::LeastSignificantHalf: + FoldLeastSignificantHalf(inst); + break; + case Op::LeastSignificantByte: + FoldLeastSignificantByte(inst); + break; + case Op::MostSignificantBit: + FoldMostSignificantBit(inst); + break; + 
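+ // The comparison and shift opcodes below fold to immediates once every operand is a compile-time constant.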
case Op::IsZero32: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); + } + break; + case Op::IsZero64: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); + } + break; + case Op::LogicalShiftLeft32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeft64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeftMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftLeftMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::LogicalShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::ArithmeticShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::ArithmeticShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::RotateRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); + } + break; + case Op::RotateRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); + } + break; + case Op::Add32: + case Op::Add64: + FoldAdd(inst, opcode == Op::Add32); + break; + case Op::Sub32: + case Op::Sub64: + FoldSub(inst, opcode == Op::Sub32); + break; + case Op::Mul32: + case Op::Mul64: + FoldMultiply(inst, opcode == Op::Mul32); + break; + case Op::SignedDiv32: + case Op::SignedDiv64: + FoldDivide(inst, opcode == Op::SignedDiv32, true); + break; + case Op::UnsignedDiv32: 
+ case Op::UnsignedDiv64: + FoldDivide(inst, opcode == Op::UnsignedDiv32, false); + break; + case Op::And32: + case Op::And64: + FoldAND(inst, opcode == Op::And32); + break; + case Op::Eor32: + case Op::Eor64: + FoldEOR(inst, opcode == Op::Eor32); + break; + case Op::Or32: + case Op::Or64: + FoldOR(inst, opcode == Op::Or32); + break; + case Op::Not32: + case Op::Not64: + FoldNOT(inst, opcode == Op::Not32); + break; + case Op::SignExtendByteToWord: + case Op::SignExtendHalfToWord: + FoldSignExtendXToWord(inst); + break; + case Op::SignExtendByteToLong: + case Op::SignExtendHalfToLong: + case Op::SignExtendWordToLong: + FoldSignExtendXToLong(inst); + break; + case Op::ZeroExtendByteToWord: + case Op::ZeroExtendHalfToWord: + FoldZeroExtendXToWord(inst); + break; + case Op::ZeroExtendByteToLong: + case Op::ZeroExtendHalfToLong: + case Op::ZeroExtendWordToLong: + FoldZeroExtendXToLong(inst); + break; + case Op::ByteReverseWord: + case Op::ByteReverseHalf: + case Op::ByteReverseDual: + FoldByteReverse(inst, opcode); + break; + default: + break; + } + } +} + +static void DeadCodeElimination(IR::Block& block) { + // We iterate over the instructions in reverse order. + // This is because removing an instruction reduces the number of uses for earlier instructions. + for (auto& inst : mcl::iterator::reverse(block)) { + if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { + inst.Invalidate(); + } + } +} + +static void IdentityRemovalPass(IR::Block& block) { + boost::container::small_vector to_invalidate; + + auto iter = block.begin(); + while (iter != block.end()) { + IR::Inst& inst = *iter; + + const size_t num_args = inst.NumArgs(); + for (size_t i = 0; i < num_args; i++) { + while (true) { + IR::Value arg = inst.GetArg(i); + if (!arg.IsIdentity()) + break; + inst.SetArg(i, arg.GetInst()->GetArg(0)); + } + } + + if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { + iter = block.Instructions().erase(inst); + to_invalidate.push_back(&inst); + } else { + ++iter; + } + } + for (IR::Inst* inst : to_invalidate) { + inst->Invalidate(); + } +} + +static void NamingPass(IR::Block& block) { + u32 name = 1; + for (auto& inst : block) + inst.SetName(name++); +} + +static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + + const IR::U128 t = ir.VectorExtract(x, y, 32); + + IR::U128 result = ir.ZeroVector(); + for (size_t i = 0; i < 4; i++) { + const IR::U32 modified_element = [&] { + const IR::U32 element = ir.VectorGetElement(32, t, i); + const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); + const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); + const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); + }(); + + result = ir.VectorSetElement(32, result, i, modified_element); + } + result = ir.VectorAdd(32, result, x); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 z = (IR::U128)inst.GetArg(2); + + const IR::U128 T0 = ir.VectorExtract(y, z, 32); + + const IR::U128 lower_half = [&] { + const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); + const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); + const IR::U128 tmp3 = 
ir.VectorLogicalShiftRight(32, T, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); + return ir.VectorZeroUpper(tmp5); + }(); + + const IR::U64 upper_half = [&] { + const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + + // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] + const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); + const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); + + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); + return ir.VectorGetElement(64, tmp5, 0); + }(); + + const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); + + inst.ReplaceUsesWith(result); +} + +static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Eor(ir.And(ir.Eor(y, z), x), z); +} + +static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); +} + +static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 x = (IR::U128)inst.GetArg(0); + IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 w = (IR::U128)inst.GetArg(2); + const bool part1 = inst.GetArg(3).GetU1(); + + for (size_t i = 0; i < 4; i++) { + const IR::U32 low_x = ir.VectorGetElement(32, x, 0); + const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); + const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); + const IR::U32 high_x = ir.VectorGetElement(32, x, 3); + + const IR::U32 low_y = ir.VectorGetElement(32, y, 0); + const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); + const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); + const IR::U32 high_y = ir.VectorGetElement(32, y, 3); + + const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); + const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); + + const IR::U32 t = [&] { + const IR::U32 w_element = ir.VectorGetElement(32, w, i); + const IR::U32 sig = SHAhashSIGMA1(ir, low_y); + + return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); + }(); + + const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); + const IR::U32 new_low_y = ir.Add(t, high_x); + + // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] + const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); + const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); + + x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); + y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); + } + + inst.ReplaceUsesWith(part1 ? 
x : y); +} + +template +static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 n = (IR::U128)inst.GetArg(0); + IR::U128 m = (IR::U128)inst.GetArg(1); + + const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); + const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); + + const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { + if (polyfill == PolyfillOptions{}) { + return; + } + + IR::IREmitter ir{block}; + + for (auto& inst : block) { + ir.SetInsertionPointBefore(&inst); + + switch (inst.GetOpcode()) { + case IR::Opcode::SHA256MessageSchedule0: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule0(ir, inst); + } + break; + case IR::Opcode::SHA256MessageSchedule1: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule1(ir, inst); + } + break; + case IR::Opcode::SHA256Hash: + if (polyfill.sha256) { + PolyfillSHA256Hash(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, false>(ir, inst); + } + break; + default: + break; + } + } +} + +static void VerificationPass(const IR::Block& block) { + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) { + const IR::Type t1 = inst.GetArg(i).GetType(); + const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); + ASSERT_MSG(IR::AreTypesCompatible(t1, t2), "Block failed:\n{}", IR::DumpBlock(block)); + } + } + ankerl::unordered_dense::map actual_uses; + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) + if (IR::Value const arg = inst.GetArg(i); !arg.IsImmediate()) + actual_uses[arg.GetInst()]++; + } + for (auto const& pair : actual_uses) + ASSERT(pair.first->UseCount() == pair.second); +} + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) [[likely]] { + Optimization::A32GetSetElimination(block, {.convert_nzc_to_nz = true}); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::A32ConstantMemoryReads(block, conf.callbacks); + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + Optimization::IdentityRemovalPass(block); + Optimization::VerificationPass(block); +} + +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& 
polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::A64CallbackConfigPass(block, conf); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) [[likely]] { + Optimization::A64GetSetElimination(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] { + Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks); + } + Optimization::VerificationPass(block); +} + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h new file mode 100644 index 0000000000..1963fad0a0 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -0,0 +1,34 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +namespace Dynarmic::A32 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::A64 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::IR { +class Block; +} + +namespace Dynarmic::Optimization { + +struct PolyfillOptions { + bool sha256 = false; + bool vector_multiply_widen = false; + + bool operator==(const PolyfillOptions&) const = default; +}; + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index 4d14141bbf..ad01e5718b 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -29,7 +29,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; @@ -179,13 +179,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 85d86c7966..29a8f23478 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -6,33 +6,24 @@ add_executable(dynarmic_tests fp/mantissa_util_tests.cpp fp/unpacked_tests.cpp rand_int.h + # A32 + A32/test_arm_disassembler.cpp + A32/test_arm_instructions.cpp + A32/test_coprocessor.cpp + A32/test_svc.cpp + A32/test_thumb_instructions.cpp + A32/testenv.h + decoder_tests.cpp + # A64 + A64/a64.cpp + A64/fibonacci.cpp + A64/fp_min_max.cpp + A64/misaligned_page_table.cpp + A64/test_invalidation.cpp + A64/real_world.cpp + A64/testenv.h ) - -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/test_arm_disassembler.cpp - A32/test_arm_instructions.cpp - A32/test_coprocessor.cpp - A32/test_svc.cpp - A32/test_thumb_instructions.cpp - A32/testenv.h - decoder_tests.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) - - target_sources(dynarmic_tests PRIVATE - A64/a64.cpp - A64/fibonacci.cpp - A64/fp_min_max.cpp - A64/misaligned_page_table.cpp - A64/test_invalidation.cpp - 
A64/real_world.cpp - A64/testenv.h - ) -endif() +target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) if (DYNARMIC_TESTS_USE_UNICORN) target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn) @@ -40,25 +31,17 @@ if (DYNARMIC_TESTS_USE_UNICORN) target_sources(dynarmic_tests PRIVATE fuzz_util.cpp fuzz_util.h + # A32 + A32/fuzz_arm.cpp + A32/fuzz_thumb.cpp + unicorn_emu/a32_unicorn.cpp + unicorn_emu/a32_unicorn.h + # A64 + A64/fuzz_with_unicorn.cpp + A64/verify_unicorn.cpp + unicorn_emu/a64_unicorn.cpp + unicorn_emu/a64_unicorn.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/fuzz_arm.cpp - A32/fuzz_thumb.cpp - unicorn_emu/a32_unicorn.cpp - unicorn_emu/a32_unicorn.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A64/fuzz_with_unicorn.cpp - A64/verify_unicorn.cpp - unicorn_emu/a64_unicorn.cpp - unicorn_emu/a64_unicorn.h - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -69,9 +52,6 @@ if ("x86_64" IN_LIST ARCHITECTURE) target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak) target_architecture_specific_sources(dynarmic_tests "x86_64" x64_cpu_info.cpp - ) - - target_architecture_specific_sources(dynarmic_tests "x86_64" native/preserve_xmm.cpp ) @@ -85,47 +65,47 @@ endif() include(CreateDirectoryGroups) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_print_info - print_info.cpp - ) +# +# dynarmic_print_info +# +add_executable(dynarmic_print_info + print_info.cpp +) +create_target_directory_groups(dynarmic_print_info) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_print_info PRIVATE . ../src) +target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_print_info) +# +# dynarmic_test_generator +# +add_executable(dynarmic_test_generator + fuzz_util.cpp + fuzz_util.h + test_generator.cpp +) - target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_print_info PRIVATE . ../src) - target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +create_target_directory_groups(dynarmic_test_generator) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_generator - fuzz_util.cpp - fuzz_util.h - test_generator.cpp - ) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_generator PRIVATE . ../src) +target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_test_generator) - - target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_generator PRIVATE . 
../src) - target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() - -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_reader - test_reader.cpp - ) - - create_target_directory_groups(dynarmic_test_reader) - - target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_reader PRIVATE . ../src) - target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +# +# dynarmic_test_reader +# +add_executable(dynarmic_test_reader + test_reader.cpp +) +create_target_directory_groups(dynarmic_test_reader) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_reader PRIVATE . ../src) +target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) +# create_target_directory_groups(dynarmic_tests) target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index ef8b87bbd1..3263ca729a 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -34,7 +34,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; From a190e1d673c049aad991f39ac205f30b6dc10071 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:57:48 +0000 Subject: [PATCH 32/56] Fix license headers --- src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index 8e48fa3687..01af361b27 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index 2b50ad9ea3..c4c1c42792 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. 
* Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h index 1963fad0a0..88b8020031 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.h +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD From 1723a5950f4f8201bf3f5fa038feb87fcb5a79ff Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 00:11:40 +0000 Subject: [PATCH 33/56] [dynarmic] use small vector experiment Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index bfacdcca52..66c75aead9 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -12,6 +12,7 @@ #include #include +#include "boost/container/small_vector.hpp" #include "dynarmic/common/common_types.h" #include #include @@ -83,7 +84,7 @@ public: void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; private: //non trivial - std::vector values; //24 + boost::container::small_vector values; //24 // Block state uint16_t total_uses = 0; //8 //sometimes zeroed @@ -96,7 +97,7 @@ private: uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - alignas(16) uint8_t lru_counter = 0; //1 + uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From 3f62a50b171cd3c6591d2331f7c00ea22bb622aa Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:07 +0000 Subject: [PATCH 34/56] [dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 29eab7908b..919e75b77b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -118,7 +118,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept { values.push_back(inst); ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits::max)()); total_uses += inst->UseCount(); - max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType())); + max_bit_width = std::max(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType()))); } void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index 66c75aead9..2bd274730a 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -78,7 +78,7 @@ public: return std::find(values.begin(), values.end(), inst) != values.end(); } inline size_t GetMaxBitWidth() const noexcept { - return max_bit_width; + return 1 << max_bit_width; } void AddValue(IR::Inst* inst) noexcept; void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t 
host_loc_index) const noexcept; @@ -94,10 +94,10 @@ private: uint16_t is_being_used_count = 0; //8 uint16_t current_references = 0; //8 // Value state - uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 + uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6) + uint8_t lru_counter : 2 = 0; //1 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From cbad41aeb7f62f417802a096812fa74fcd324a0c Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:22 +0000 Subject: [PATCH 35/56] [dynarmic] fix tests_reader and tests_generator Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 2 +- src/dynarmic/tests/A32/testenv.h | 1 - src/dynarmic/tests/A64/testenv.h | 1 - src/dynarmic/tests/CMakeLists.txt | 28 ++++++++++++++++++++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index ee06ccf19a..d6be793dc6 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -375,7 +375,7 @@ target_link_libraries(dynarmic ) if (BOOST_NO_HEADERS) -target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) + target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) else() target_link_libraries(dynarmic PRIVATE Boost::headers) endif() diff --git a/src/dynarmic/tests/A32/testenv.h b/src/dynarmic/tests/A32/testenv.h index a6df2017ce..72eaafce14 100644 --- a/src/dynarmic/tests/A32/testenv.h +++ b/src/dynarmic/tests/A32/testenv.h @@ -17,7 +17,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A32/a32.h" -#include "../native/testenv.h" template class A32TestEnv : public Dynarmic::A32::UserCallbacks { diff --git a/src/dynarmic/tests/A64/testenv.h b/src/dynarmic/tests/A64/testenv.h index 31e338b138..fcdadb23e6 100644 --- a/src/dynarmic/tests/A64/testenv.h +++ b/src/dynarmic/tests/A64/testenv.h @@ -12,7 +12,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A64/a64.h" -#include "../native/testenv.h" using Vector = Dynarmic::A64::Vector; diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 29a8f23478..f8f420e2d6 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -72,7 +72,12 @@ add_executable(dynarmic_print_info print_info.cpp ) create_target_directory_groups(dynarmic_print_info) -target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_print_info PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_print_info PRIVATE . 
../src) target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -88,7 +93,12 @@ add_executable(dynarmic_test_generator create_target_directory_groups(dynarmic_test_generator) -target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_generator PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_generator PRIVATE . ../src) target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -100,7 +110,12 @@ add_executable(dynarmic_test_reader test_reader.cpp ) create_target_directory_groups(dynarmic_test_reader) -target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_reader PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_reader PRIVATE . ../src) target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -108,7 +123,12 @@ target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LIT # create_target_directory_groups(dynarmic_tests) -target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) +target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_tests PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_tests PRIVATE . ../src) target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) From a21dc51fa686bc9c07b990325586375b7b65ec82 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 09:10:18 +0000 Subject: [PATCH 36/56] [docs] fastmem draft Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 5 +++++ src/dynarmic/docs/Fastmem.svg | 4 ++++ src/dynarmic/docs/HostToGuest.svg | 4 ++++ 3 files changed, 13 insertions(+) create mode 100644 src/dynarmic/docs/FastMemory.md create mode 100644 src/dynarmic/docs/Fastmem.svg create mode 100644 src/dynarmic/docs/HostToGuest.svg diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md new file mode 100644 index 0000000000..2a7c1a2583 --- /dev/null +++ b/src/dynarmic/docs/FastMemory.md @@ -0,0 +1,5 @@ +# Fast memory (Fastmem) + +![Host to guest translation](./HostToGuest.svg) + +![Fastmem translation](./Fastmem.svg) diff --git a/src/dynarmic/docs/Fastmem.svg b/src/dynarmic/docs/Fastmem.svg new file mode 100644 index 0000000000..a3ed0bb68b --- /dev/null +++ b/src/dynarmic/docs/Fastmem.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
SIGSEGV Trap
Fastmem
Only needs a linear offset from the fastmem arena
Less codegen (SIGSEGV traps)
Is fast only if SIGSEGV handlers are sufficiently fast
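The labels above summarize the trade-off the draft diagram is making: a fastmem access is a single linear offset into a reserved host arena, and unmapped guest addresses surface as SIGSEGV traps instead of extra generated code. A minimal sketch of that fast path, assuming a pre-reserved arena; the names (`g_fastmem_arena`, `FastmemRead64`) are hypothetical, not dynarmic's actual emitted code:

```c++
#include <cstdint>
#include <cstring>

// Assumption: the embedder reserves a contiguous arena covering the guest
// address space and maps valid guest pages into it before execution starts.
static std::uint8_t* g_fastmem_arena = nullptr;

inline std::uint64_t FastmemRead64(std::uint64_t guest_addr) {
    std::uint64_t value;
    // One linear offset, no page-table walk. If the page is unmapped, the
    // resulting SIGSEGV is caught by a handler that redirects execution to a
    // slow-path memory callback -- fast only while that handler stays cheap.
    std::memcpy(&value, g_fastmem_arena + guest_addr, sizeof(value));
    return value;
}
```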
\ No newline at end of file diff --git a/src/dynarmic/docs/HostToGuest.svg b/src/dynarmic/docs/HostToGuest.svg new file mode 100644 index 0000000000..6a15a44b46 --- /dev/null +++ b/src/dynarmic/docs/HostToGuest.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
Resolver
Host to Guest translation
Looks up correct PTE
Translates each address 
Is slow
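For contrast with the fastmem arena above, the resolver path labeled here walks a page table on every access. A rough sketch of that slow path under assumed names (`GuestPageTable`, `Resolve`) and an assumed 4 KiB page size; illustrative only, not dynarmic's real data structures:

```c++
#include <cstdint>
#include <optional>
#include <vector>

struct GuestPageTable {
    static constexpr std::uint64_t kPageBits = 12;  // assumed 4 KiB pages
    std::vector<std::uint8_t*> pages;               // host pointer per guest page

    // Every guest access pays this lookup, which is why the diagram calls it slow.
    std::optional<std::uint8_t*> Resolve(std::uint64_t guest_addr) const {
        const std::uint64_t index = guest_addr >> kPageBits;
        if (index >= pages.size() || pages[index] == nullptr)
            return std::nullopt;  // unmapped page: caller raises a guest fault
        return pages[index] + (guest_addr & ((std::uint64_t(1) << kPageBits) - 1));
    }
};
```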
\ No newline at end of file From 9a5c139aa3c248d328d502f14c44e91a7390cfa8 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 10:16:22 +0000 Subject: [PATCH 37/56] [dynarmic] (prolly makes MSVC crash) - use 128MiB code cache Signed-off-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 6 +- .../frontend/decoder/decoder_detail.h | 65 ++++++++----------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index d2035d0fe0..b74817a611 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -210,12 +210,8 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 438b7b691c..682b0cfbf6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -269,12 +269,8 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index bd76efda2a..4050033ef5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -70,11 +70,9 @@ struct detail { return std::make_tuple(mask, expect); } - /** - * Generates the masks and shifts for each argument. - * A '-' in a bitstring indicates that we don't care about that value. - * An argument is specified by a continuous string of the same character. - */ + /// @brief Generates the masks and shifts for each argument. + /// A '-' in a bitstring indicates that we don't care about that value. + /// An argument is specified by a continuous string of the same character. template static consteval auto GetArgInfo(std::array bitstring) { std::array masks = {}; @@ -98,7 +96,6 @@ struct detail { if constexpr (N > 0) { const size_t bit_position = opcode_bitsize - i - 1; - if (arg_index >= N) throw std::out_of_range("Unexpected field"); @@ -109,20 +106,16 @@ struct detail { } } } - #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) // Avoids a MSVC ICE, and avoids Android NDK issue. ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; })); #endif - return std::make_tuple(masks, shifts); } - /** - * This struct's Make member function generates a lambda which decodes an instruction based on - * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a - * template argument. 
- */ + /// @brief This struct's Make member function generates a lambda which decodes an instruction + /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is + /// provided as a template argument. template struct VisitorCaller; @@ -130,36 +123,36 @@ struct detail { # pragma warning(push) # pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning) #endif - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...), + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...), const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...) const, + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...) const, const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; @@ -167,22 +160,16 @@ struct detail { # pragma warning(pop) #endif - /** - * Creates a matcher that can match and parse instructions based on bitstring. - * See also: GetMaskAndExpect and GetArgInfo for format of bitstring. - */ - template - static auto GetMatcher(FnT fn, const char* const name) { - constexpr size_t args_count = mcl::parameter_count_v; - + /// @brief Creates a matcher that can match and parse instructions based on bitstring. + /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. 
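+    /// The per-argument masks and shifts are baked into the generated handler lambda,
+    /// so the returned matcher itself stores only (mask, expect, handler).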
+ template + static constexpr auto GetMatcher(F fn, const char* const name) { + constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); - - using Iota = std::make_index_sequence; - - const auto proxy_fn = VisitorCaller::Make(Iota(), fn, arg_masks, arg_shifts); + const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); return MatcherT(name, mask, expect, proxy_fn); } }; From d2661e6ba1c2cbe2d9d43b2aaff288ddac7c3a50 Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 4 Sep 2025 07:24:49 +0000 Subject: [PATCH 38/56] [dynarmic] reduce matcher table noise and cache misses Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/a32_types.h | 22 +++++---------- .../src/dynarmic/frontend/A32/decoder/asimd.h | 27 +++++++++---------- .../src/dynarmic/frontend/A32/decoder/vfp.h | 12 +++------ .../src/dynarmic/frontend/A64/decoder/a64.h | 23 +++++++--------- .../frontend/decoder/decoder_detail.h | 6 ++--- .../src/dynarmic/frontend/decoder/matcher.h | 11 ++------ 6 files changed, 38 insertions(+), 63 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h index 3f4501a528..2a0cc25751 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h @@ -9,12 +9,9 @@ #pragma once #include -#include - #include #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" - #include "dynarmic/interface/A32/coprocessor_util.h" #include "dynarmic/ir/cond.h" @@ -89,24 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) { inline size_t RegNumber(Reg reg) { ASSERT(reg != Reg::INVALID_REG); - return static_cast(reg); + return size_t(reg); } inline size_t RegNumber(ExtReg reg) { if (IsSingleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::S0); + return size_t(reg) - size_t(ExtReg::S0); + } else if (IsDoubleExtReg(reg)) { + return size_t(reg) - size_t(ExtReg::D0); } - - if (IsDoubleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::D0); - } - - if (IsQuadExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::Q0); - } - - ASSERT_MSG(false, "Invalid extended register"); - return 0; + ASSERT(IsQuadExtReg(reg)); + return size_t(reg) - size_t(ExtReg::Q0); } inline Reg operator+(Reg reg, size_t number) { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f2e206695b..f63816d2b1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher; template std::vector> GetASIMDDecodeTable() { - std::vector> table = { - -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> table = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" #undef INST - }; - // Exceptions to the rule of thumb. 
const std::set comes_first{ "VBIC, VMOV, VMVN, VORR (immediate)", @@ -53,19 +50,21 @@ std::vector> GetASIMDDecodeTable() { "VQDMULH (scalar)", "VQRDMULH (scalar)", }; - const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_first.count(matcher.GetName()) > 0; + const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_first.count(e.first) > 0; }); - const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_last.count(matcher.GetName()) == 0; + const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_last.count(e.first) == 0; }); - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) { - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - - return table; + std::vector> final_table; + std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + return e.second; + }); + return final_table; } template diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index 5fcacd2bda..a969570547 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -30,22 +30,18 @@ std::optional>> DecodeVFP(u32 instruc static const struct Tables { Table unconditional; Table conditional; - } tables = [] { + } tables = []() { Table list = { - #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./vfp.inc" #undef INST - }; - - const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { return (matcher.GetMask() & 0xF0000000) == 0xF0000000; }); - return Tables{ - Table{list.begin(), division}, - Table{division, list.end()}, + Table{list.begin(), it}, + Table{it, list.end()}, }; }(); diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c651dd7cde..c7bc981ac6 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) { template constexpr DecodeTable GetDecodeTable() { - std::vector> list = { -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./a64.inc" #undef INST }; - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { + std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { // If a matcher has more bits in its mask it is more specific, so it should come first. 
- return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - // Exceptions to the above rule of thumb. - std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + std::stable_partition(list.begin(), list.end(), [&](const auto& e) { return std::set{ "MOVI, MVNI, ORR, BIC (vector, immediate)", "FMOV (vector, immediate)", "Unallocated SIMD modified immediate", - }.count(matcher.GetName()) > 0; + }.count(e.first) > 0; }); - DecodeTable table{}; for (size_t i = 0; i < table.size(); ++i) { - for (auto matcher : list) { - const auto expect = detail::ToFastLookupIndex(matcher.GetExpected()); - const auto mask = detail::ToFastLookupIndex(matcher.GetMask()); + for (auto const& e : list) { + const auto expect = detail::ToFastLookupIndex(e.second.GetExpected()); + const auto mask = detail::ToFastLookupIndex(e.second.GetMask()); if ((i & mask) == expect) { - table[i].push_back(matcher); + table[i].push_back(e.second); } } } diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index 4050033ef5..0d9da6169c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -163,18 +163,18 @@ struct detail { /// @brief Creates a matcher that can match and parse instructions based on bitstring. /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. template - static constexpr auto GetMatcher(F fn, const char* const name) { + static constexpr auto GetMatcher(F fn) { constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); - return MatcherT(name, mask, expect, proxy_fn); + return MatcherT(mask, expect, proxy_fn); } }; -#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn, name) +#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn) } // namespace detail } // namespace Dynarmic::Decoder diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 7e5c9c5a8f..5a2afe57c5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -31,14 +31,8 @@ public: using visitor_type = Visitor; using handler_return_type = typename Visitor::instruction_return_type; using handler_function = std::function; - - Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func) - : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {} - - /// Gets the name of this type of instruction. - const char* GetName() const { - return name; - } + Matcher(opcode_type mask, opcode_type expected, handler_function func) + : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. 
opcode_type GetMask() const { @@ -70,7 +64,6 @@ public: } private: - const char* name; opcode_type mask; opcode_type expected; handler_function fn; From 4f2b63d54453dcf1a5c901dc9f3afc494a2e6aae Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 20:59:23 +0000 Subject: [PATCH 39/56] [dynarmic] fix tests Signed-off-by: lizzie --- .../src/dynarmic/frontend/decoder/matcher.h | 40 +++++++----------- src/dynarmic/tests/A32/fuzz_arm.cpp | 1 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 1 + .../tests/A32/test_arm_instructions.cpp | 1 + src/dynarmic/tests/A32/test_coprocessor.cpp | 1 + src/dynarmic/tests/A32/test_svc.cpp | 1 + .../tests/A32/test_thumb_instructions.cpp | 1 + src/dynarmic/tests/A64/a64.cpp | 1 + src/dynarmic/tests/A64/fp_min_max.cpp | 1 + src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 1 + .../tests/A64/misaligned_page_table.cpp | 1 + src/dynarmic/tests/A64/real_world.cpp | 1 + src/dynarmic/tests/A64/test_invalidation.cpp | 1 + src/dynarmic/tests/decoder_tests.cpp | 2 + src/dynarmic/tests/native/preserve_xmm.cpp | 1 + src/dynarmic/tests/print_info.cpp | 42 ++++--------------- 16 files changed, 39 insertions(+), 58 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 5a2afe57c5..f7e2884e0c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -14,16 +14,12 @@ namespace Dynarmic::Decoder { -/** - * Generic instruction handling construct. - * - * @tparam Visitor An arbitrary visitor type that will be passed through - * to the function being handled. This type must be the - * type of the first parameter in a handler function. - * - * @tparam OpcodeType Type representing an opcode. This must be the - * type of the second parameter in a handler function. - */ +/// Generic instruction handling construct. +/// @tparam Visitor An arbitrary visitor type that will be passed through +/// to the function being handled. This type must be the +/// type of the first parameter in a handler function. +/// @tparam OpcodeType Type representing an opcode. This must be the +/// type of the second parameter in a handler function. template class Matcher { public: @@ -35,30 +31,26 @@ public: : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. - opcode_type GetMask() const { + inline opcode_type GetMask() const noexcept { return mask; } /// Gets the expected value after masking for this instruction. - opcode_type GetExpected() const { + inline opcode_type GetExpected() const noexcept { return expected; } - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - bool Matches(opcode_type instruction) const { + /// Tests to see if the given instruction is the instruction this matcher represents. + /// @param instruction The instruction to test + /// @returns true if the given instruction matches. + inline bool Matches(opcode_type instruction) const noexcept { return (instruction & mask) == expected; } - /** - * Calls the corresponding instruction handler on visitor for this type of instruction. - * @param v The visitor to use - * @param instruction The instruction to decode. - */ - handler_return_type call(Visitor& v, opcode_type instruction) const { + /// Calls the corresponding instruction handler on visitor for this type of instruction. 
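+    /// @note Asserts that Matches(instruction) holds before dispatching to the handler.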
+ /// @param v The visitor to use + /// @param instruction The instruction to decode. + inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept { ASSERT(Matches(instruction)); return fn(v, instruction); } diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp index 087ce54813..5ee6d2bf02 100644 --- a/src/dynarmic/tests/A32/fuzz_arm.cpp +++ b/src/dynarmic/tests/A32/fuzz_arm.cpp @@ -24,6 +24,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a32_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index ad01e5718b..7f64cb0ccb 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -22,6 +22,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a32_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/FPSCR.h" #include "dynarmic/frontend/A32/PSR.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" diff --git a/src/dynarmic/tests/A32/test_arm_instructions.cpp b/src/dynarmic/tests/A32/test_arm_instructions.cpp index 0411877823..c007a18299 100644 --- a/src/dynarmic/tests/A32/test_arm_instructions.cpp +++ b/src/dynarmic/tests/A32/test_arm_instructions.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" diff --git a/src/dynarmic/tests/A32/test_coprocessor.cpp b/src/dynarmic/tests/A32/test_coprocessor.cpp index 98da0e5d34..49cb42bdf3 100644 --- a/src/dynarmic/tests/A32/test_coprocessor.cpp +++ b/src/dynarmic/tests/A32/test_coprocessor.cpp @@ -8,6 +8,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/tests/A32/test_svc.cpp b/src/dynarmic/tests/A32/test_svc.cpp index 8b55d6537c..998566130e 100644 --- a/src/dynarmic/tests/A32/test_svc.cpp +++ b/src/dynarmic/tests/A32/test_svc.cpp @@ -8,6 +8,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A32/test_thumb_instructions.cpp b/src/dynarmic/tests/A32/test_thumb_instructions.cpp index 3501d5419f..d509acdd8d 100644 --- a/src/dynarmic/tests/A32/test_thumb_instructions.cpp +++ b/src/dynarmic/tests/A32/test_thumb_instructions.cpp @@ -10,6 +10,7 @@ #include "dynarmic/common/common_types.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A32/a32.h" static Dynarmic::A32::UserConfig GetUserConfig(ThumbTestEnv* testenv) { diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index 40eff1f071..24e92d1210 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -7,6 +7,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/A64/fp_min_max.cpp b/src/dynarmic/tests/A64/fp_min_max.cpp index d8b45db807..1669b63071 100644 --- a/src/dynarmic/tests/A64/fp_min_max.cpp +++ b/src/dynarmic/tests/A64/fp_min_max.cpp @@ -12,6 +12,7 @@ #include "dynarmic/common/common_types.h" #include "./testenv.h" 
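+// The per-frontend testenv.h headers no longer include this transitively (it was
+// dropped from A32/testenv.h and A64/testenv.h above), so include it directly.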
+#include "../native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp index 3322af06e7..45ba728917 100644 --- a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp +++ b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp @@ -19,6 +19,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a64_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/A64/misaligned_page_table.cpp b/src/dynarmic/tests/A64/misaligned_page_table.cpp index 8235e14a67..ecba5a3efa 100644 --- a/src/dynarmic/tests/A64/misaligned_page_table.cpp +++ b/src/dynarmic/tests/A64/misaligned_page_table.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" TEST_CASE("misaligned load/store do not use page_table when detect_misaligned_access_via_page_table is set", "[a64]") { diff --git a/src/dynarmic/tests/A64/real_world.cpp b/src/dynarmic/tests/A64/real_world.cpp index 07532d95af..a083f16d61 100644 --- a/src/dynarmic/tests/A64/real_world.cpp +++ b/src/dynarmic/tests/A64/real_world.cpp @@ -5,6 +5,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/test_invalidation.cpp b/src/dynarmic/tests/A64/test_invalidation.cpp index 168043c1cb..8a63776fa4 100644 --- a/src/dynarmic/tests/A64/test_invalidation.cpp +++ b/src/dynarmic/tests/A64/test_invalidation.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index e545309960..777ea8b510 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,6 +20,7 @@ using namespace Dynarmic; +/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -76,3 +77,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { } while (x != 0); } } +*/ diff --git a/src/dynarmic/tests/native/preserve_xmm.cpp b/src/dynarmic/tests/native/preserve_xmm.cpp index 0f69697b7a..7421252063 100644 --- a/src/dynarmic/tests/native/preserve_xmm.cpp +++ b/src/dynarmic/tests/native/preserve_xmm.cpp @@ -6,6 +6,7 @@ #include #include "../A64/testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 3263ca729a..0c89e780ad 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -32,6 +32,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/frontend/A64/translate/impl/impl.h" #include "dynarmic/interface/A32/a32.h" +#include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/opt_passes.h" @@ -39,20 +40,20 @@ using namespace Dynarmic; const char* GetNameOfA32Instruction(u32 instruction) { - if (auto vfp_decoder = A32::DecodeVFP(instruction)) { + /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { return vfp_decoder->get().GetName(); } else if (auto asimd_decoder = 
A32::DecodeASIMD(instruction)) { return asimd_decoder->get().GetName(); } else if (auto decoder = A32::DecodeArm(instruction)) { return decoder->get().GetName(); - } + }*/ return ""; } const char* GetNameOfA64Instruction(u32 instruction) { - if (auto decoder = A64::Decode(instruction)) { + /*if (auto decoder = A64::Decode(instruction)) { return decoder->get().GetName(); - } + }*/ return ""; } @@ -64,18 +65,9 @@ void PrintA32Instruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A32GetSetElimination(ir_block, {}); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A32::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } @@ -88,18 +80,9 @@ void PrintA64Instruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A64::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A64::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } @@ -115,18 +98,9 @@ void PrintThumbInstruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A32GetSetElimination(ir_block, {}); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A32::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } From 7c547daca5391f338d08c82493ab607c07fd57d9 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:07:18 +0000 Subject: [PATCH 40/56] [dynarmic] fix ASIMD execution Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f63816d2b1..2cea7a14c1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -61,7 +61,7 @@ std::vector> GetASIMDDecodeTable() { return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); std::vector> final_table; - std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) { 
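+            // back_inserter grows final_table as elements are copied; the previous
+            // begin() wrote through an empty vector's iterator (undefined behaviour).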
return e.second; }); return final_table; From 683c574c332116b8cdf3e31296790c992546f245 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:40:13 +0000 Subject: [PATCH 41/56] [dynarmic] add back encoding names (for print_info) Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/decoder/arm.h | 20 ++++++++++--- .../src/dynarmic/frontend/A32/decoder/asimd.h | 26 ++++++++++++----- .../dynarmic/frontend/A32/decoder/thumb16.h | 24 +++++++++++----- .../dynarmic/frontend/A32/decoder/thumb32.h | 24 +++++++++++----- .../src/dynarmic/frontend/A32/decoder/vfp.h | 24 +++++++++++----- .../A32/translate/translate_thumb.cpp | 2 +- .../src/dynarmic/frontend/A64/decoder/a64.h | 20 ++++++++++--- src/dynarmic/tests/decoder_tests.cpp | 28 ++++++------------- src/dynarmic/tests/print_info.cpp | 22 +++++++-------- 9 files changed, 122 insertions(+), 68 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h index 0257c28ddb..c6f034ae21 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h @@ -30,13 +30,13 @@ template using ArmDecodeTable = std::array>, 0x1000>; namespace detail { -inline size_t ToFastLookupIndexArm(u32 instruction) { +inline size_t ToFastLookupIndexArm(u32 instruction) noexcept { return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0); } } // namespace detail template -constexpr ArmDecodeTable GetArmDecodeTable() { +constexpr ArmDecodeTable GetArmDecodeTable() noexcept { std::vector> list = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./arm.inc" @@ -62,15 +62,27 @@ constexpr ArmDecodeTable GetArmDecodeTable() { } template -std::optional>> DecodeArm(u32 instruction) { +std::optional>> DecodeArm(u32 instruction) noexcept { alignas(64) static const auto table = GetArmDecodeTable(); const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)]; auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameARM(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./arm.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index 2cea7a14c1..57e1392871 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -26,7 +26,7 @@ template using ASIMDMatcher = Decoder::Matcher; template -std::vector> GetASIMDDecodeTable() { +std::vector> GetASIMDDecodeTable() noexcept { std::vector>> table = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" @@ -68,13 +68,25 @@ std::vector> GetASIMDDecodeTable() { } template -std::optional>> DecodeASIMD(u32 instruction) { - static const auto table = GetASIMDDecodeTable(); - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); +std::optional>> DecodeASIMD(u32 instruction) noexcept { + alignas(64) static const auto table = GetASIMDDecodeTable(); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameASIMD(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./asimd.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h index 8073ee5d47..16b99ba5aa 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h @@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher; template std::optional>> DecodeThumb16(u16 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)), #include "./thumb16.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb16(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) }, +#include "./thumb16.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h index 86a4d767a7..19418de67c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h @@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher; template std::optional>> DecodeThumb32(u32 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./thumb32.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb32(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./thumb32.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index a969570547..a346304a9a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -26,8 +26,7 @@ using VFPMatcher = Decoder::Matcher; template std::optional>> DecodeVFP(u32 instruction) { using Table = std::vector>; - - static const struct Tables { + alignas(64) static const struct Tables { Table unconditional; Table conditional; } tables = []() { @@ -44,14 +43,25 @@ std::optional>> DecodeVFP(u32 instruc Table{it, list.end()}, }; }(); - const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000; const Table& table = is_unconditional ? tables.unconditional : tables.conditional; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameVFP(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./vfp.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp index 5bb516ccfd..0381c984cc 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp @@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) { return 0xF7F0A000; // UDF } -bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) { +inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept { return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000; } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c7bc981ac6..7e6cdc3935 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -71,12 +71,24 @@ constexpr DecodeTable GetDecodeTable() { template std::optional>> Decode(u32 instruction) { alignas(64) static const auto table = GetDecodeTable(); - const auto matches_instruction = [instruction](const auto& matcher) { - return matcher.Matches(instruction); - }; const auto& subtable = table[detail::ToFastLookupIndex(instruction)]; - auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); + auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetName(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./a64.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A64 diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index 777ea8b510..4ad9d90833 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,7 +20,6 @@ using namespace Dynarmic; -/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -37,22 +36,12 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto is_decode_error = [&get_ir](const A32::ASIMDMatcher& matcher, u32 instruction) { const auto block = get_ir(matcher, instruction); - - for (const auto& ir_inst : block) { - if (ir_inst.GetOpcode() == IR::Opcode::A32ExceptionRaised) { - if (static_cast(ir_inst.GetArg(1).GetU64()) == A32::Exception::DecodeError) { - return true; - } - } - } - return false; + return std::find_if(block.cbegin(), block.cend(), [](auto const& e) { + return e.GetOpcode() == IR::Opcode::A32ExceptionRaised && A32::Exception(e.GetArg(1).GetU64()) == A32::Exception::DecodeError; + }) != block.cend(); }; for (auto iter = table.cbegin(); iter != table.cend(); ++iter) { - if (std::strncmp(iter->GetName(), "UNALLOCATED", 11) == 0) { - continue; - } - const u32 expect = iter->GetExpected(); const u32 mask = iter->GetMask(); u32 x = 0; @@ -60,15 +49,17 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const u32 instruction = expect | x; const bool iserr = is_decode_error(*iter, instruction); - const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { return m.Matches(instruction); }); + const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { + return m.Matches(instruction); + }); const bool altiserr = is_decode_error(*alternative, instruction); INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction); INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect); INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x); - INFO("Name: " << iter->GetName()); + INFO("Name: " << *A32::GetNameASIMD(instruction)); INFO("iserr: " << iserr); - INFO("alternative: " << alternative->GetName()); + //INFO("alternative: " << alternative->GetName()); INFO("altiserr: " << altiserr); REQUIRE(((!iserr && alternative == iter) || (iserr && alternative != iter && !altiserr))); @@ -76,5 +67,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { x = ((x | mask) + 1) & ~mask; } while (x != 0); } -} -*/ +} \ No newline at end of file diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 0c89e780ad..8936f32bd3 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -39,21 +39,19 @@ using namespace Dynarmic; -const char* GetNameOfA32Instruction(u32 instruction) { - /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { - return vfp_decoder->get().GetName(); - } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { - return asimd_decoder->get().GetName(); - } else if (auto decoder = A32::DecodeArm(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA32Instruction(u32 instruction) { + if (auto const vfp_decoder = A32::DecodeVFP(instruction)) + return *A32::GetNameVFP(instruction); + else if (auto const asimd_decoder = A32::DecodeASIMD(instruction)) + return *A32::GetNameASIMD(instruction); + 
else if (auto const decoder = A32::DecodeArm(instruction)) + return *A32::GetNameARM(instruction); return ""; } -const char* GetNameOfA64Instruction(u32 instruction) { - /*if (auto decoder = A64::Decode(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA64Instruction(u32 instruction) { + if (auto const decoder = A64::Decode(instruction)) + return *A64::GetName(instruction); return ""; } From 57e9f4a32ddc8aa80cc1fe808824e822c0858090 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 14:08:17 +0000 Subject: [PATCH 42/56] [dynarmic] use ARCHITECTURE_ macros instead of MCL ones Signed-off-by: lizzie --- .../src/dynarmic/backend/exception_handler.h | 18 +- .../backend/exception_handler_generic.cpp | 6 +- .../backend/exception_handler_macos.cpp | 12 +- .../backend/exception_handler_macos_mig.c | 4 +- .../backend/exception_handler_posix.cpp | 184 ++++++++---------- .../backend/exception_handler_windows.cpp | 4 +- src/dynarmic/src/dynarmic/common/context.h | 120 ++++++++++++ 7 files changed, 220 insertions(+), 128 deletions(-) create mode 100644 src/dynarmic/src/dynarmic/common/context.h diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler.h b/src/dynarmic/src/dynarmic/backend/exception_handler.h index 173949628c..cd274b111f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler.h +++ b/src/dynarmic/src/dynarmic/backend/exception_handler.h @@ -15,15 +15,15 @@ #include #include "dynarmic/common/common_types.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) namespace Dynarmic::Backend::X64 { class BlockOfCode; } // namespace Dynarmic::Backend::X64 -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) namespace oaknut { class CodeBlock; } // namespace oaknut -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) namespace Dynarmic::Backend::RV64 { class CodeBlock; } // namespace Dynarmic::Backend::RV64 @@ -33,16 +33,16 @@ class CodeBlock; namespace Dynarmic::Backend { -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) struct FakeCall { u64 call_rip; u64 ret_rip; }; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) struct FakeCall { u64 call_pc; }; -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) struct FakeCall { }; #else @@ -54,11 +54,11 @@ public: ExceptionHandler(); ~ExceptionHandler(); -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void Register(X64::BlockOfCode& code); -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void Register(oaknut::CodeBlock& mem, std::size_t mem_size); -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void Register(RV64::CodeBlock& mem, std::size_t mem_size); #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp index ad7df25ca6..985536d9f0 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp @@ -13,15 +13,15 @@ struct ExceptionHandler::Impl final { ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode&) { // Do nothing } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void 
ExceptionHandler::Register(oaknut::CodeBlock&, std::size_t) { // Do nothing } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock&, std::size_t) { // Do nothing } diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp index 52bcf5972f..76e517f05b 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp @@ -25,7 +25,7 @@ #include "dynarmic/backend/exception_handler.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/block_of_code.h" # define mig_external extern "C" @@ -36,7 +36,7 @@ using dynarmic_thread_state_t = x86_thread_state64_t; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include # define mig_external extern "C" @@ -133,7 +133,7 @@ void MachHandler::MessagePump() { } } -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { std::lock_guard guard(code_block_infos_mutex); @@ -151,7 +151,7 @@ kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { return KERN_SUCCESS; } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) kern_return_t MachHandler::HandleRequest(arm_thread_state64_t* ts) { std::lock_guard guard(code_block_infos_mutex); @@ -269,13 +269,13 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { const u64 code_begin = mcl::bit_cast(code.getCode()); const u64 code_end = code_begin + code.GetTotalCodeSize(); impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c index 762a80ca42..25678ab115 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c @@ -5,9 +5,9 @@ #include -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/mig/mach_exc_server.c" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include "dynarmic/backend/arm64/mig/mach_exc_server.c" #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index d0653eceab..3f0643c087 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -7,36 +7,20 @@ */ #include -#include -#include #include -#include #include -#include -#ifdef __APPLE__ -# include -# include -#else -# include -# ifndef __OpenBSD__ -# include -# endif -# ifdef __sun__ -# include -# endif -#endif - -#include - #include "dynarmic/backend/exception_handler.h" +#include "dynarmic/common/assert.h" +#include "dynarmic/common/context.h" +#include #include "dynarmic/common/common_types.h" -#if 
defined(MCL_ARCHITECTURE_X86_64) -# include "dynarmic/backend/x64/block_of_code.h" -#elif defined(MCL_ARCHITECTURE_ARM64) -# include +#if defined(ARCHITECTURE_x86_64) +# include "dynarmic/backend/x64/block_of_code.h" +#elif defined(ARCHITECTURE_arm64) +# include # include "dynarmic/backend/arm64/abi.h" -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) # include "dynarmic/backend/riscv64/code_block.h" #else # error "Invalid architecture" @@ -127,41 +111,68 @@ void RegisterHandler() { } } -void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { - DEBUG_ASSERT(sig == SIGSEGV || sig == SIGBUS); -#ifndef MCL_ARCHITECTURE_RISCV - ucontext_t* ucontext = reinterpret_cast(raw_context); -#ifndef __OpenBSD__ - auto& mctx = ucontext->uc_mcontext; -#endif -#endif +SigHandler::SigHandler() { + const size_t signal_stack_size = std::max(SIGSTKSZ, 2 * 1024 * 1024); -#if defined(MCL_ARCHITECTURE_X86_64) -# if defined(__APPLE__) -# define CTX_RIP (mctx->__ss.__rip) -# define CTX_RSP (mctx->__ss.__rsp) -# elif defined(__linux__) -# define CTX_RIP (mctx.gregs[REG_RIP]) -# define CTX_RSP (mctx.gregs[REG_RSP]) -# elif defined(__FreeBSD__) -# define CTX_RIP (mctx.mc_rip) -# define CTX_RSP (mctx.mc_rsp) -# elif defined(__NetBSD__) -# define CTX_RIP (mctx.__gregs[_REG_RIP]) -# define CTX_RSP (mctx.__gregs[_REG_RSP]) -# elif defined(__OpenBSD__) -# define CTX_RIP (ucontext->sc_rip) -# define CTX_RSP (ucontext->sc_rsp) -# elif defined(__sun__) -# define CTX_RIP (mctx.gregs[REG_RIP]) -# define CTX_RSP (mctx.gregs[REG_RSP]) -# else -# error "Unknown platform" -# endif + signal_stack_memory = std::malloc(signal_stack_size); + + stack_t signal_stack; + signal_stack.ss_sp = signal_stack_memory; + signal_stack.ss_size = signal_stack_size; + signal_stack.ss_flags = 0; + if (sigaltstack(&signal_stack, nullptr) != 0) { + fmt::print(stderr, "dynarmic: POSIX SigHandler: init failure at sigaltstack\n"); + supports_fast_mem = false; + return; + } + + struct sigaction sa; + sa.sa_handler = nullptr; + sa.sa_sigaction = &SigHandler::SigAction; + sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGSEGV, &sa, &old_sa_segv) != 0) { + fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGSEGV handler\n"); + supports_fast_mem = false; + return; + } +#ifdef __APPLE__ + if (sigaction(SIGBUS, &sa, &old_sa_bus) != 0) { + fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGBUS handler\n"); + supports_fast_mem = false; + return; + } +#endif +} + +SigHandler::~SigHandler() { + std::free(signal_stack_memory); +} + +void SigHandler::AddCodeBlock(CodeBlockInfo cbi) { + std::lock_guard guard(code_block_infos_mutex); + if (auto const iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) + code_block_infos.erase(iter); + code_block_infos.push_back(cbi); +} + +void SigHandler::RemoveCodeBlock(u64 host_pc) { + std::lock_guard guard(code_block_infos_mutex); + const auto iter = FindCodeBlockInfo(host_pc); + if (iter != code_block_infos.end()) + code_block_infos.erase(iter); +} + +void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { + ASSERT(sig == SIGSEGV || sig == SIGBUS); + CTX_DECLARE(raw_context); +#if defined(ARCHITECTURE_x86_64) { - std::shared_lock guard(sig_handler->code_block_infos_mutex); - if (auto const iter = sig_handler->FindCodeBlockInfo(CTX_RIP); iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->second.cb(CTX_RIP); + std::lock_guard 
guard(sig_handler->code_block_infos_mutex); + + const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP); + if (iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->cb(CTX_RIP); CTX_RSP -= sizeof(u64); *mcl::bit_cast(CTX_RSP) = fc.ret_rip; CTX_RIP = fc.call_rip; @@ -169,58 +180,19 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { } } fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); -#elif defined(MCL_ARCHITECTURE_ARM64) -# if defined(__APPLE__) -# define CTX_PC (mctx->__ss.__pc) -# define CTX_SP (mctx->__ss.__sp) -# define CTX_LR (mctx->__ss.__lr) -# define CTX_X(i) (mctx->__ss.__x[i]) -# define CTX_Q(i) (mctx->__ns.__v[i]) -# elif defined(__linux__) -# define CTX_PC (mctx.pc) -# define CTX_SP (mctx.sp) -# define CTX_LR (mctx.regs[30]) -# define CTX_X(i) (mctx.regs[i]) -# define CTX_Q(i) (fpctx->vregs[i]) - [[maybe_unused]] const auto fpctx = [&mctx] { - _aarch64_ctx* header = (_aarch64_ctx*)&mctx.__reserved; - while (header->magic != FPSIMD_MAGIC) { - ASSERT(header->magic && header->size); - header = (_aarch64_ctx*)((char*)header + header->size); - } - return (fpsimd_context*)header; - }(); -# elif defined(__FreeBSD__) -# define CTX_PC (mctx.mc_gpregs.gp_elr) -# define CTX_SP (mctx.mc_gpregs.gp_sp) -# define CTX_LR (mctx.mc_gpregs.gp_lr) -# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) -# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) -# elif defined(__NetBSD__) -# define CTX_PC (mctx.mc_gpregs.gp_elr) -# define CTX_SP (mctx.mc_gpregs.gp_sp) -# define CTX_LR (mctx.mc_gpregs.gp_lr) -# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) -# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) -# elif defined(__OpenBSD__) -# define CTX_PC (ucontext->sc_elr) -# define CTX_SP (ucontext->sc_sp) -# define CTX_LR (ucontext->sc_lr) -# define CTX_X(i) (ucontext->sc_x[i]) -# define CTX_Q(i) (ucontext->sc_q[i]) -# else -# error "Unknown platform" -# endif +#elif defined(ARCHITECTURE_arm64) + CTX_DECLARE(raw_context); { - std::shared_lock guard(sig_handler->code_block_infos_mutex); - if (const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); iter != sig_handler->code_block_infos.end()) { - FakeCall fc = iter->second.cb(CTX_PC); + std::lock_guard guard(sig_handler->code_block_infos_mutex); + const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); + if (iter != sig_handler->code_block_infos.end()) { + FakeCall fc = iter->cb(CTX_PC); CTX_PC = fc.call_pc; return; } } fmt::print(stderr, "Unhandled {} at pc {:#018x}\n", sig == SIGSEGV ? 
"SIGSEGV" : "SIGBUS", CTX_PC); -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) ASSERT_FALSE("Unimplemented"); #else # error "Invalid architecture" @@ -269,15 +241,15 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { impl = std::make_unique(mcl::bit_cast(code.getCode()), code.GetTotalCodeSize()); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) { impl = std::make_unique(mcl::bit_cast(mem.ptr()), size); } diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp index 719c007594..9e76cae912 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp @@ -5,9 +5,9 @@ #include -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/exception_handler_windows.cpp" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include "dynarmic/backend/exception_handler_generic.cpp" #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/common/context.h b/src/dynarmic/src/dynarmic/common/context.h new file mode 100644 index 0000000000..0eb128449c --- /dev/null +++ b/src/dynarmic/src/dynarmic/common/context.h @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#ifdef __APPLE__ +# include +# include +#else +# include +# ifndef __OpenBSD__ +# include +# endif +# ifdef __sun__ +# include +# endif +# ifdef __linux__ +# include +# endif +#endif + +#ifdef ARCHITECTURE_x86_64 +# ifdef __OpenBSD__ +# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast(raw_context); +# else +# define CTX_DECLARE(raw_context) \ + ucontext_t* ucontext = reinterpret_cast(raw_context); \ + [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; +# endif +#elif defined(ARCHITECTURE_arm64) +# ifdef __OpenBSD__ +# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast(raw_context); +# else +# define CTX_DECLARE(raw_context) \ + ucontext_t* ucontext = reinterpret_cast(raw_context); \ + [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; \ + [[maybe_unused]] const auto fpctx = GetFloatingPointState(mctx); +# endif +#endif + +#if defined(ARCHITECTURE_x86_64) +# if defined(__APPLE__) +# define CTX_RIP (mctx->__ss.__rip) +# define CTX_RSP (mctx->__ss.__rsp) +# elif defined(__linux__) +# define CTX_RIP (mctx.gregs[REG_RIP]) +# define CTX_RSP (mctx.gregs[REG_RSP]) +# elif defined(__FreeBSD__) +# define CTX_RIP (mctx.mc_rip) +# define CTX_RSP (mctx.mc_rsp) +# elif defined(__NetBSD__) +# define CTX_RIP (mctx.__gregs[_REG_RIP]) +# define CTX_RSP (mctx.__gregs[_REG_RSP]) +# elif defined(__OpenBSD__) +# define CTX_RIP (ucontext->sc_rip) +# define CTX_RSP (ucontext->sc_rsp) +# elif defined(__sun__) +# define CTX_RIP (mctx.gregs[REG_RIP]) +# define CTX_RSP (mctx.gregs[REG_RSP]) +# else +# error "Unknown platform" +# endif +#elif 
defined(ARCHITECTURE_arm64) +# if defined(__APPLE__) +# define CTX_PC (mctx->__ss.__pc) +# define CTX_SP (mctx->__ss.__sp) +# define CTX_LR (mctx->__ss.__lr) +# define CTX_PSTATE (mctx->__ss.__cpsr) +# define CTX_X(i) (mctx->__ss.__x[i]) +# define CTX_Q(i) (mctx->__ns.__v[i]) +# define CTX_FPSR (mctx->__ns.__fpsr) +# define CTX_FPCR (mctx->__ns.__fpcr) +# elif defined(__linux__) +# define CTX_PC (mctx.pc) +# define CTX_SP (mctx.sp) +# define CTX_LR (mctx.regs[30]) +# define CTX_PSTATE (mctx.pstate) +# define CTX_X(i) (mctx.regs[i]) +# define CTX_Q(i) (fpctx->vregs[i]) +# define CTX_FPSR (fpctx->fpsr) +# define CTX_FPCR (fpctx->fpcr) +# elif defined(__FreeBSD__) +# define CTX_PC (mctx.mc_gpregs.gp_elr) +# define CTX_SP (mctx.mc_gpregs.gp_sp) +# define CTX_LR (mctx.mc_gpregs.gp_lr) +# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) +# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) +# elif defined(__NetBSD__) +# define CTX_PC (mctx.mc_gpregs.gp_elr) +# define CTX_SP (mctx.mc_gpregs.gp_sp) +# define CTX_LR (mctx.mc_gpregs.gp_lr) +# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) +# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) +# elif defined(__OpenBSD__) +# define CTX_PC (ucontext->sc_elr) +# define CTX_SP (ucontext->sc_sp) +# define CTX_LR (ucontext->sc_lr) +# define CTX_X(i) (ucontext->sc_x[i]) +# define CTX_Q(i) (ucontext->sc_q[i]) +# else +# error "Unknown platform" +# endif +#else +# error "unimplemented" +#endif + +#ifdef ARCHITECTURE_arm64 +#ifdef __APPLE__ +inline _STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) { + return &(host_ctx->__ns); +} +#elif defined(__linux__) +inline fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) { + _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved); + while (header->magic != FPSIMD_MAGIC) + header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast<char*>(header) + header->size); + return reinterpret_cast<fpsimd_context*>(header); +} +#endif +#endif From 4d1bee40b46b9ee0c86e1c7c56d0d60e94b270b3 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 17:03:36 +0000 Subject: [PATCH 43/56] [dynarmic] fix android Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index 3f0643c087..dea7a0c4b0 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -181,7 +181,6 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { } fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); #elif defined(ARCHITECTURE_arm64) - CTX_DECLARE(raw_context); { std::lock_guard guard(sig_handler->code_block_infos_mutex); const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); From a2471f0defed09cebb28596a38decef980e968f7 Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 8 Sep 2025 02:47:04 +0000 Subject: [PATCH 44/56] [dynarmic, cmake] remove unused frontends var Signed-off-by: lizzie --- src/dynarmic/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 38457deb50..5fbb03b80e 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -39,9 +39,6 @@ option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF) option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g.
cross-compiling)" OFF) option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT}) option(DYNARMIC_ENABLE_LTO "Enable LTO" OFF) -if (NOT DEFINED DYNARMIC_FRONTENDS) - set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable") -endif() # Default to a Release build if (NOT CMAKE_BUILD_TYPE) From ea842bb33484afc3df4c4e7a2546c9699bad497a Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 8 Sep 2025 03:36:35 +0000 Subject: [PATCH 45/56] [dynarmic, docs] fastmem docs Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md index 2a7c1a2583..c4f57996ba 100644 --- a/src/dynarmic/docs/FastMemory.md +++ b/src/dynarmic/docs/FastMemory.md @@ -1,5 +1,19 @@ # Fast memory (Fastmem) +The main way of accessing memory in JITed programs is via an invoked function, say "Read()" and "Write()". On our translator, such functions usually take a sizable amount of code space (push + call + pop), trash the i-cache (due to the indirect call) and overall make code emission more bloated. + +The solution? Delegate invalid accesses to a dedicated arena, similar to a swap. The main idea behind such a mechanism is to let the OS transmit page faults from invalid accesses into the JIT translator directly, bypassing address space calls; while this sacrifices i-cache coherency, it allows for smaller code size and "faster" throughput. + +Many kernels, however, do not support fast signal dispatching (Solaris, OpenBSD, FreeBSD). Only Linux and Windows support relatively "fast" signal dispatching, hence this feature is better suited for them only. + ![Host to guest translation](./HostToGuest.svg) ![Fastmem translation](./Fastmem.svg) + +In x86_64, for example, when a page fault occurs, the CPU will transmit via control registers and the stack (see `IRETQ`) the appropriate arguments for a page fault handler; the OS will then transform that into something that can be sent to userspace. + +Most modern OSes implement kernel page-table isolation, which means that a set of rarely used system calls will invoke a context switch, whereas the often used ones are handled within the same process address space (the smaller kernel portion) without needing a context switch. This effect can be negated on systems with PCID (up to 4096 unique IDs). + +Signal dispatching takes a performance hit from reloading `%cr3` - but Linux does something more clever to avoid reloads: the VDSO takes care of the entire thing in the same address space, making dispatching as costly as an indirect call, without the hazards of increased code size. + +The main downside is the constant i-cache thrashing and the pipeline hazards introduced by the VDSO signal handlers. However, on most benchmarks fastmem does perform faster than going without it (Linux only). It also exploits the contiguous address space emulation provided by the arena, which can then potentially be mapped transparently onto a hugepage, reducing TLB walk times.
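+
+To make the shape of this concrete, here is a minimal POSIX sketch (an illustration only, not dynarmic's implementation; the arena size, names and handler body are all assumptions): reserve the guest range with `PROT_NONE` so every unmapped access faults, then classify faults in the `SIGSEGV` handler.
+
+```c++
+// Build: g++ -std=c++17 fastmem_sketch.cpp  (Linux/x86_64 assumed; error
+// handling elided, and a production handler must stay async-signal-safe).
+#include <signal.h>
+#include <sys/mman.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+
+static std::uint8_t* g_arena = nullptr;
+constexpr std::size_t kArenaSize = std::size_t{4} << 30;  // one full 32-bit guest space
+
+static void FaultHandler(int, siginfo_t* info, void*) {
+    auto* addr = static_cast<std::uint8_t*>(info->si_addr);
+    if (g_arena && addr >= g_arena && addr < g_arena + kArenaSize) {
+        // A real JIT would back-patch the faulting load/store into a slow-path
+        // callback and resume; this sketch only reports the guest offset.
+        std::fprintf(stderr, "fastmem fault at guest offset %#zx\n",
+                     static_cast<std::size_t>(addr - g_arena));
+    }
+    signal(SIGSEGV, SIG_DFL);  // any other fault: crash normally on re-execution
+}
+
+int main() {
+    // PROT_NONE reserves address space without committing memory; every
+    // access faults until pages are explicitly mapped in.
+    g_arena = static_cast<std::uint8_t*>(mmap(nullptr, kArenaSize, PROT_NONE,
+                                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+    struct sigaction sa {};
+    sa.sa_sigaction = FaultHandler;
+    sa.sa_flags = SA_SIGINFO;
+    sigemptyset(&sa.sa_mask);
+    sigaction(SIGSEGV, &sa, nullptr);
+}
+```
+
+dynarmic's real handler additionally has to map the faulting host PC back to the JIT block that issued the access, which is what the code-block bookkeeping in `exception_handler_posix.cpp` is for.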
From bcd24f35459f048eb0bb57fe19e57e39259d933f Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:28:29 +0000 Subject: [PATCH 46/56] [dynarmic] remove use of mcl reverse iterator Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_emit_x64.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/abi.cpp | 8 ++- src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 65 ++++++++----------- 3 files changed, 33 insertions(+), 42 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 1e673338a8..1691bbb3b7 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -764,7 +764,7 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) { target_code_ptr = code.GetReturnFromRunCodeAddress(); } const CodePtr patch_location = code.getCurr(); - code.mov(code.rcx, reinterpret_cast(target_code_ptr)); + code.mov(code.rcx, u64(target_code_ptr)); code.EnsurePatchLocationSize(patch_location, 10); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp index a9bbab3d10..299bf1d1d6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp @@ -10,7 +10,6 @@ #include -#include #include "dynarmic/common/common_types.h" #include @@ -76,7 +75,8 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size); size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE); - for (auto const xmm : mcl::iterator::reverse(regs)) { + for (auto it = regs.rbegin(); it != regs.rend(); ++it) { + auto const xmm = *it; if (HostLocIsXMM(xmm)) { xmm_offset -= XMM_SIZE; if (code.HasHostFeature(HostFeature::AVX)) { @@ -88,9 +88,11 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, } if (frame_info.stack_subtraction != 0) code.add(rsp, u32(frame_info.stack_subtraction)); - for (auto const gpr : mcl::iterator::reverse(regs)) + for (auto it = regs.rbegin(); it != regs.rend(); ++it) { + auto const gpr = *it; if (HostLocIsGPR(gpr)) code.pop(HostLocToReg64(gpr)); + } } void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) { diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 383b915839..88fc34fa10 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -28,7 +28,6 @@ #include "dynarmic/ir/type.h" #include "mcl/bit/swap.hpp" #include "mcl/bit/rotate.hpp" -#include "mcl/iterator/reverse.hpp" namespace Dynarmic::Optimization { @@ -36,50 +35,42 @@ static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { for (auto& inst : block) { switch (inst.GetOpcode()) { case IR::Opcode::A32ReadMemory8: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u8 value_from_memory = cb->MemoryRead8(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u8 value_from_memory = cb->MemoryRead8(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } case IR::Opcode::A32ReadMemory16: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 
vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u16 value_from_memory = cb->MemoryRead16(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u16 value_from_memory = cb->MemoryRead16(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } case IR::Opcode::A32ReadMemory32: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u32 value_from_memory = cb->MemoryRead32(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u32 value_from_memory = cb->MemoryRead32(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } case IR::Opcode::A32ReadMemory64: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u64 value_from_memory = cb->MemoryRead64(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u64 value_from_memory = cb->MemoryRead64(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } @@ -1205,11 +1196,9 @@ static void ConstantPropagation(IR::Block& block) { static void DeadCodeElimination(IR::Block& block) { // We iterate over the instructions in reverse order. // This is because removing an instruction reduces the number of uses for earlier instructions. - for (auto& inst : mcl::iterator::reverse(block)) { - if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { - inst.Invalidate(); - } - } + for (auto it = block.rbegin(); it != block.rend(); ++it) + if (!it->HasUses() && !MayHaveSideEffects(it->GetOpcode())) + it->Invalidate(); } static void IdentityRemovalPass(IR::Block& block) { From 560b93a8de9f3a400f11d91a6971c72117570c5c Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:28:41 +0000 Subject: [PATCH 47/56] [dynarmic] checked code alignment Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_interface.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index c65b582982..33acc46dfa 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -80,16 +80,16 @@ public: }; // TODO: Check code alignment - - const CodePtr current_code_ptr = [this] { + const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15)); + const CodePtr current_code_ptr = [this, aligned_code_ptr] { // RSB optimization const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask; if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { jit_state.rsb_ptr = new_rsb_ptr; - return reinterpret_cast(jit_state.rsb_codeptrs[new_rsb_ptr]); + return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]); } - - return GetCurrentBlock(); + return aligned_code_ptr; + //return GetCurrentBlock(); }(); const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr); From b47cbefa11f93e6338028d39e56045552173bd28 Mon Sep 17 00:00:00 2001 From: lizzie 
Date: Tue, 9 Sep 2025 03:31:12 +0000 Subject: [PATCH 48/56] [dynarmic] fix hardcoded AVX512 registers, use xmm0 instead of xmm16 to align with spec Signed-off-by: lizzie --- .../backend/x64/emit_x64_floating_point.cpp | 7 ++----- .../src/dynarmic/backend/x64/emit_x64_vector.cpp | 16 ++++++++-------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 47e51acb03..c7d1f8aa1c 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -92,13 +92,10 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list t FpFixup::Norm_Src, FpFixup::Norm_Src, FpFixup::Norm_Src); - - const Xbyak::Xmm tmp = xmm16; + const Xbyak::Xmm tmp = xmm0; FCODE(vmovap)(tmp, code.BConst(xword, denormal_to_zero)); - - for (const Xbyak::Xmm& xmm : to_daz) { + for (const Xbyak::Xmm& xmm : to_daz) FCODE(vfixupimms)(xmm, xmm, tmp, u8(0)); - } return; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index 99000c2a57..c6b0e3b864 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -609,8 +609,8 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -674,8 +674,8 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF)); code.vpxorq(right_shift, right_shift, right_shift); @@ -1955,8 +1955,8 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -2737,7 +2737,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm c = xmm16; + const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); code.vpsraq(c, a, 32); code.vpsllq(a, a, 32); code.vpsraq(a, a, 32); @@ -5461,7 +5461,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | 
HostFeature::AVX512BW)) { const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm masked = xmm16; + const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(); code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); From 37386890d81338e1610ad89f47623399e639d4ad Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 10:33:12 +0000 Subject: [PATCH 49/56] [dynarmic] unconditional branches always take Signed-off-by: lizzie --- src/dynarmic/docs/Design.md | 49 +++++++++++++------ src/dynarmic/docs/RegisterAllocator.md | 3 ++ .../dynarmic/backend/x64/block_of_code.cpp | 4 +- .../src/dynarmic/backend/x64/reg_alloc.cpp | 7 +-- .../A64/translate/impl/a64_branch.cpp | 9 ++-- 5 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/dynarmic/docs/Design.md b/src/dynarmic/docs/Design.md index 3c0deb5972..ffa8ccecdb 100644 --- a/src/dynarmic/docs/Design.md +++ b/src/dynarmic/docs/Design.md @@ -273,52 +273,73 @@ Exclusive OR (i.e.: XOR) ### Callback: {Read,Write}Memory{8,16,32,64} - ReadMemory8( vaddr) - ReadMemory16( vaddr) - ReadMemory32( vaddr) - ReadMemory64( vaddr) - WriteMemory8( vaddr, value_to_store) - WriteMemory16( vaddr, value_to_store) - WriteMemory32( vaddr, value_to_store) - WriteMemory64( vaddr, value_to_store) +```c++ + ReadMemory8( vaddr) + ReadMemory16( vaddr) + ReadMemory32( vaddr) + ReadMemory64( vaddr) + WriteMemory8( vaddr, value_to_store) + WriteMemory16( vaddr, value_to_store) + WriteMemory32( vaddr, value_to_store) + WriteMemory64( vaddr, value_to_store) +``` Memory access. ### Terminal: Interpret - SetTerm(IR::Term::Interpret{next}) +```c++ +SetTerm(IR::Term::Interpret{next}) +``` This terminal instruction calls the interpreter, starting at `next`. The interpreter must interpret exactly one instruction. ### Terminal: ReturnToDispatch - SetTerm(IR::Term::ReturnToDispatch{}) +```c++ +SetTerm(IR::Term::ReturnToDispatch{}) +``` This terminal instruction returns control to the dispatcher. The dispatcher will use the value in R15 to determine what comes next. ### Terminal: LinkBlock - SetTerm(IR::Term::LinkBlock{next}) +```c++ +SetTerm(IR::Term::LinkBlock{next}) +``` This terminal instruction jumps to the basic block described by `next` if we have enough cycles remaining. If we do not have enough cycles remaining, we return to the dispatcher, which will return control to the host. +### Terminal: LinkBlockFast + +```c++ +SetTerm(IR::Term::LinkBlockFast{next}) +``` + +This terminal instruction jumps to the basic block described by `next` unconditionally. +This relies on guarantees that must hold at runtime - i.e. that the program won't hang. + ### Terminal: PopRSBHint - SetTerm(IR::Term::PopRSBHint{}) +```c++ +SetTerm(IR::Term::PopRSBHint{}) +``` This terminal instruction checks the top of the Return Stack Buffer against R15. If RSB lookup fails, control is returned to the dispatcher. This is an optimization for faster function calls. A backend that doesn't support this optimization or doesn't have a RSB may choose to implement this exactly as -ReturnToDispatch. +`ReturnToDispatch`. ### Terminal: If - SetTerm(IR::Term::If{cond, term_then, term_else}) +```c++ +SetTerm(IR::Term::If{cond, term_then, term_else}) +``` This terminal instruction conditionally executes one terminal or another depending on the run-time state of the ARM flags.
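+
+As an illustration, a frontend lowering a conditional direct branch might combine these terminals as follows (a sketch only; the translator helpers shown are assumptions based on the names above):
+
+```c++
+// Branch to `target` when `cond` holds, otherwise fall through.
+// Unconditional branches collapse to a plain LinkBlock/LinkBlockFast.
+if (cond == IR::Cond::AL) {
+    ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
+} else {
+    ir.SetTerm(IR::Term::If{cond,
+                            IR::Term::LinkBlock{ir.current_location->SetPC(target)},
+                            IR::Term::LinkBlock{ir.current_location->AdvancePC(4)}});
+}
+```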
diff --git a/src/dynarmic/docs/RegisterAllocator.md b/src/dynarmic/docs/RegisterAllocator.md index 5a7210c791..f5bbaaf168 100644 --- a/src/dynarmic/docs/RegisterAllocator.md +++ b/src/dynarmic/docs/RegisterAllocator.md @@ -19,6 +19,9 @@ The member functions on `RegAlloc` are just a combination of the above concepts. The following registers are reserved for internal use and should NOT participate in register allocation: - `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access. - `%rsp`: Stack pointer. +- `%r15`: JIT pointer +- `%r14`: Page table pointer. +- `%r13`: Fastmem pointer. The layout convenes `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate. diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index d5d5f089ff..31b10ec6d5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -364,8 +364,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited, T_NEAR); - lock(); - or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); + lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); SwitchMxcsrOnEntry(); jmp(ABI_PARAM2); @@ -415,7 +414,6 @@ void BlockOfCode::GenRunCode(std::function rcp) { } xor_(eax, eax); - lock(); xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax); ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 919e75b77b..502a093d08 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -9,6 +9,7 @@ #include "dynarmic/backend/x64/reg_alloc.h" #include +#include #include #include @@ -152,19 +153,19 @@ bool Argument::GetImmediateU1() const noexcept { u8 Argument::GetImmediateU8() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x100); + ASSERT(imm <= u64(std::numeric_limits::max())); return u8(imm); } u16 Argument::GetImmediateU16() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x10000); + ASSERT(imm <= u64(std::numeric_limits::max())); return u16(imm); } u32 Argument::GetImmediateU32() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x100000000); + ASSERT(imm <= u64(std::numeric_limits::max())); return u32(imm); } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp index faf0686231..1bb0be823a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp @@ -20,9 +20,12 @@ bool TranslatorVisitor::B_cond(Imm<19> imm19, Cond cond) { bool TranslatorVisitor::B_uncond(Imm<26> imm26) { const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend(); const u64 target = ir.PC() + offset; - - //ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); - ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); + // Pattern to halt execution (B .) 
+ if (target == ir.PC()) { + ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); + return false; + } + ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); return false; } From 3b95c20a2a51c16319d79992c4ded4c79331236e Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 11:04:34 +0000 Subject: [PATCH 50/56] [dynarmic] Implement constant folding for CountLeadingZeros, add readXX constant folding for A64 Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 48 +++++++++++++++------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 88fc34fa10..750a8a496b 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -31,10 +31,11 @@ namespace Dynarmic::Optimization { -static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { +static void ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { for (auto& inst : block) { switch (inst.GetOpcode()) { - case IR::Opcode::A32ReadMemory8: { + case IR::Opcode::A32ReadMemory8: + case IR::Opcode::A64ReadMemory8: { if (inst.AreAllArgsImmediates()) { const u32 vaddr = inst.GetArg(1).GetU32(); if (cb->IsReadOnlyMemory(vaddr)) { const u8 value_from_memory = cb->MemoryRead8(vaddr); inst.ReplaceUsesWith(IR::Value{value_from_memory}); } } break; } - case IR::Opcode::A32ReadMemory16: { + case IR::Opcode::A32ReadMemory16: + case IR::Opcode::A64ReadMemory16: { if (inst.AreAllArgsImmediates()) { const u32 vaddr = inst.GetArg(1).GetU32(); if (cb->IsReadOnlyMemory(vaddr)) { const u16 value_from_memory = cb->MemoryRead16(vaddr); inst.ReplaceUsesWith(IR::Value{value_from_memory}); } } break; } - case IR::Opcode::A32ReadMemory32: { + case IR::Opcode::A32ReadMemory32: + case IR::Opcode::A64ReadMemory32: { if (inst.AreAllArgsImmediates()) { const u32 vaddr = inst.GetArg(1).GetU32(); if (cb->IsReadOnlyMemory(vaddr)) { const u32 value_from_memory = cb->MemoryRead32(vaddr); inst.ReplaceUsesWith(IR::Value{value_from_memory}); } } break; } - case IR::Opcode::A32ReadMemory64: { + case IR::Opcode::A32ReadMemory64: + case IR::Opcode::A64ReadMemory64: { if (inst.AreAllArgsImmediates()) { const u32 vaddr = inst.GetArg(1).GetU32(); if (cb->IsReadOnlyMemory(vaddr)) { const u64 value_from_memory = cb->MemoryRead64(vaddr); inst.ReplaceUsesWith(IR::Value{value_from_memory}); } } break; } @@ -667,14 +671,14 @@ using Op = Dynarmic::IR::Opcode; // bit size all over the place within folding functions. static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { if (is_32_bit) { - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); + inst.ReplaceUsesWith(IR::Value{u32(value)}); } else { inst.ReplaceUsesWith(IR::Value{value}); } } static IR::Value Value(bool is_32_bit, u64 value) { - return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; + return is_32_bit ? IR::Value{u32(value)} : IR::Value{value}; } template @@ -800,6 +804,23 @@ static void FoldByteReverse(IR::Inst& inst, Op op) { } } +/// Folds leading zero population count +/// +/// 1. imm -> countl_zero(imm) +/// +static void FoldCountLeadingZeros(IR::Inst& inst, bool is_32_bit) { + const auto operand = inst.GetArg(0); + if (operand.IsImmediate()) { + if (is_32_bit) { + const u32 result = std::countl_zero(u32(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else { + const u64 result = std::countl_zero(operand.GetImmediateAsU64()); + inst.ReplaceUsesWith(IR::Value{result}); + } + } +} + /// Folds division operations based on the following: /// /// 1.
x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) @@ -1020,8 +1041,7 @@ static void FoldZeroExtendXToLong(IR::Inst& inst) { static void ConstantPropagation(IR::Block& block) { for (auto& inst : block) { - const auto opcode = inst.GetOpcode(); - + auto const opcode = inst.GetOpcode(); switch (opcode) { case Op::LeastSignificantWord: FoldLeastSignificantWord(inst); break; @@ -1110,12 +1130,12 @@ static void ConstantPropagation(IR::Block& block) { break; case Op::ArithmeticShiftRightMasked32: if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); + ReplaceUsesWith(inst, true, s32(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); } break; case Op::ArithmeticShiftRightMasked64: if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); + ReplaceUsesWith(inst, false, s64(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); } break; case Op::RotateRightMasked32: @@ -1187,6 +1207,10 @@ static void ConstantPropagation(IR::Block& block) { case Op::ByteReverseDual: FoldByteReverse(inst, opcode); break; + case Op::CountLeadingZeros32: + case Op::CountLeadingZeros64: + FoldCountLeadingZeros(inst, opcode == Op::CountLeadingZeros32); + break; default: break; } @@ -1462,7 +1486,7 @@ void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization: Optimization::DeadCodeElimination(block); } if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { - Optimization::A32ConstantMemoryReads(block, conf.callbacks); + Optimization::ConstantMemoryReads(block, conf.callbacks); Optimization::ConstantPropagation(block); Optimization::DeadCodeElimination(block); } From 22ec37db75b8d1cddaf6d2d0b2ee9480fc447c08 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 14:46:35 +0000 Subject: [PATCH 51/56] [dynarmic] regalloc use scratchimpl that uses all instead of iterating Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_emit_x64.cpp | 4 +-- .../src/dynarmic/backend/x64/reg_alloc.cpp | 30 ++++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 1691bbb3b7..fa1f071fbf 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept { auto const opcode = inst.GetOpcode(); // Call the relevant Emit* member function. switch (opcode) { -#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch; +#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch; #define A32OPC(name, type, ...) -#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch; +#define A64OPC(name, type, ...)
case IR::Opcode::A64##name: goto a64_branch; #include "dynarmic/ir/opcodes.inc" #undef OPCODE #undef A32OPC #undef A64OPC diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 502a093d08..2db817a90f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -367,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def, if (result_def) { DefineValueImpl(result_def, ABI_RETURN); } - + for (size_t i = 0; i < args.size(); i++) { + if (args[i]) { + UseScratch(*args[i], args_hostloc[i]); + } else { + ScratchGpr(args_hostloc[i]); // TODO: Force spill + } + } + // Must match with ScratchImpl + for (auto const gpr : other_caller_save) { + MoveOutOfTheWay(gpr); + LocInfo(gpr).WriteLock(); + } for (size_t i = 0; i < args.size(); i++) { if (args[i] && !args[i]->get().IsVoid()) { - UseScratch(*args[i], args_hostloc[i]); // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]); switch (args[i]->get().GetType()) { @@ -390,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def, } } } - - for (size_t i = 0; i < args.size(); i++) - if (!args[i]) { - // TODO: Force spill - ScratchGpr(args_hostloc[i]); - } - for (auto const caller_saved : other_caller_save) - ScratchImpl({caller_saved}); } void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept { @@ -560,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept { } HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { -#if 0 // TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows // but it's fine anyways. We can find other ways to cheat it later - but which?!?! // we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end? // TODO(lizzie): This needs to be investigated further later. // Do not spill XMM into other XMM silly - if (!is_xmm) { + /*if (!is_xmm) { // TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY // Intel recommends to spill GPR onto XMM registers IF POSSIBLE // TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call? @@ -574,8 +575,9 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) return loc; - } -#endif + }*/ + // TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits + // of spilling on XMM versus the potential cost of using XMM registers.
// Otherwise go to stack spilling for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) From 189b1a18508d87a855e799a4eb37458958faf87d Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 18:29:39 +0000 Subject: [PATCH 52/56] [dynarmic] use better boost::visitor Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 1 - .../src/dynarmic/backend/x64/a32_emit_x64.cpp | 1 - .../src/dynarmic/backend/x64/emit_x64.cpp | 6 ++-- .../src/dynarmic/common/variant_util.h | 29 ------------------- src/dynarmic/tests/A32/fuzz_arm.cpp | 4 +-- 5 files changed, 5 insertions(+), 36 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/variant_util.h diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index d6be793dc6..3d85f1de29 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -61,7 +61,6 @@ add_library(dynarmic common/string_util.h common/u128.cpp common/u128.h - common/variant_util.h frontend/A32/a32_types.cpp frontend/A32/a32_types.h frontend/A64/a64_types.cpp diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index fb306336cf..3186758380 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -28,7 +28,6 @@ #include "dynarmic/backend/x64/nzcv_util.h" #include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/stack_layout.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index 3bc93e6fd5..0bb2604025 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -11,6 +11,7 @@ #include #include "dynarmic/common/assert.h" +#include #include #include #include "dynarmic/common/common_types.h" @@ -21,7 +22,6 @@ #include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/verbose_debugging_output.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -347,14 +347,14 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de } void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - Common::VisitVariant(terminal, [this, initial_location, is_single_step](auto x) { + boost::apply_visitor([this, initial_location, is_single_step](auto x) { using T = std::decay_t; if constexpr (!std::is_same_v) { this->EmitTerminalImpl(x, initial_location, is_single_step); } else { ASSERT_MSG(false, "Invalid terminal"); } - }); + }, terminal); } void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { diff --git a/src/dynarmic/src/dynarmic/common/variant_util.h b/src/dynarmic/src/dynarmic/common/variant_util.h deleted file mode 100644 index 4dd7f67167..0000000000 --- a/src/dynarmic/src/dynarmic/common/variant_util.h +++ /dev/null @@ -1,29 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include - -namespace Dynarmic::Common { -namespace detail { - -template -struct VariantVisitor : boost::static_visitor - , Lambda { - VariantVisitor(Lambda&& lambda) - : Lambda(std::move(lambda)) {} - - using Lambda::operator(); -}; - -} // namespace detail - -template -inline ReturnT VisitVariant(Variant&& variant, Lambda&& lambda) { - return boost::apply_visitor(detail::VariantVisitor(std::move(lambda)), variant); -} - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp index 5ee6d2bf02..bef473a491 100644 --- a/src/dynarmic/tests/A32/fuzz_arm.cpp +++ b/src/dynarmic/tests/A32/fuzz_arm.cpp @@ -47,7 +47,7 @@ using namespace Dynarmic; template bool AnyLocationDescriptorForTerminalHas(IR::Terminal terminal, Fn fn) { - return Common::VisitVariant(terminal, [&](auto t) -> bool { + return boost::apply_visitor([&](auto t) -> bool { using T = std::decay_t; if constexpr (std::is_same_v) { return false; @@ -73,7 +73,7 @@ bool AnyLocationDescriptorForTerminalHas(IR::Terminal terminal, Fn fn) { ASSERT_MSG(false, "Invalid terminal type"); return false; } - }); + }, terminal); } bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) { From b7944e2bcfb46efaee87bb6c8dd1903c210b92ea Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 16 Sep 2025 20:20:21 +0000 Subject: [PATCH 53/56] [dynarmic] fix exception posix handler Signed-off-by: lizzie --- .../backend/exception_handler_posix.cpp | 81 ++++--------------- 1 file changed, 14 insertions(+), 67 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index dea7a0c4b0..f1f208179f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -7,14 +7,16 @@ */ #include +#include +#include #include +#include #include +#include #include "dynarmic/backend/exception_handler.h" #include "dynarmic/common/assert.h" #include "dynarmic/common/context.h" -#include #include "dynarmic/common/common_types.h" - #if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/block_of_code.h" #elif defined(ARCHITECTURE_arm64) @@ -111,68 +113,14 @@ void RegisterHandler() { } } -SigHandler::SigHandler() { - const size_t signal_stack_size = std::max(SIGSTKSZ, 2 * 1024 * 1024); - - signal_stack_memory = std::malloc(signal_stack_size); - - stack_t signal_stack; - signal_stack.ss_sp = signal_stack_memory; - signal_stack.ss_size = signal_stack_size; - signal_stack.ss_flags = 0; - if (sigaltstack(&signal_stack, nullptr) != 0) { - fmt::print(stderr, "dynarmic: POSIX SigHandler: init failure at sigaltstack\n"); - supports_fast_mem = false; - return; - } - - struct sigaction sa; - sa.sa_handler = nullptr; - sa.sa_sigaction = &SigHandler::SigAction; - sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART; - sigemptyset(&sa.sa_mask); - if (sigaction(SIGSEGV, &sa, &old_sa_segv) != 0) { - fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGSEGV handler\n"); - supports_fast_mem = false; - return; - } -#ifdef __APPLE__ - if (sigaction(SIGBUS, &sa, &old_sa_bus) != 0) { - fmt::print(stderr, "dynarmic: POSIX SigHandler: could not set SIGBUS handler\n"); - supports_fast_mem = false; - return; - } -#endif -} - -SigHandler::~SigHandler() { - std::free(signal_stack_memory); -} - -void 
From c030cd7cf5f99104bb9b1662dec669b3a33cd253 Mon Sep 17 00:00:00 2001
From: lizzie
Date: Wed, 17 Sep 2025 20:51:37 +0000
Subject: [PATCH 54/56] [dynarmic] Allow to skip verification pass

Signed-off-by: lizzie
---
 src/dynarmic/src/dynarmic/interface/optimization_flags.h | 2 ++
 src/dynarmic/src/dynarmic/ir/opt_passes.cpp              | 8 ++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/dynarmic/src/dynarmic/interface/optimization_flags.h b/src/dynarmic/src/dynarmic/interface/optimization_flags.h
index 743d902767..2e8822b27a 100644
--- a/src/dynarmic/src/dynarmic/interface/optimization_flags.h
+++ b/src/dynarmic/src/dynarmic/interface/optimization_flags.h
@@ -34,6 +34,8 @@ enum class OptimizationFlag : std::uint32_t {
     MiscIROpt = 0x00000020,
     /// Optimize for code speed rather than for code size (this serves well for tight loops)
     CodeSpeed = 0x00000040,
+    /// Disable verification passes
+    DisableVerification = 0x00000080,

     /// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations.
     /// This unfuses fused instructions to improve performance on host CPUs without FMA support.
diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp
index 750a8a496b..e9175f0e6b 100644
--- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp
+++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp
@@ -1491,7 +1491,9 @@ void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization:
         Optimization::DeadCodeElimination(block);
     }
     Optimization::IdentityRemovalPass(block);
-    Optimization::VerificationPass(block);
+    if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) {
+        Optimization::VerificationPass(block);
+    }
 }

 void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) {
@@ -1509,7 +1511,9 @@ void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization:
     if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] {
         Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks);
     }
-    Optimization::VerificationPass(block);
+    if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) {
+        Optimization::VerificationPass(block);
+    }
 }

 } // namespace Dynarmic::Optimization
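From the embedder's side, the new flag is consumed like any other `OptimizationFlag` member. A hypothetical configuration snippet - the `optimizations` field and header paths follow dynarmic's public interface, but this exact code appears nowhere in the series:

```c++
#include "dynarmic/interface/A64/config.h"
#include "dynarmic/interface/optimization_flags.h"

Dynarmic::A64::UserConfig MakeFastConfig() {
    Dynarmic::A64::UserConfig config{};
    // Trade IR validation for lower compile latency. Only sensible once the
    // pass pipeline is trusted, since malformed IR now goes undetected.
    config.optimizations = config.optimizations | Dynarmic::OptimizationFlag::DisableVerification;
    return config;
}
```

Note that, unlike the `Unsafe_*` flags, this one does not change the semantics of emitted code; it only skips a debugging safety net.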
From 17933b15c76aec4cd44b4fd4a45b505afca9d93c Mon Sep 17 00:00:00 2001
From: lizzie
Date: Wed, 17 Sep 2025 21:27:23 +0000
Subject: [PATCH 55/56] [dynarmic] inlined pool in block + slab-like for each block

Signed-off-by: lizzie
---
 src/dynarmic/src/dynarmic/ir/basic_block.cpp | 16 +++++++++++++++-
 src/dynarmic/src/dynarmic/ir/basic_block.h   |  8 ++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp
index a13a7ebfc9..612a3d28e9 100644
--- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp
+++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp
@@ -37,7 +37,21 @@ Block::Block(const LocationDescriptor& location)
 /// @param args A sequence of Value instances used as arguments for the instruction.
 /// @returns Iterator to the newly created instruction.
 Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
-    IR::Inst* inst = new IR::Inst(opcode);
+    // First try the "inline" buffer; otherwise fall back to a slower slab-like allocation scheme.
+    // The purpose is to avoid many calls to new/delete, which invoke malloc, which invokes mmap -
+    // just pool it. The inline buffer exists because many small blocks with few instructions get
+    // created due to subpar optimisations in other passes, and branch-heavy code benefits hugely
+    // from the coherency of the faster allocations.
+    IR::Inst* inst;
+    if (inlined_inst.size() < inlined_inst.max_size()) {
+        inst = &inlined_inst[inlined_inst.size()];
+        inlined_inst.emplace_back(opcode);
+    } else {
+        if (pooled_inst.empty() || pooled_inst.back().size() == pooled_inst.back().max_size())
+            pooled_inst.emplace_back();
+        inst = &pooled_inst.back()[pooled_inst.back().size()];
+        pooled_inst.back().emplace_back(opcode);
+    }
     DEBUG_ASSERT(args.size() == inst->NumArgs());
     std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
         inst->SetArg(index, arg);
diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h
index 2f2d9ab6de..166a5e4d1b 100644
--- a/src/dynarmic/src/dynarmic/ir/basic_block.h
+++ b/src/dynarmic/src/dynarmic/ir/basic_block.h
@@ -13,6 +13,9 @@
 #include
 #include
+#include
+#include
+#include
 #include
 #include "dynarmic/common/common_types.h"
@@ -163,8 +166,12 @@ public:
         return cycle_count;
     }
 private:
+    /// "Hot cache" for small blocks so we don't call the global allocator
+    boost::container::static_vector inlined_inst;
     /// List of instructions in this block.
     instruction_list_type instructions;
+    /// "Long/far" memory pool
+    boost::container::stable_vector> pooled_inst;
     /// Block to execute next if `cond` did not pass.
     std::optional<LocationDescriptor> cond_failed = {};
     /// Description of the starting location of this block
@@ -180,6 +187,7 @@ private:
     /// Number of cycles this block takes to execute.
     size_t cycle_count = 0;
 };
+static_assert(sizeof(Block) == 2048);

 /// Returns a string representation of the contents of block. Intended for debugging.
 std::string DumpBlock(const IR::Block& block) noexcept;
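The allocation strategy in patch 55 is easier to follow outside the diff. A self-contained model, assuming small illustrative capacities (the real ones are the `static_vector` template arguments elided above, apparently sized so that `static_assert(sizeof(Block) == 2048)` holds):

```c++
#include <cstddef>
#include <cstdio>
#include <boost/container/stable_vector.hpp>
#include <boost/container/static_vector.hpp>

struct Inst { int opcode; };

class Pool {
    static constexpr std::size_t inline_cap = 14;  // illustrative, not the real size
    static constexpr std::size_t slab_cap = 32;    // illustrative, not the real size
    // Tier 1: fixed-capacity buffer living inside the owning object itself.
    boost::container::static_vector<Inst, inline_cap> inlined;
    // Tier 2: overflow slabs. stable_vector never relocates its elements,
    // so growing it keeps previously handed-out Inst* valid.
    boost::container::stable_vector<boost::container::static_vector<Inst, slab_cap>> slabs;
public:
    Inst* New(int opcode) {
        if (inlined.size() < inlined.capacity()) {
            inlined.push_back(Inst{opcode});
            return &inlined.back();
        }
        if (slabs.empty() || slabs.back().size() == slabs.back().capacity())
            slabs.emplace_back();  // open a fresh slab; old slabs never move
        slabs.back().push_back(Inst{opcode});
        return &slabs.back().back();
    }
};

int main() {
    Pool pool;
    for (int i = 0; i < 100; ++i)
        pool.New(i);  // first 14 land inline, the rest in 32-entry slabs
    std::puts("ok");
}
```

The container choice is the load-bearing part: a plain `vector` of slabs would reallocate and move them, invalidating every `Inst*` already threaded into the intrusive instruction list, whereas `stable_vector` guarantees address stability as slabs are appended.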
From 7bfa16bf0df5689f3afb18e6d257576f9f9265c7 Mon Sep 17 00:00:00 2001
From: lizzie
Date: Fri, 19 Sep 2025 16:38:17 +0000
Subject: [PATCH 56/56] Fix license headers

Signed-off-by: lizzie
---
 src/dynarmic/src/dynarmic/CMakeLists.txt | 2 ++
 src/dynarmic/tests/CMakeLists.txt        | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt
index 3d85f1de29..3155a1cda1 100644
--- a/src/dynarmic/src/dynarmic/CMakeLists.txt
+++ b/src/dynarmic/src/dynarmic/CMakeLists.txt
@@ -1,3 +1,5 @@
+# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+# SPDX-License-Identifier: GPL-3.0-or-later
 include(TargetArchitectureSpecificSources)

 add_library(dynarmic
diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt
index f8f420e2d6..4ace6c2afd 100644
--- a/src/dynarmic/tests/CMakeLists.txt
+++ b/src/dynarmic/tests/CMakeLists.txt
@@ -1,3 +1,5 @@
+# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+# SPDX-License-Identifier: GPL-3.0-or-later
 include(TargetArchitectureSpecificSources)

 add_executable(dynarmic_tests