From 2d9d7fafcb9cfd7338b63c962355a752bc481532 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:54:53 +0000 Subject: [PATCH 01/17] [dynarmic] reduce opt pass latency Signed-off-by: lizzie --- src/dynarmic/CMakeLists.txt | 3 - src/dynarmic/docs/RegisterAllocator.md | 40 +- .../docs/ReturnStackBufferOptimization.md | 162 +- src/dynarmic/src/dynarmic/CMakeLists.txt | 199 +-- .../backend/arm64/a32_address_space.cpp | 18 +- .../backend/arm64/a64_address_space.cpp | 19 +- .../backend/riscv64/a32_address_space.cpp | 16 +- .../dynarmic/backend/x64/a32_interface.cpp | 16 +- .../dynarmic/backend/x64/a64_interface.cpp | 18 +- .../src/dynarmic/common/memory_pool.cpp | 13 - .../src/dynarmic/common/memory_pool.h | 61 - src/dynarmic/src/dynarmic/ir/basic_block.cpp | 7 +- src/dynarmic/src/dynarmic/ir/basic_block.h | 3 - src/dynarmic/src/dynarmic/ir/ir_emitter.cpp | 21 - .../ir/opt/a32_constant_memory_reads_pass.cpp | 70 - .../ir/opt/a32_get_set_elimination_pass.cpp | 382 ----- .../ir/opt/a64_callback_config_pass.cpp | 57 - .../ir/opt/a64_get_set_elimination_pass.cpp | 165 -- .../ir/opt/a64_merge_interpret_blocks.cpp | 57 - .../ir/opt/constant_propagation_pass.cpp | 559 ------ .../ir/opt/dead_code_elimination_pass.cpp | 23 - .../dynarmic/ir/opt/identity_removal_pass.cpp | 44 - src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h | 127 -- .../src/dynarmic/ir/opt/naming_pass.cpp | 18 - src/dynarmic/src/dynarmic/ir/opt/passes.h | 47 - .../src/dynarmic/ir/opt/polyfill_pass.cpp | 218 --- .../src/dynarmic/ir/opt/verification_pass.cpp | 51 - src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 1502 +++++++++++++++++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 34 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 10 +- src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 2 +- src/dynarmic/tests/CMakeLists.txt | 146 +- src/dynarmic/tests/print_info.cpp | 2 +- 33 files changed, 1808 insertions(+), 2302 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.cpp delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.h delete mode 100644 src/dynarmic/src/dynarmic/ir/ir_emitter.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/passes.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.h diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 5c28435f72..471a08c703 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -23,9 +23,6 @@ option(DYNARMIC_USE_PRECOMPILED_HEADERS "Use precompiled headers" ON) option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF) option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. cross-compiling)" OFF) option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT}) -if (NOT DEFINED DYNARMIC_FRONTENDS) - set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable") -endif() # Default to a Release build if (NOT CMAKE_BUILD_TYPE) diff --git a/src/dynarmic/docs/RegisterAllocator.md b/src/dynarmic/docs/RegisterAllocator.md index fea6f19e6a..5a7210c791 100644 --- a/src/dynarmic/docs/RegisterAllocator.md +++ b/src/dynarmic/docs/RegisterAllocator.md @@ -16,19 +16,31 @@ Note that `Use`ing a value decrements its `use_count` by one. When the `use_coun The member functions on `RegAlloc` are just a combination of the above concepts. +The following registers are reserved for internal use and should NOT participate in register allocation: +- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access. +- `%rsp`: Stack pointer. + +The layout convenes `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate. + +Do NEVER modify `%r15`, we must make it clear that this register is "immutable" for the entirety of the JIT block duration. + ### `Scratch` - Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr) - Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm) +```c++ +Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr); +Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm); +``` At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope. ### Pure `Use` - Xbyak::Reg64 UseGpr(Argument& arg); - Xbyak::Xmm UseXmm(Argument& arg); - OpArg UseOpArg(Argument& arg); - void Use(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseGpr(Argument& arg); +Xbyak::Xmm UseXmm(Argument& arg); +OpArg UseOpArg(Argument& arg); +void Use(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it @@ -39,9 +51,11 @@ This register **must not** have it's value changed. ### `UseScratch` - Xbyak::Reg64 UseScratchGpr(Argument& arg); - Xbyak::Xmm UseScratchXmm(Argument& arg); - void UseScratch(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseScratchGpr(Argument& arg); +Xbyak::Xmm UseScratchXmm(Argument& arg); +void UseScratch(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it @@ -55,7 +69,9 @@ You are free to modify the value in the register. The register is discarded at t A `Define` is the defintion of a value. This is the only time when a value may be set. - void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +```c++ +void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +``` By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the value to the specified register `reg`. @@ -64,7 +80,9 @@ value to the specified register `reg`. Adding a `Define` to an existing value. - void DefineValue(IR::Inst* inst, Argument& arg); +```c++ +void DefineValue(IR::Inst* inst, Argument& arg); +``` You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are emitted. diff --git a/src/dynarmic/docs/ReturnStackBufferOptimization.md b/src/dynarmic/docs/ReturnStackBufferOptimization.md index 6ffe41bcc6..0e72c3bce8 100644 --- a/src/dynarmic/docs/ReturnStackBufferOptimization.md +++ b/src/dynarmic/docs/ReturnStackBufferOptimization.md @@ -23,15 +23,17 @@ One complication dynarmic has is that a compiled block is not uniquely identifia the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block. - u64 LocationDescriptor::UniqueHash() const { - // This value MUST BE UNIQUE. - // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint - u64 pc_u64 = u64(arm_pc) << 32; - u64 fpscr_u64 = u64(fpscr.Value()); - u64 t_u64 = cpsr.T() ? 1 : 0; - u64 e_u64 = cpsr.E() ? 2 : 0; - return pc_u64 | fpscr_u64 | t_u64 | e_u64; - } +```c++ +u64 LocationDescriptor::UniqueHash() const { + // This value MUST BE UNIQUE. + // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint + u64 pc_u64 = u64(arm_pc) << 32; + u64 fpscr_u64 = u64(fpscr.Value()); + u64 t_u64 = cpsr.T() ? 1 : 0; + u64 e_u64 = cpsr.E() ? 2 : 0; + return pc_u64 | fpscr_u64 | t_u64 | e_u64; +} +``` ## Our implementation isn't actually a stack @@ -49,97 +51,107 @@ host addresses for the corresponding the compiled blocks. size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8 showed degraded performance. - struct JitState { - // ... +```c++ +struct JitState { + // ... - static constexpr size_t RSBSize = 8; // MUST be a power of 2. - u32 rsb_ptr = 0; - std::array rsb_location_descriptors; - std::array rsb_codeptrs; - void ResetRSB(); + static constexpr size_t RSBSize = 8; // MUST be a power of 2. + u32 rsb_ptr = 0; + std::array rsb_location_descriptors; + std::array rsb_codeptrs; + void ResetRSB(); - // ... - }; + // ... +}; +``` ### RSB Push We insert our prediction at the insertion point iff the RSB doesn't already contain a prediction with the same `UniqueHash`. - void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { + using namespace Xbyak::util; - ASSERT(inst->GetArg(0).IsImmediate()); - u64 imm64 = inst->GetArg(0).GetU64(); + ASSERT(inst->GetArg(0).IsImmediate()); + u64 imm64 = inst->GetArg(0).GetU64(); - Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); - Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); - Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); - u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() - ? u64(unique_hash_to_code_ptr[imm64]) - : u64(code->GetReturnFromRunCodeAddress()); + Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); + Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); + Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); + u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() + ? u64(unique_hash_to_code_ptr[imm64]) + : u64(code->GetReturnFromRunCodeAddress()); - code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); - code->add(index_reg, 1); - code->and_(index_reg, u32(JitState::RSBSize - 1)); + code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); + code->add(index_reg, 1); + code->and_(index_reg, u32(JitState::RSBSize - 1)); - code->mov(loc_desc_reg, u64(imm64)); - CodePtr patch_location = code->getCurr(); - patch_unique_hash_locations[imm64].emplace_back(patch_location); - code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. - code->EnsurePatchLocationSize(patch_location, 10); + code->mov(loc_desc_reg, u64(imm64)); + CodePtr patch_location = code->getCurr(); + patch_unique_hash_locations[imm64].emplace_back(patch_location); + code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. + code->EnsurePatchLocationSize(patch_location, 10); - Xbyak::Label label; - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->je(label, code->T_SHORT); - } - - code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); - code->L(label); + Xbyak::Label label; + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->je(label, code->T_SHORT); } + code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); + code->L(label); +} +``` + In pseudocode: - for (i := 0 .. RSBSize-1) - if (rsb_location_descriptors[i] == imm64) - goto label; - rsb_ptr++; - rsb_ptr %= RSBSize; - rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash - rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; - label: +```c++ + for (i := 0 .. RSBSize-1) + if (rsb_location_descriptors[i] == imm64) + goto label; + rsb_ptr++; + rsb_ptr %= RSBSize; + rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash + rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; +label: +``` ## RSB Pop To check if a predicition is in the RSB, we linearly scan the RSB. - void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { + using namespace Xbyak::util; - // This calculation has to match up with IREmitter::PushRSB - code->mov(ecx, MJitStateReg(Arm::Reg::PC)); - code->shl(rcx, 32); - code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); - code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); - code->or_(rbx, rcx); + // This calculation has to match up with IREmitter::PushRSB + code->mov(ecx, MJitStateReg(Arm::Reg::PC)); + code->shl(rcx, 32); + code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); + code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); + code->or_(rbx, rcx); - code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); - } - - code->jmp(rax); + code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); } + code->jmp(rax); +} +``` + In pseudocode: - rbx := ComputeUniqueHash() - rax := ReturnToDispatch - for (i := 0 .. RSBSize-1) - if (rbx == rsb_location_descriptors[i]) - rax = rsb_codeptrs[i] - goto rax \ No newline at end of file +```c++ +rbx := ComputeUniqueHash() +rax := ReturnToDispatch +for (i := 0 .. RSBSize-1) + if (rbx == rsb_location_descriptors[i]) + rax = rsb_codeptrs[i] +goto rax +``` diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index e060989f82..6948bf39fb 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -56,8 +56,6 @@ add_library(dynarmic common/lut_from_list.h common/math_util.cpp common/math_util.h - common/memory_pool.cpp - common/memory_pool.h common/safe_ops.h common/spin_lock.h common/string_util.h @@ -78,7 +76,6 @@ add_library(dynarmic ir/basic_block.cpp ir/basic_block.h ir/cond.h - ir/ir_emitter.cpp ir/ir_emitter.h ir/location_descriptor.cpp ir/location_descriptor.h @@ -87,78 +84,59 @@ add_library(dynarmic ir/opcodes.cpp ir/opcodes.h ir/opcodes.inc - ir/opt/constant_propagation_pass.cpp - ir/opt/dead_code_elimination_pass.cpp - ir/opt/identity_removal_pass.cpp - ir/opt/ir_matcher.h - ir/opt/naming_pass.cpp - ir/opt/passes.h - ir/opt/polyfill_pass.cpp - ir/opt/verification_pass.cpp + ir/opt_passes.cpp + ir/opt_passes.h ir/terminal.h ir/type.cpp ir/type.h ir/value.cpp ir/value.h + # A32 + frontend/A32/a32_ir_emitter.cpp + frontend/A32/a32_ir_emitter.h + frontend/A32/a32_location_descriptor.cpp + frontend/A32/a32_location_descriptor.h + frontend/A32/decoder/arm.h + frontend/A32/decoder/arm.inc + frontend/A32/decoder/asimd.h + frontend/A32/decoder/asimd.inc + frontend/A32/decoder/thumb16.h + frontend/A32/decoder/thumb16.inc + frontend/A32/decoder/thumb32.h + frontend/A32/decoder/thumb32.inc + frontend/A32/decoder/vfp.h + frontend/A32/decoder/vfp.inc + frontend/A32/disassembler/disassembler.h + frontend/A32/disassembler/disassembler_arm.cpp + frontend/A32/disassembler/disassembler_thumb.cpp + frontend/A32/FPSCR.h + frontend/A32/ITState.h + frontend/A32/PSR.h + frontend/A32/translate/a32_translate.cpp + frontend/A32/translate/a32_translate.h + frontend/A32/translate/conditional_state.cpp + frontend/A32/translate/conditional_state.h + frontend/A32/translate/translate_arm.cpp + frontend/A32/translate/translate_thumb.cpp + interface/A32/a32.h + interface/A32/arch_version.h + interface/A32/config.h + interface/A32/coprocessor.h + interface/A32/coprocessor_util.h + interface/A32/disassembler.h + # A64 + frontend/A64/a64_ir_emitter.cpp + frontend/A64/a64_ir_emitter.h + frontend/A64/a64_location_descriptor.cpp + frontend/A64/a64_location_descriptor.h + frontend/A64/decoder/a64.h + frontend/A64/decoder/a64.inc + frontend/A64/translate/a64_translate.cpp + frontend/A64/translate/a64_translate.h + interface/A64/a64.h + interface/A64/config.h ) -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A32/a32_ir_emitter.cpp - frontend/A32/a32_ir_emitter.h - frontend/A32/a32_location_descriptor.cpp - frontend/A32/a32_location_descriptor.h - frontend/A32/decoder/arm.h - frontend/A32/decoder/arm.inc - frontend/A32/decoder/asimd.h - frontend/A32/decoder/asimd.inc - frontend/A32/decoder/thumb16.h - frontend/A32/decoder/thumb16.inc - frontend/A32/decoder/thumb32.h - frontend/A32/decoder/thumb32.inc - frontend/A32/decoder/vfp.h - frontend/A32/decoder/vfp.inc - frontend/A32/disassembler/disassembler.h - frontend/A32/disassembler/disassembler_arm.cpp - frontend/A32/disassembler/disassembler_thumb.cpp - frontend/A32/FPSCR.h - frontend/A32/ITState.h - frontend/A32/PSR.h - frontend/A32/translate/a32_translate.cpp - frontend/A32/translate/a32_translate.h - frontend/A32/translate/conditional_state.cpp - frontend/A32/translate/conditional_state.h - frontend/A32/translate/translate_arm.cpp - frontend/A32/translate/translate_thumb.cpp - interface/A32/a32.h - interface/A32/arch_version.h - interface/A32/config.h - interface/A32/coprocessor.h - interface/A32/coprocessor_util.h - interface/A32/disassembler.h - ir/opt/a32_constant_memory_reads_pass.cpp - ir/opt/a32_get_set_elimination_pass.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A64/a64_ir_emitter.cpp - frontend/A64/a64_ir_emitter.h - frontend/A64/a64_location_descriptor.cpp - frontend/A64/a64_location_descriptor.h - frontend/A64/decoder/a64.h - frontend/A64/decoder/a64.inc - frontend/A64/translate/a64_translate.cpp - frontend/A64/translate/a64_translate.h - interface/A64/a64.h - interface/A64/config.h - ir/opt/a64_callback_config_pass.cpp - ir/opt/a64_get_set_elimination_pass.cpp - ir/opt/a64_merge_interpret_blocks.cpp - ) -endif() - if ("x86_64" IN_LIST ARCHITECTURE) target_compile_definitions(dynarmic PRIVATE XBYAK_OLD_DISP_CHECK=1) target_link_libraries(dynarmic @@ -211,29 +189,21 @@ if ("x86_64" IN_LIST ARCHITECTURE) common/spin_lock_x64.h common/x64_disassemble.cpp common/x64_disassemble.h + # A32 + backend/x64/a32_emit_x64.cpp + backend/x64/a32_emit_x64.h + backend/x64/a32_emit_x64_memory.cpp + backend/x64/a32_interface.cpp + backend/x64/a32_jitstate.cpp + backend/x64/a32_jitstate.h + # A64 + backend/x64/a64_emit_x64.cpp + backend/x64/a64_emit_x64.h + backend/x64/a64_emit_x64_memory.cpp + backend/x64/a64_interface.cpp + backend/x64/a64_jitstate.cpp + backend/x64/a64_jitstate.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a32_emit_x64.cpp - backend/x64/a32_emit_x64.h - backend/x64/a32_emit_x64_memory.cpp - backend/x64/a32_interface.cpp - backend/x64/a32_jitstate.cpp - backend/x64/a32_jitstate.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a64_emit_x64.cpp - backend/x64/a64_emit_x64.h - backend/x64/a64_emit_x64_memory.cpp - backend/x64/a64_interface.cpp - backend/x64/a64_jitstate.cpp - backend/x64/a64_jitstate.h - ) - endif() endif() if ("arm64" IN_LIST ARCHITECTURE) @@ -277,25 +247,17 @@ if ("arm64" IN_LIST ARCHITECTURE) backend/arm64/verbose_debugging_output.h common/spin_lock_arm64.cpp common/spin_lock_arm64.h + # A32 + backend/arm64/a32_address_space.cpp + backend/arm64/a32_address_space.h + backend/arm64/a32_core.h + backend/arm64/a32_interface.cpp + # A64 + backend/arm64/a64_address_space.cpp + backend/arm64/a64_address_space.h + backend/arm64/a64_core.h + backend/arm64/a64_interface.cpp ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a32_address_space.cpp - backend/arm64/a32_address_space.h - backend/arm64/a32_core.h - backend/arm64/a32_interface.cpp - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a64_address_space.cpp - backend/arm64/a64_address_space.h - backend/arm64/a64_core.h - backend/arm64/a64_interface.cpp - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -324,21 +286,14 @@ if ("riscv" IN_LIST ARCHITECTURE) backend/riscv64/reg_alloc.cpp backend/riscv64/reg_alloc.h backend/riscv64/stack_layout.h + # A32 + backend/riscv64/a32_address_space.cpp + backend/riscv64/a32_address_space.h + backend/riscv64/a32_core.h + backend/riscv64/a32_interface.cpp + backend/riscv64/code_block.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - backend/riscv64/a32_address_space.cpp - backend/riscv64/a32_address_space.h - backend/riscv64/a32_core.h - backend/riscv64/a32_interface.cpp - backend/riscv64/code_block.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") - endif() + message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") endif() if (WIN32) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index bbdf925ff4..8e48fa3687 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -16,7 +16,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -163,21 +163,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index d4fe9a9cb7..2b50ad9ea3 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -331,22 +331,7 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp index efa211618b..0bb9591411 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/backend/riscv64/stack_layout.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::RV64 { @@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp index b116ec180e..382eb70f3f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp @@ -29,7 +29,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/location_descriptor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A32 { @@ -217,19 +217,7 @@ private: block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE); IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index ddd2327395..c65b582982 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -25,7 +25,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/a64.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A64 { @@ -275,21 +275,7 @@ private: const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block).entrypoint; } diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.cpp b/src/dynarmic/src/dynarmic/common/memory_pool.cpp deleted file mode 100644 index f41dd92af5..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.cpp +++ /dev/null @@ -1,13 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/common/memory_pool.h" - -#include - -namespace Dynarmic::Common { - - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.h b/src/dynarmic/src/dynarmic/common/memory_pool.h deleted file mode 100644 index c99316e107..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -namespace Dynarmic::Common { - -/// @tparam object_size Byte-size of objects to construct -/// @tparam slab_size Number of objects to have per slab -template -class Pool { -public: - inline Pool() noexcept { - AllocateNewSlab(); - } - inline ~Pool() noexcept { - std::free(current_slab); - for (char* slab : slabs) { - std::free(slab); - } - } - - Pool(const Pool&) = delete; - Pool(Pool&&) = delete; - - Pool& operator=(const Pool&) = delete; - Pool& operator=(Pool&&) = delete; - - /// @brief Returns a pointer to an `object_size`-bytes block of memory. - [[nodiscard]] void* Alloc() noexcept { - if (remaining == 0) { - slabs.push_back(current_slab); - AllocateNewSlab(); - } - void* ret = static_cast(current_ptr); - current_ptr += object_size; - remaining--; - return ret; - } -private: - /// @brief Allocates a completely new memory slab. - /// Used when an entirely new slab is needed - /// due the current one running out of usable space. - void AllocateNewSlab() noexcept { - current_slab = static_cast(std::malloc(object_size * slab_size)); - current_ptr = current_slab; - remaining = slab_size; - } - - std::vector slabs; - char* current_slab = nullptr; - char* current_ptr = nullptr; - size_t remaining = 0; -}; - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index b00ab3cb20..a13a7ebfc9 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -15,8 +15,6 @@ #include #include "dynarmic/common/assert.h" - -#include "dynarmic/common/memory_pool.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A64/a64_types.h" #include "dynarmic/ir/cond.h" @@ -27,8 +25,7 @@ namespace Dynarmic::IR { Block::Block(const LocationDescriptor& location) : location{location}, end_location{location}, - cond{Cond::AL}, - instruction_alloc_pool{std::make_unique>()} + cond{Cond::AL} { } @@ -40,7 +37,7 @@ Block::Block(const LocationDescriptor& location) /// @param args A sequence of Value instances used as arguments for the instruction. /// @returns Iterator to the newly created instruction. Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) noexcept { - IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode); + IR::Inst* inst = new IR::Inst(opcode); DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index e88dc92fc4..2f2d9ab6de 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -21,7 +21,6 @@ #include "dynarmic/ir/terminal.h" #include "dynarmic/ir/value.h" #include "dynarmic/ir/dense_list.h" -#include "dynarmic/common/memory_pool.h" namespace Dynarmic::IR { @@ -174,8 +173,6 @@ private: LocationDescriptor end_location; /// Conditional to pass in order to execute this block Cond cond; - /// Memory pool for instruction list - std::unique_ptr> instruction_alloc_pool; /// Terminal instruction of this block. Terminal terminal = Term::Invalid{}; /// Number of cycles this block takes to execute if the conditional fails. diff --git a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp deleted file mode 100644 index a8ef7e2989..0000000000 --- a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/ir_emitter.h" - -#include - -#include "dynarmic/common/assert.h" -#include - -#include "dynarmic/ir/opcodes.h" - -namespace Dynarmic::IR { - - -} // namespace Dynarmic::IR diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp deleted file mode 100644 index 9699f18345..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/interface/A32/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { - for (auto& inst : block) { - switch (inst.GetOpcode()) { - case IR::Opcode::A32ReadMemory8: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u8 value_from_memory = cb->MemoryRead8(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory16: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u16 value_from_memory = cb->MemoryRead16(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory32: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u32 value_from_memory = cb->MemoryRead32(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory64: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u64 value_from_memory = cb->MemoryRead64(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp deleted file mode 100644 index 499b38b120..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp +++ /dev/null @@ -1,382 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A32/a32_ir_emitter.h" -#include "dynarmic/frontend/A32/a32_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -namespace { - -void FlagsPass(IR::Block& block) { - using Iterator = std::reverse_iterator; - - struct FlagInfo { - bool set_not_required = false; - bool has_value_request = false; - Iterator value_request = {}; - }; - struct ValuelessFlagInfo { - bool set_not_required = false; - }; - ValuelessFlagInfo nzcvq; - ValuelessFlagInfo nzcv; - ValuelessFlagInfo nz; - FlagInfo c_flag; - FlagInfo ge; - - auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(value); - } - info.has_value_request = false; - - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_get = [](FlagInfo& info, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(IR::Value{&*inst}); - } - info.has_value_request = true; - info.value_request = inst; - }; - - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetCFlag: { - do_get(c_flag, inst); - break; - } - case IR::Opcode::A32SetCpsrNZCV: { - if (c_flag.has_value_request) { - ir.SetInsertionPointBefore(inst.base()); // base is one ahead - IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); - c_flag.value_request->ReplaceUsesWith(c); - c_flag.has_value_request = false; - break; // This case will be executed again because of the above - } - - do_set_valueless(nzcv, inst); - - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVRaw: { - if (c_flag.has_value_request) { - nzcv.set_not_required = false; - } - - do_set_valueless(nzcv, inst); - - nzcvq = {}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVQ: { - if (c_flag.has_value_request) { - nzcvq.set_not_required = false; - } - - do_set_valueless(nzcvq, inst); - - nzcv = {.set_not_required = true}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZ: { - do_set_valueless(nz, inst); - - nzcvq = {}; - nzcv = {}; - break; - } - case IR::Opcode::A32SetCpsrNZC: { - if (c_flag.has_value_request) { - c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); - c_flag.has_value_request = false; - } - - if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { - const auto nz_value = inst->GetArg(0); - - inst->Invalidate(); - - ir.SetInsertionPointBefore(inst.base()); - ir.SetCpsrNZ(IR::NZCV{nz_value}); - - nzcvq = {}; - nzcv = {}; - nz = {.set_not_required = true}; - break; - } - - if (nz.set_not_required && c_flag.set_not_required) { - inst->Invalidate(); - } else if (nz.set_not_required) { - inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); - } - nz.set_not_required = true; - c_flag.set_not_required = true; - - nzcv = {}; - nzcvq = {}; - break; - } - case IR::Opcode::A32SetGEFlags: { - do_set(ge, inst->GetArg(0), inst); - break; - } - case IR::Opcode::A32GetGEFlags: { - do_get(ge, inst); - break; - } - case IR::Opcode::A32SetGEFlagsCompressed: { - ge = {.set_not_required = true}; - break; - } - case IR::Opcode::A32OrQFlag: { - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcvq = {}; - nzcv = {}; - nz = {}; - c_flag = {}; - ge = {}; - } - break; - } - } - } -} - -void RegisterPass(IR::Block& block) { - using Iterator = IR::Block::iterator; - - struct RegInfo { - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array reg_info; - - const auto do_get = [](RegInfo& info, Iterator get_inst) { - if (info.register_value.IsEmpty()) { - info.register_value = IR::Value(&*get_inst); - return; - } - get_inst->ReplaceUsesWith(info.register_value); - }; - - const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { - if (info.last_set_instruction) { - (*info.last_set_instruction)->Invalidate(); - } - info = { - .register_value = value, - .last_set_instruction = set_inst, - }; - }; - - enum class ExtValueType { - Empty, - Single, - Double, - VectorDouble, - VectorQuad, - }; - struct ExtRegInfo { - ExtValueType value_type = {}; - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array ext_reg_info; - - const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { - if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = IR::Value(&*get_inst), - .last_set_instruction = std::nullopt, - }; - } - return; - } - get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); - }; - - const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { - if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - if (std::data(infos)[0].get().last_set_instruction) { - (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); - } - } - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = value, - .last_set_instruction = set_inst, - }; - } - }; - - // Location and version don't matter here. - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - ASSERT(reg != A32::Reg::PC); - const size_t reg_index = static_cast(reg); - do_get(reg_info[reg_index], inst); - break; - } - case IR::Opcode::A32SetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - if (reg == A32::Reg::PC) { - break; - } - const auto reg_index = static_cast(reg); - do_set(reg_info[reg_index], inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); - break; - } - case IR::Opcode::A32SetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - break; - } - case IR::Opcode::A32SetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst->GetArg(1), - inst); - break; - } - case IR::Opcode::A32GetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - do_ext_get(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_get(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst); - } - break; - } - case IR::Opcode::A32SetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - ir.SetInsertionPointAfter(inst); - const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); - do_ext_set(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - stored_value, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_set(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst->GetArg(1), - inst); - } - break; - } - default: { - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - ext_reg_info = {}; - } - break; - } - } - } -} - -} // namespace - -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { - FlagsPass(block); - RegisterPass(block); -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp deleted file mode 100644 index 79d9769520..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/frontend/A64/a64_ir_emitter.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { - if (conf.hook_data_cache_operations) { - return; - } - - for (auto& inst : block) { - if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { - continue; - } - - const auto op = static_cast(inst.GetArg(1).GetU64()); - if (op == A64::DataCacheOperation::ZeroByVA) { - A64::IREmitter ir{block}; - ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; - ir.SetInsertionPointBefore(&inst); - - size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); - IR::U64 addr{inst.GetArg(2)}; - - const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); - while (bytes >= 16) { - ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(16)); - bytes -= 16; - } - - while (bytes >= 8) { - ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(8)); - bytes -= 8; - } - - while (bytes >= 4) { - ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(4)); - bytes -= 4; - } - } - inst.Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp deleted file mode 100644 index 53e3b27176..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -void A64GetSetElimination(IR::Block& block) { - using Iterator = IR::Block::iterator; - - enum class TrackingType { - W, - X, - S, - D, - Q, - SP, - NZCV, - NZCVRaw, - }; - struct RegisterInfo { - IR::Value register_value; - TrackingType tracking_type; - bool set_instruction_present = false; - Iterator last_set_instruction; - }; - std::array reg_info; - std::array vec_info; - RegisterInfo sp_info; - RegisterInfo nzcv_info; - - const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { - if (info.set_instruction_present) { - info.last_set_instruction->Invalidate(); - block.Instructions().erase(info.last_set_instruction); - } - - info.register_value = value; - info.tracking_type = tracking_type; - info.set_instruction_present = true; - info.last_set_instruction = set_inst; - }; - - const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { - const auto do_nothing = [&] { - info = {}; - info.register_value = IR::Value(&*get_inst); - info.tracking_type = tracking_type; - }; - - if (info.register_value.IsEmpty()) { - do_nothing(); - return; - } - - if (info.tracking_type == tracking_type) { - get_inst->ReplaceUsesWith(info.register_value); - return; - } - - do_nothing(); - }; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A64GetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::W); - break; - } - case IR::Opcode::A64GetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::X); - break; - } - case IR::Opcode::A64GetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::S); - break; - } - case IR::Opcode::A64GetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::D); - break; - } - case IR::Opcode::A64GetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64GetSP: { - do_get(sp_info, inst, TrackingType::SP); - break; - } - case IR::Opcode::A64GetNZCVRaw: { - do_get(nzcv_info, inst, TrackingType::NZCVRaw); - break; - } - case IR::Opcode::A64SetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); - break; - } - case IR::Opcode::A64SetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); - break; - } - case IR::Opcode::A64SetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); - break; - } - case IR::Opcode::A64SetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); - break; - } - case IR::Opcode::A64SetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64SetSP: { - do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); - break; - } - case IR::Opcode::A64SetNZCV: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); - break; - } - case IR::Opcode::A64SetNZCVRaw: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcv_info = {}; - } - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - vec_info = {}; - sp_info = {}; - } - break; - } - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp deleted file mode 100644 index 25b7ef0ff1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_location_descriptor.h" -#include "dynarmic/frontend/A64/translate/a64_translate.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { - const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { - const auto instruction = cb->MemoryReadCode(location.PC()); - if (!instruction) - return false; - - IR::Block new_block{location}; - A64::TranslateSingleInstruction(new_block, location, *instruction); - - if (!new_block.Instructions().empty()) - return false; - - const IR::Terminal terminal = new_block.GetTerminal(); - if (auto term = boost::get(&terminal)) { - return term->next == location; - } - - return false; - }; - - IR::Terminal terminal = block.GetTerminal(); - auto term = boost::get(&terminal); - if (!term) - return; - - A64::LocationDescriptor location{term->next}; - size_t num_instructions = 1; - - while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { - num_instructions++; - } - - term->num_instructions = num_instructions; - block.ReplaceTerminal(terminal); - block.CycleCount() += num_instructions - 1; -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp deleted file mode 100644 index 86ebca87d2..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp +++ /dev/null @@ -1,559 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/assert.h" -#include -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/common/safe_ops.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -using Op = Dynarmic::IR::Opcode; - -namespace { - -// Tiny helper to avoid the need to store based off the opcode -// bit size all over the place within folding functions. -void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { - if (is_32_bit) { - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); - } else { - inst.ReplaceUsesWith(IR::Value{value}); - } -} - -IR::Value Value(bool is_32_bit, u64 value) { - return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; -} - -template -bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - - const bool is_lhs_immediate = lhs.IsImmediate(); - const bool is_rhs_immediate = rhs.IsImmediate(); - - if (is_lhs_immediate && is_rhs_immediate) { - const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); - ReplaceUsesWith(inst, is_32_bit, result); - return false; - } - - if (is_lhs_immediate && !is_rhs_immediate) { - const IR::Inst* rhs_inst = rhs.GetInstRecursive(); - if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, rhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } else { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - } - } - - if (!is_lhs_immediate && is_rhs_immediate) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } - } - - return true; -} - -void FoldAdd(IR::Inst& inst, bool is_32_bit) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - if (lhs.IsImmediate() && !rhs.IsImmediate()) { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - FoldAdd(inst, is_32_bit); - return; - } - - if (inst.HasAssociatedPseudoOperation()) { - return; - } - - if (!lhs.IsImmediate() && rhs.IsImmediate()) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { - const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); - if (combined == 0) { - inst.ReplaceUsesWith(lhs_inst->GetArg(0)); - return; - } - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - return; - } - if (rhs.IsZero() && carry.IsZero()) { - inst.ReplaceUsesWith(lhs); - return; - } - } - - if (inst.AreAllArgsImmediates()) { - const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); - return; - } -} - -/// Folds AND operations based on the following: -/// -/// 1. imm_x & imm_y -> result -/// 2. x & 0 -> 0 -/// 3. 0 & y -> 0 -/// 4. x & y -> y (where x has all bits set to 1) -/// 5. x & y -> x (where y has all bits set to 1) -/// -void FoldAND(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.HasAllBitsSet()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -/// Folds byte reversal opcodes based on the following: -/// -/// 1. imm -> swap(imm) -/// -void FoldByteReverse(IR::Inst& inst, Op op) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - if (op == Op::ByteReverseWord) { - const u32 result = mcl::bit::swap_bytes_32(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else if (op == Op::ByteReverseHalf) { - const u16 result = mcl::bit::swap_bytes_16(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else { - const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); - inst.ReplaceUsesWith(IR::Value{result}); - } -} - -/// Folds division operations based on the following: -/// -/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) -/// 2. imm_x / imm_y -> result -/// 3. x / 1 -> x -/// -void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { - const auto rhs = inst.GetArg(1); - - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - return; - } - - const auto lhs = inst.GetArg(0); - if (lhs.IsImmediate() && rhs.IsImmediate()) { - if (is_signed) { - const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); - ReplaceUsesWith(inst, is_32_bit, static_cast(result)); - } else { - const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); - } - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(IR::Value{lhs}); - } -} - -// Folds EOR operations based on the following: -// -// 1. imm_x ^ imm_y -> result -// 2. x ^ 0 -> x -// 3. 0 ^ y -> y -// -void FoldEOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -void FoldLeastSignificantByte(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantHalf(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldMostSignificantBit(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); -} - -void FoldMostSignificantWord(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - if (carry_inst) { - carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); - } - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); -} - -// Folds multiplication operations based on the following: -// -// 1. imm_x * imm_y -> result -// 2. x * 0 -> 0 -// 3. 0 * y -> 0 -// 4. x * 1 -> x -// 5. 1 * y -> y -// -void FoldMultiply(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -// Folds NOT operations if the contained value is an immediate. -void FoldNOT(IR::Inst& inst, bool is_32_bit) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - const u64 result = ~operand.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -// Folds OR operations based on the following: -// -// 1. imm_x | imm_y -> result -// 2. x | 0 -> x -// 3. 0 | y -> y -// -void FoldOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -bool FoldShifts(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - // The 32-bit variants can contain 3 arguments, while the - // 64-bit variants only contain 2. - if (inst.NumArgs() == 3 && !carry_inst) { - inst.SetArg(2, IR::Value(false)); - } - - const auto shift_amount = inst.GetArg(1); - - if (shift_amount.IsZero()) { - if (carry_inst) { - carry_inst->ReplaceUsesWith(inst.GetArg(2)); - } - inst.ReplaceUsesWith(inst.GetArg(0)); - return false; - } - - if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { - inst.SetArg(2, IR::Value(false)); - } - - if (!inst.AreAllArgsImmediates() || carry_inst) { - return false; - } - - return true; -} - -void FoldSignExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSignExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSub(IR::Inst& inst, bool is_32_bit) { - if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { - return; - } - - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -void FoldZeroExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldZeroExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{value}); -} -} // Anonymous namespace - -void ConstantPropagation(IR::Block& block) { - for (auto& inst : block) { - const auto opcode = inst.GetOpcode(); - - switch (opcode) { - case Op::LeastSignificantWord: - FoldLeastSignificantWord(inst); - break; - case Op::MostSignificantWord: - FoldMostSignificantWord(inst); - break; - case Op::LeastSignificantHalf: - FoldLeastSignificantHalf(inst); - break; - case Op::LeastSignificantByte: - FoldLeastSignificantByte(inst); - break; - case Op::MostSignificantBit: - FoldMostSignificantBit(inst); - break; - case Op::IsZero32: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); - } - break; - case Op::IsZero64: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); - } - break; - case Op::LogicalShiftLeft32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeft64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeftMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftLeftMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::LogicalShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::ArithmeticShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::ArithmeticShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::RotateRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); - } - break; - case Op::RotateRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); - } - break; - case Op::Add32: - case Op::Add64: - FoldAdd(inst, opcode == Op::Add32); - break; - case Op::Sub32: - case Op::Sub64: - FoldSub(inst, opcode == Op::Sub32); - break; - case Op::Mul32: - case Op::Mul64: - FoldMultiply(inst, opcode == Op::Mul32); - break; - case Op::SignedDiv32: - case Op::SignedDiv64: - FoldDivide(inst, opcode == Op::SignedDiv32, true); - break; - case Op::UnsignedDiv32: - case Op::UnsignedDiv64: - FoldDivide(inst, opcode == Op::UnsignedDiv32, false); - break; - case Op::And32: - case Op::And64: - FoldAND(inst, opcode == Op::And32); - break; - case Op::Eor32: - case Op::Eor64: - FoldEOR(inst, opcode == Op::Eor32); - break; - case Op::Or32: - case Op::Or64: - FoldOR(inst, opcode == Op::Or32); - break; - case Op::Not32: - case Op::Not64: - FoldNOT(inst, opcode == Op::Not32); - break; - case Op::SignExtendByteToWord: - case Op::SignExtendHalfToWord: - FoldSignExtendXToWord(inst); - break; - case Op::SignExtendByteToLong: - case Op::SignExtendHalfToLong: - case Op::SignExtendWordToLong: - FoldSignExtendXToLong(inst); - break; - case Op::ZeroExtendByteToWord: - case Op::ZeroExtendHalfToWord: - FoldZeroExtendXToWord(inst); - break; - case Op::ZeroExtendByteToLong: - case Op::ZeroExtendHalfToLong: - case Op::ZeroExtendWordToLong: - FoldZeroExtendXToLong(inst); - break; - case Op::ByteReverseWord: - case Op::ByteReverseHalf: - case Op::ByteReverseDual: - FoldByteReverse(inst, opcode); - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp deleted file mode 100644 index bda9f6efd1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void DeadCodeElimination(IR::Block& block) { - // We iterate over the instructions in reverse order. - // This is because removing an instruction reduces the number of uses for earlier instructions. - for (auto& inst : mcl::iterator::reverse(block)) { - if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { - inst.Invalidate(); - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp deleted file mode 100644 index e87fcc335b..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void IdentityRemovalPass(IR::Block& block) { - std::vector to_invalidate; - - auto iter = block.begin(); - while (iter != block.end()) { - IR::Inst& inst = *iter; - - const size_t num_args = inst.NumArgs(); - for (size_t i = 0; i < num_args; i++) { - while (true) { - IR::Value arg = inst.GetArg(i); - if (!arg.IsIdentity()) - break; - inst.SetArg(i, arg.GetInst()->GetArg(0)); - } - } - - if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { - iter = block.Instructions().erase(inst); - to_invalidate.push_back(&inst); - } else { - ++iter; - } - } - - for (IR::Inst* inst : to_invalidate) { - inst->Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h b/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h deleted file mode 100644 index 5eb1a55100..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h +++ /dev/null @@ -1,127 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization::IRMatcher { - -struct CaptureValue { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value); - } -}; - -struct CaptureInst { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return std::tuple(value.GetInstRecursive()); - } -}; - -struct CaptureUImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsU64()); - } -}; - -struct CaptureSImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsS64()); - } -}; - -template -struct UImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsU64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct SImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsS64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct Inst { -public: - using ReturnType = mp::concat, typename Args::ReturnType...>; - - static std::optional Match(const IR::Inst& inst) { - if (inst.GetOpcode() != Opcode) - return std::nullopt; - if (inst.HasAssociatedPseudoOperation()) - return std::nullopt; - return MatchArgs<0>(inst); - } - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return Match(*value.GetInstRecursive()); - } - -private: - template - static auto MatchArgs(const IR::Inst& inst) -> std::optional>, std::tuple<>>>> { - if constexpr (I >= sizeof...(Args)) { - return std::tuple(); - } else { - using Arg = mp::get>; - - if (const auto arg = Arg::Match(inst.GetArg(I))) { - if (const auto rest = MatchArgs(inst)) { - return std::tuple_cat(*arg, *rest); - } - } - - return std::nullopt; - } - } -}; - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t); -} - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t); -} - -} // namespace Dynarmic::Optimization::IRMatcher diff --git a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp deleted file mode 100644 index a766bdc83f..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2023 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" - -namespace Dynarmic::Optimization { - -void NamingPass(IR::Block& block) { - unsigned name = 1; - for (auto& inst : block) { - inst.SetName(name++); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/passes.h b/src/dynarmic/src/dynarmic/ir/opt/passes.h deleted file mode 100644 index 703145b556..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/passes.h +++ /dev/null @@ -1,47 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -namespace Dynarmic::A32 { -struct UserCallbacks; -} - -namespace Dynarmic::A64 { -struct UserCallbacks; -struct UserConfig; -} // namespace Dynarmic::A64 - -namespace Dynarmic::IR { -class Block; -} - -namespace Dynarmic::Optimization { - -struct PolyfillOptions { - bool sha256 = false; - bool vector_multiply_widen = false; - - bool operator==(const PolyfillOptions&) const = default; -}; - -struct A32GetSetEliminationOptions { - bool convert_nzc_to_nz = false; - bool convert_nz_to_nzc = false; -}; - -void PolyfillPass(IR::Block& block, const PolyfillOptions& opt); -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb); -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt); -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf); -void A64GetSetElimination(IR::Block& block); -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb); -void ConstantPropagation(IR::Block& block); -void DeadCodeElimination(IR::Block& block); -void IdentityRemovalPass(IR::Block& block); -void VerificationPass(const IR::Block& block); -void NamingPass(IR::Block& block); - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp deleted file mode 100644 index 1aa3aea91e..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2022 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -namespace { - -void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - - const IR::U128 t = ir.VectorExtract(x, y, 32); - - IR::U128 result = ir.ZeroVector(); - for (size_t i = 0; i < 4; i++) { - const IR::U32 modified_element = [&] { - const IR::U32 element = ir.VectorGetElement(32, t, i); - const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); - const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); - const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); - }(); - - result = ir.VectorSetElement(32, result, i, modified_element); - } - result = ir.VectorAdd(32, result, x); - - inst.ReplaceUsesWith(result); -} - -void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 z = (IR::U128)inst.GetArg(2); - - const IR::U128 T0 = ir.VectorExtract(y, z, 32); - - const IR::U128 lower_half = [&] { - const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); - const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); - return ir.VectorZeroUpper(tmp5); - }(); - - const IR::U64 upper_half = [&] { - const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - - // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] - const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); - const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); - - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); - return ir.VectorGetElement(64, tmp5, 0); - }(); - - const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); - - inst.ReplaceUsesWith(result); -} - -IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Eor(ir.And(ir.Eor(y, z), x), z); -} - -IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); -} - -IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 x = (IR::U128)inst.GetArg(0); - IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 w = (IR::U128)inst.GetArg(2); - const bool part1 = inst.GetArg(3).GetU1(); - - for (size_t i = 0; i < 4; i++) { - const IR::U32 low_x = ir.VectorGetElement(32, x, 0); - const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); - const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); - const IR::U32 high_x = ir.VectorGetElement(32, x, 3); - - const IR::U32 low_y = ir.VectorGetElement(32, y, 0); - const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); - const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); - const IR::U32 high_y = ir.VectorGetElement(32, y, 3); - - const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); - const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); - - const IR::U32 t = [&] { - const IR::U32 w_element = ir.VectorGetElement(32, w, i); - const IR::U32 sig = SHAhashSIGMA1(ir, low_y); - - return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); - }(); - - const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); - const IR::U32 new_low_y = ir.Add(t, high_x); - - // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] - const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); - const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); - - x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); - y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); - } - - inst.ReplaceUsesWith(part1 ? x : y); -} - -template -void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 n = (IR::U128)inst.GetArg(0); - IR::U128 m = (IR::U128)inst.GetArg(1); - - const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); - const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); - - const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); - - inst.ReplaceUsesWith(result); -} - -} // namespace - -void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { - if (polyfill == PolyfillOptions{}) { - return; - } - - IR::IREmitter ir{block}; - - for (auto& inst : block) { - ir.SetInsertionPointBefore(&inst); - - switch (inst.GetOpcode()) { - case IR::Opcode::SHA256MessageSchedule0: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule0(ir, inst); - } - break; - case IR::Opcode::SHA256MessageSchedule1: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule1(ir, inst); - } - break; - case IR::Opcode::SHA256Hash: - if (polyfill.sha256) { - PolyfillSHA256Hash(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, false>(ir, inst); - } - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp deleted file mode 100644 index c6c2cff231..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/type.h" - -namespace Dynarmic::Optimization { - -void VerificationPass(const IR::Block& block) { - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const IR::Type t1 = inst.GetArg(i).GetType(); - const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); - if (!IR::AreTypesCompatible(t1, t2)) { - std::puts(IR::DumpBlock(block).c_str()); - ASSERT_FALSE("above block failed validation"); - } - } - } - - ankerl::unordered_dense::map actual_uses; - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const auto arg = inst.GetArg(i); - if (!arg.IsImmediate()) { - actual_uses[arg.GetInst()]++; - } - } - } - - for (const auto& pair : actual_uses) { - ASSERT(pair.first->UseCount() == pair.second); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp new file mode 100644 index 0000000000..383b915839 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -0,0 +1,1502 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include +#include + +#include +#include "boost/container/small_vector.hpp" +#include "dynarmic/frontend/A32/a32_ir_emitter.h" +#include "dynarmic/frontend/A32/a32_location_descriptor.h" +#include "dynarmic/frontend/A32/a32_types.h" +#include "dynarmic/frontend/A64/a64_ir_emitter.h" +#include "dynarmic/frontend/A64/a64_location_descriptor.h" +#include "dynarmic/frontend/A64/translate/a64_translate.h" +#include "dynarmic/interface/A32/config.h" +#include "dynarmic/interface/A64/config.h" +#include "dynarmic/interface/optimization_flags.h" +#include "dynarmic/common/safe_ops.h" +#include "dynarmic/ir/basic_block.h" +#include "dynarmic/ir/microinstruction.h" +#include "dynarmic/ir/opcodes.h" +#include "dynarmic/ir/opt_passes.h" +#include "dynarmic/ir/type.h" +#include "mcl/bit/swap.hpp" +#include "mcl/bit/rotate.hpp" +#include "mcl/iterator/reverse.hpp" + +namespace Dynarmic::Optimization { + +static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { + for (auto& inst : block) { + switch (inst.GetOpcode()) { + case IR::Opcode::A32ReadMemory8: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u8 value_from_memory = cb->MemoryRead8(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory16: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u16 value_from_memory = cb->MemoryRead16(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory32: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u32 value_from_memory = cb->MemoryRead32(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory64: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u64 value_from_memory = cb->MemoryRead64(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + default: + break; + } + } +} + +static void FlagsPass(IR::Block& block) { + using Iterator = std::reverse_iterator; + + struct FlagInfo { + bool set_not_required = false; + bool has_value_request = false; + Iterator value_request = {}; + }; + struct ValuelessFlagInfo { + bool set_not_required = false; + }; + ValuelessFlagInfo nzcvq; + ValuelessFlagInfo nzcv; + ValuelessFlagInfo nz; + FlagInfo c_flag; + FlagInfo ge; + + auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(value); + } + info.has_value_request = false; + + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_get = [](FlagInfo& info, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(IR::Value{&*inst}); + } + info.has_value_request = true; + info.value_request = inst; + }; + + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetCFlag: { + do_get(c_flag, inst); + break; + } + case IR::Opcode::A32SetCpsrNZCV: { + if (c_flag.has_value_request) { + ir.SetInsertionPointBefore(inst.base()); // base is one ahead + IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); + c_flag.value_request->ReplaceUsesWith(c); + c_flag.has_value_request = false; + break; // This case will be executed again because of the above + } + + do_set_valueless(nzcv, inst); + + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVRaw: { + if (c_flag.has_value_request) { + nzcv.set_not_required = false; + } + + do_set_valueless(nzcv, inst); + + nzcvq = {}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVQ: { + if (c_flag.has_value_request) { + nzcvq.set_not_required = false; + } + + do_set_valueless(nzcvq, inst); + + nzcv = {.set_not_required = true}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZ: { + do_set_valueless(nz, inst); + + nzcvq = {}; + nzcv = {}; + break; + } + case IR::Opcode::A32SetCpsrNZC: { + if (c_flag.has_value_request) { + c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); + c_flag.has_value_request = false; + } + + if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { + const auto nz_value = inst->GetArg(0); + + inst->Invalidate(); + + ir.SetInsertionPointBefore(inst.base()); + ir.SetCpsrNZ(IR::NZCV{nz_value}); + + nzcvq = {}; + nzcv = {}; + nz = {.set_not_required = true}; + break; + } + + if (nz.set_not_required && c_flag.set_not_required) { + inst->Invalidate(); + } else if (nz.set_not_required) { + inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); + } + nz.set_not_required = true; + c_flag.set_not_required = true; + + nzcv = {}; + nzcvq = {}; + break; + } + case IR::Opcode::A32SetGEFlags: { + do_set(ge, inst->GetArg(0), inst); + break; + } + case IR::Opcode::A32GetGEFlags: { + do_get(ge, inst); + break; + } + case IR::Opcode::A32SetGEFlagsCompressed: { + ge = {.set_not_required = true}; + break; + } + case IR::Opcode::A32OrQFlag: { + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcvq = {}; + nzcv = {}; + nz = {}; + c_flag = {}; + ge = {}; + } + break; + } + } + } +} + +static void RegisterPass(IR::Block& block) { + using Iterator = IR::Block::iterator; + + struct RegInfo { + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array reg_info; + + const auto do_get = [](RegInfo& info, Iterator get_inst) { + if (info.register_value.IsEmpty()) { + info.register_value = IR::Value(&*get_inst); + return; + } + get_inst->ReplaceUsesWith(info.register_value); + }; + + const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { + if (info.last_set_instruction) { + (*info.last_set_instruction)->Invalidate(); + } + info = { + .register_value = value, + .last_set_instruction = set_inst, + }; + }; + + enum class ExtValueType { + Empty, + Single, + Double, + VectorDouble, + VectorQuad, + }; + struct ExtRegInfo { + ExtValueType value_type = {}; + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array ext_reg_info; + + const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { + if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = IR::Value(&*get_inst), + .last_set_instruction = std::nullopt, + }; + } + return; + } + get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); + }; + + const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { + if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + if (std::data(infos)[0].get().last_set_instruction) { + (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); + } + } + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = value, + .last_set_instruction = set_inst, + }; + } + }; + + // Location and version don't matter here. + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + ASSERT(reg != A32::Reg::PC); + const size_t reg_index = size_t(reg); + do_get(reg_info[reg_index], inst); + break; + } + case IR::Opcode::A32SetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + if (reg == A32::Reg::PC) { + break; + } + const auto reg_index = size_t(reg); + do_set(reg_info[reg_index], inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); + break; + } + case IR::Opcode::A32SetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + break; + } + case IR::Opcode::A32SetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst->GetArg(1), + inst); + break; + } + case IR::Opcode::A32GetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + do_ext_get(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_get(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst); + } + break; + } + case IR::Opcode::A32SetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + ir.SetInsertionPointAfter(inst); + const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); + do_ext_set(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + stored_value, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_set(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst->GetArg(1), + inst); + } + break; + } + default: { + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + ext_reg_info = {}; + } + break; + } + } + } +} + +struct A32GetSetEliminationOptions { + bool convert_nzc_to_nz = false; + bool convert_nz_to_nzc = false; +}; + +static void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { + FlagsPass(block); + RegisterPass(block); +} + +static void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { + if (conf.hook_data_cache_operations) { + return; + } + + for (auto& inst : block) { + if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { + continue; + } + + const auto op = static_cast(inst.GetArg(1).GetU64()); + if (op == A64::DataCacheOperation::ZeroByVA) { + A64::IREmitter ir{block}; + ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; + ir.SetInsertionPointBefore(&inst); + + size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); + IR::U64 addr{inst.GetArg(2)}; + + const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); + while (bytes >= 16) { + ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(16)); + bytes -= 16; + } + + while (bytes >= 8) { + ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(8)); + bytes -= 8; + } + + while (bytes >= 4) { + ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(4)); + bytes -= 4; + } + } + inst.Invalidate(); + } +} + +static void A64GetSetElimination(IR::Block& block) { + using Iterator = IR::Block::iterator; + + enum class TrackingType { + W, + X, + S, + D, + Q, + SP, + NZCV, + NZCVRaw, + }; + struct RegisterInfo { + IR::Value register_value; + TrackingType tracking_type; + bool set_instruction_present = false; + Iterator last_set_instruction; + }; + std::array reg_info; + std::array vec_info; + RegisterInfo sp_info; + RegisterInfo nzcv_info; + + const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { + if (info.set_instruction_present) { + info.last_set_instruction->Invalidate(); + block.Instructions().erase(info.last_set_instruction); + } + + info.register_value = value; + info.tracking_type = tracking_type; + info.set_instruction_present = true; + info.last_set_instruction = set_inst; + }; + + const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { + const auto do_nothing = [&] { + info = {}; + info.register_value = IR::Value(&*get_inst); + info.tracking_type = tracking_type; + }; + + if (info.register_value.IsEmpty()) { + do_nothing(); + return; + } + + if (info.tracking_type == tracking_type) { + get_inst->ReplaceUsesWith(info.register_value); + return; + } + + do_nothing(); + }; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A64GetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::W); + break; + } + case IR::Opcode::A64GetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::X); + break; + } + case IR::Opcode::A64GetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::S); + break; + } + case IR::Opcode::A64GetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::D); + break; + } + case IR::Opcode::A64GetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64GetSP: { + do_get(sp_info, inst, TrackingType::SP); + break; + } + case IR::Opcode::A64GetNZCVRaw: { + do_get(nzcv_info, inst, TrackingType::NZCVRaw); + break; + } + case IR::Opcode::A64SetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); + break; + } + case IR::Opcode::A64SetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); + break; + } + case IR::Opcode::A64SetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); + break; + } + case IR::Opcode::A64SetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); + break; + } + case IR::Opcode::A64SetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64SetSP: { + do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); + break; + } + case IR::Opcode::A64SetNZCV: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); + break; + } + case IR::Opcode::A64SetNZCVRaw: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcv_info = {}; + } + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + vec_info = {}; + sp_info = {}; + } + break; + } + } + } +} + +static void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { + const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { + const auto instruction = cb->MemoryReadCode(location.PC()); + if (!instruction) + return false; + + IR::Block new_block{location}; + A64::TranslateSingleInstruction(new_block, location, *instruction); + + if (!new_block.Instructions().empty()) + return false; + + const IR::Terminal terminal = new_block.GetTerminal(); + if (auto term = boost::get(&terminal)) { + return term->next == location; + } + + return false; + }; + + IR::Terminal terminal = block.GetTerminal(); + auto term = boost::get(&terminal); + if (!term) + return; + + A64::LocationDescriptor location{term->next}; + size_t num_instructions = 1; + + while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { + num_instructions++; + } + + term->num_instructions = num_instructions; + block.ReplaceTerminal(terminal); + block.CycleCount() += num_instructions - 1; +} + +using Op = Dynarmic::IR::Opcode; + +// Tiny helper to avoid the need to store based off the opcode +// bit size all over the place within folding functions. +static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { + if (is_32_bit) { + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); + } else { + inst.ReplaceUsesWith(IR::Value{value}); + } +} + +static IR::Value Value(bool is_32_bit, u64 value) { + return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; +} + +template +static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + + const bool is_lhs_immediate = lhs.IsImmediate(); + const bool is_rhs_immediate = rhs.IsImmediate(); + + if (is_lhs_immediate && is_rhs_immediate) { + const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); + ReplaceUsesWith(inst, is_32_bit, result); + return false; + } + + if (is_lhs_immediate && !is_rhs_immediate) { + const IR::Inst* rhs_inst = rhs.GetInstRecursive(); + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, rhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } else { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + } + } + + if (!is_lhs_immediate && is_rhs_immediate) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } + } + + return true; +} + +static void FoldAdd(IR::Inst& inst, bool is_32_bit) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + if (lhs.IsImmediate() && !rhs.IsImmediate()) { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + FoldAdd(inst, is_32_bit); + return; + } + + if (inst.HasAssociatedPseudoOperation()) { + return; + } + + if (!lhs.IsImmediate() && rhs.IsImmediate()) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { + const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); + if (combined == 0) { + inst.ReplaceUsesWith(lhs_inst->GetArg(0)); + return; + } + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + return; + } + if (rhs.IsZero() && carry.IsZero()) { + inst.ReplaceUsesWith(lhs); + return; + } + } + + if (inst.AreAllArgsImmediates()) { + const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); + return; + } +} + +/// Folds AND operations based on the following: +/// +/// 1. imm_x & imm_y -> result +/// 2. x & 0 -> 0 +/// 3. 0 & y -> 0 +/// 4. x & y -> y (where x has all bits set to 1) +/// 5. x & y -> x (where y has all bits set to 1) +/// +static void FoldAND(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.HasAllBitsSet()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +/// Folds byte reversal opcodes based on the following: +/// +/// 1. imm -> swap(imm) +/// +static void FoldByteReverse(IR::Inst& inst, Op op) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + if (op == Op::ByteReverseWord) { + const u32 result = mcl::bit::swap_bytes_32(u32(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else if (op == Op::ByteReverseHalf) { + const u16 result = mcl::bit::swap_bytes_16(u16(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else { + const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); + inst.ReplaceUsesWith(IR::Value{result}); + } +} + +/// Folds division operations based on the following: +/// +/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) +/// 2. imm_x / imm_y -> result +/// 3. x / 1 -> x +/// +static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { + const auto rhs = inst.GetArg(1); + + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + return; + } + + const auto lhs = inst.GetArg(0); + if (lhs.IsImmediate() && rhs.IsImmediate()) { + if (is_signed) { + const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); + ReplaceUsesWith(inst, is_32_bit, static_cast(result)); + } else { + const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); + } + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(IR::Value{lhs}); + } +} + +// Folds EOR operations based on the following: +// +// 1. imm_x ^ imm_y -> result +// 2. x ^ 0 -> x +// 3. 0 ^ y -> y +// +static void FoldEOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static void FoldLeastSignificantByte(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantHalf(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldMostSignificantBit(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); +} + +static void FoldMostSignificantWord(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + if (carry_inst) { + carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); + } + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); +} + +// Folds multiplication operations based on the following: +// +// 1. imm_x * imm_y -> result +// 2. x * 0 -> 0 +// 3. 0 * y -> 0 +// 4. x * 1 -> x +// 5. 1 * y -> y +// +static void FoldMultiply(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +// Folds NOT operations if the contained value is an immediate. +static void FoldNOT(IR::Inst& inst, bool is_32_bit) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + const u64 result = ~operand.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +// Folds OR operations based on the following: +// +// 1. imm_x | imm_y -> result +// 2. x | 0 -> x +// 3. 0 | y -> y +// +static void FoldOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static bool FoldShifts(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + // The 32-bit variants can contain 3 arguments, while the + // 64-bit variants only contain 2. + if (inst.NumArgs() == 3 && !carry_inst) { + inst.SetArg(2, IR::Value(false)); + } + + const auto shift_amount = inst.GetArg(1); + + if (shift_amount.IsZero()) { + if (carry_inst) { + carry_inst->ReplaceUsesWith(inst.GetArg(2)); + } + inst.ReplaceUsesWith(inst.GetArg(0)); + return false; + } + + if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { + inst.SetArg(2, IR::Value(false)); + } + + if (!inst.AreAllArgsImmediates() || carry_inst) { + return false; + } + + return true; +} + +static void FoldSignExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSignExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSub(IR::Inst& inst, bool is_32_bit) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return; + } + + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +static void FoldZeroExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldZeroExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{value}); +} + +static void ConstantPropagation(IR::Block& block) { + for (auto& inst : block) { + const auto opcode = inst.GetOpcode(); + + switch (opcode) { + case Op::LeastSignificantWord: + FoldLeastSignificantWord(inst); + break; + case Op::MostSignificantWord: + FoldMostSignificantWord(inst); + break; + case Op::LeastSignificantHalf: + FoldLeastSignificantHalf(inst); + break; + case Op::LeastSignificantByte: + FoldLeastSignificantByte(inst); + break; + case Op::MostSignificantBit: + FoldMostSignificantBit(inst); + break; + case Op::IsZero32: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); + } + break; + case Op::IsZero64: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); + } + break; + case Op::LogicalShiftLeft32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeft64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeftMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftLeftMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::LogicalShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::ArithmeticShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::ArithmeticShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::RotateRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); + } + break; + case Op::RotateRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); + } + break; + case Op::Add32: + case Op::Add64: + FoldAdd(inst, opcode == Op::Add32); + break; + case Op::Sub32: + case Op::Sub64: + FoldSub(inst, opcode == Op::Sub32); + break; + case Op::Mul32: + case Op::Mul64: + FoldMultiply(inst, opcode == Op::Mul32); + break; + case Op::SignedDiv32: + case Op::SignedDiv64: + FoldDivide(inst, opcode == Op::SignedDiv32, true); + break; + case Op::UnsignedDiv32: + case Op::UnsignedDiv64: + FoldDivide(inst, opcode == Op::UnsignedDiv32, false); + break; + case Op::And32: + case Op::And64: + FoldAND(inst, opcode == Op::And32); + break; + case Op::Eor32: + case Op::Eor64: + FoldEOR(inst, opcode == Op::Eor32); + break; + case Op::Or32: + case Op::Or64: + FoldOR(inst, opcode == Op::Or32); + break; + case Op::Not32: + case Op::Not64: + FoldNOT(inst, opcode == Op::Not32); + break; + case Op::SignExtendByteToWord: + case Op::SignExtendHalfToWord: + FoldSignExtendXToWord(inst); + break; + case Op::SignExtendByteToLong: + case Op::SignExtendHalfToLong: + case Op::SignExtendWordToLong: + FoldSignExtendXToLong(inst); + break; + case Op::ZeroExtendByteToWord: + case Op::ZeroExtendHalfToWord: + FoldZeroExtendXToWord(inst); + break; + case Op::ZeroExtendByteToLong: + case Op::ZeroExtendHalfToLong: + case Op::ZeroExtendWordToLong: + FoldZeroExtendXToLong(inst); + break; + case Op::ByteReverseWord: + case Op::ByteReverseHalf: + case Op::ByteReverseDual: + FoldByteReverse(inst, opcode); + break; + default: + break; + } + } +} + +static void DeadCodeElimination(IR::Block& block) { + // We iterate over the instructions in reverse order. + // This is because removing an instruction reduces the number of uses for earlier instructions. + for (auto& inst : mcl::iterator::reverse(block)) { + if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { + inst.Invalidate(); + } + } +} + +static void IdentityRemovalPass(IR::Block& block) { + boost::container::small_vector to_invalidate; + + auto iter = block.begin(); + while (iter != block.end()) { + IR::Inst& inst = *iter; + + const size_t num_args = inst.NumArgs(); + for (size_t i = 0; i < num_args; i++) { + while (true) { + IR::Value arg = inst.GetArg(i); + if (!arg.IsIdentity()) + break; + inst.SetArg(i, arg.GetInst()->GetArg(0)); + } + } + + if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { + iter = block.Instructions().erase(inst); + to_invalidate.push_back(&inst); + } else { + ++iter; + } + } + for (IR::Inst* inst : to_invalidate) { + inst->Invalidate(); + } +} + +static void NamingPass(IR::Block& block) { + u32 name = 1; + for (auto& inst : block) + inst.SetName(name++); +} + +static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + + const IR::U128 t = ir.VectorExtract(x, y, 32); + + IR::U128 result = ir.ZeroVector(); + for (size_t i = 0; i < 4; i++) { + const IR::U32 modified_element = [&] { + const IR::U32 element = ir.VectorGetElement(32, t, i); + const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); + const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); + const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); + }(); + + result = ir.VectorSetElement(32, result, i, modified_element); + } + result = ir.VectorAdd(32, result, x); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 z = (IR::U128)inst.GetArg(2); + + const IR::U128 T0 = ir.VectorExtract(y, z, 32); + + const IR::U128 lower_half = [&] { + const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); + const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); + return ir.VectorZeroUpper(tmp5); + }(); + + const IR::U64 upper_half = [&] { + const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + + // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] + const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); + const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); + + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); + return ir.VectorGetElement(64, tmp5, 0); + }(); + + const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); + + inst.ReplaceUsesWith(result); +} + +static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Eor(ir.And(ir.Eor(y, z), x), z); +} + +static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); +} + +static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 x = (IR::U128)inst.GetArg(0); + IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 w = (IR::U128)inst.GetArg(2); + const bool part1 = inst.GetArg(3).GetU1(); + + for (size_t i = 0; i < 4; i++) { + const IR::U32 low_x = ir.VectorGetElement(32, x, 0); + const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); + const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); + const IR::U32 high_x = ir.VectorGetElement(32, x, 3); + + const IR::U32 low_y = ir.VectorGetElement(32, y, 0); + const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); + const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); + const IR::U32 high_y = ir.VectorGetElement(32, y, 3); + + const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); + const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); + + const IR::U32 t = [&] { + const IR::U32 w_element = ir.VectorGetElement(32, w, i); + const IR::U32 sig = SHAhashSIGMA1(ir, low_y); + + return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); + }(); + + const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); + const IR::U32 new_low_y = ir.Add(t, high_x); + + // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] + const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); + const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); + + x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); + y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); + } + + inst.ReplaceUsesWith(part1 ? x : y); +} + +template +static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 n = (IR::U128)inst.GetArg(0); + IR::U128 m = (IR::U128)inst.GetArg(1); + + const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); + const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); + + const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { + if (polyfill == PolyfillOptions{}) { + return; + } + + IR::IREmitter ir{block}; + + for (auto& inst : block) { + ir.SetInsertionPointBefore(&inst); + + switch (inst.GetOpcode()) { + case IR::Opcode::SHA256MessageSchedule0: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule0(ir, inst); + } + break; + case IR::Opcode::SHA256MessageSchedule1: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule1(ir, inst); + } + break; + case IR::Opcode::SHA256Hash: + if (polyfill.sha256) { + PolyfillSHA256Hash(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, false>(ir, inst); + } + break; + default: + break; + } + } +} + +static void VerificationPass(const IR::Block& block) { + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) { + const IR::Type t1 = inst.GetArg(i).GetType(); + const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); + ASSERT_MSG(IR::AreTypesCompatible(t1, t2), "Block failed:\n{}", IR::DumpBlock(block)); + } + } + ankerl::unordered_dense::map actual_uses; + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) + if (IR::Value const arg = inst.GetArg(i); !arg.IsImmediate()) + actual_uses[arg.GetInst()]++; + } + for (auto const& pair : actual_uses) + ASSERT(pair.first->UseCount() == pair.second); +} + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) [[likely]] { + Optimization::A32GetSetElimination(block, {.convert_nzc_to_nz = true}); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::A32ConstantMemoryReads(block, conf.callbacks); + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + Optimization::IdentityRemovalPass(block); + Optimization::VerificationPass(block); +} + +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::A64CallbackConfigPass(block, conf); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) [[likely]] { + Optimization::A64GetSetElimination(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] { + Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks); + } + Optimization::VerificationPass(block); +} + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h new file mode 100644 index 0000000000..1963fad0a0 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -0,0 +1,34 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +namespace Dynarmic::A32 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::A64 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::IR { +class Block; +} + +namespace Dynarmic::Optimization { + +struct PolyfillOptions { + bool sha256 = false; + bool vector_multiply_widen = false; + + bool operator==(const PolyfillOptions&) const = default; +}; + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index 4d14141bbf..ad01e5718b 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -29,7 +29,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; @@ -179,13 +179,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 85d86c7966..29a8f23478 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -6,33 +6,24 @@ add_executable(dynarmic_tests fp/mantissa_util_tests.cpp fp/unpacked_tests.cpp rand_int.h + # A32 + A32/test_arm_disassembler.cpp + A32/test_arm_instructions.cpp + A32/test_coprocessor.cpp + A32/test_svc.cpp + A32/test_thumb_instructions.cpp + A32/testenv.h + decoder_tests.cpp + # A64 + A64/a64.cpp + A64/fibonacci.cpp + A64/fp_min_max.cpp + A64/misaligned_page_table.cpp + A64/test_invalidation.cpp + A64/real_world.cpp + A64/testenv.h ) - -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/test_arm_disassembler.cpp - A32/test_arm_instructions.cpp - A32/test_coprocessor.cpp - A32/test_svc.cpp - A32/test_thumb_instructions.cpp - A32/testenv.h - decoder_tests.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) - - target_sources(dynarmic_tests PRIVATE - A64/a64.cpp - A64/fibonacci.cpp - A64/fp_min_max.cpp - A64/misaligned_page_table.cpp - A64/test_invalidation.cpp - A64/real_world.cpp - A64/testenv.h - ) -endif() +target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) if (DYNARMIC_TESTS_USE_UNICORN) target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn) @@ -40,25 +31,17 @@ if (DYNARMIC_TESTS_USE_UNICORN) target_sources(dynarmic_tests PRIVATE fuzz_util.cpp fuzz_util.h + # A32 + A32/fuzz_arm.cpp + A32/fuzz_thumb.cpp + unicorn_emu/a32_unicorn.cpp + unicorn_emu/a32_unicorn.h + # A64 + A64/fuzz_with_unicorn.cpp + A64/verify_unicorn.cpp + unicorn_emu/a64_unicorn.cpp + unicorn_emu/a64_unicorn.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/fuzz_arm.cpp - A32/fuzz_thumb.cpp - unicorn_emu/a32_unicorn.cpp - unicorn_emu/a32_unicorn.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A64/fuzz_with_unicorn.cpp - A64/verify_unicorn.cpp - unicorn_emu/a64_unicorn.cpp - unicorn_emu/a64_unicorn.h - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -69,9 +52,6 @@ if ("x86_64" IN_LIST ARCHITECTURE) target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak) target_architecture_specific_sources(dynarmic_tests "x86_64" x64_cpu_info.cpp - ) - - target_architecture_specific_sources(dynarmic_tests "x86_64" native/preserve_xmm.cpp ) @@ -85,47 +65,47 @@ endif() include(CreateDirectoryGroups) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_print_info - print_info.cpp - ) +# +# dynarmic_print_info +# +add_executable(dynarmic_print_info + print_info.cpp +) +create_target_directory_groups(dynarmic_print_info) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_print_info PRIVATE . ../src) +target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_print_info) +# +# dynarmic_test_generator +# +add_executable(dynarmic_test_generator + fuzz_util.cpp + fuzz_util.h + test_generator.cpp +) - target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_print_info PRIVATE . ../src) - target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +create_target_directory_groups(dynarmic_test_generator) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_generator - fuzz_util.cpp - fuzz_util.h - test_generator.cpp - ) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_generator PRIVATE . ../src) +target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_test_generator) - - target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_generator PRIVATE . ../src) - target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() - -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_reader - test_reader.cpp - ) - - create_target_directory_groups(dynarmic_test_reader) - - target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_reader PRIVATE . ../src) - target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +# +# dynarmic_test_reader +# +add_executable(dynarmic_test_reader + test_reader.cpp +) +create_target_directory_groups(dynarmic_test_reader) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_reader PRIVATE . ../src) +target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) +# create_target_directory_groups(dynarmic_tests) target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index ef8b87bbd1..3263ca729a 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -34,7 +34,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; From c30eadcb0df2fdb21878759d344ca0e9aaf9dc91 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:57:48 +0000 Subject: [PATCH 02/17] Fix license headers --- src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index 8e48fa3687..01af361b27 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index 2b50ad9ea3..c4c1c42792 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h index 1963fad0a0..88b8020031 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.h +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD From f8040d3b523f42f057923e16c375c73a41b93ef9 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 00:11:40 +0000 Subject: [PATCH 03/17] [dynarmic] use small vector experiment Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index f70329f471..df1619d552 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -12,6 +12,7 @@ #include #include +#include "boost/container/small_vector.hpp" #include "dynarmic/common/common_types.h" #include #include @@ -83,7 +84,7 @@ public: void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; private: //non trivial - std::vector values; //24 + boost::container::small_vector values; //24 // Block state uint16_t total_uses = 0; //8 //sometimes zeroed @@ -96,7 +97,7 @@ private: uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - alignas(16) uint8_t lru_counter = 0; //1 + uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From da67f58a51baf9eb131dff8da8f9b7aeda8ea860 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:07 +0000 Subject: [PATCH 04/17] [dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index fa6006ed2a..014c121be5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -118,7 +118,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept { values.push_back(inst); ASSERT(size_t(total_uses) + inst->UseCount() < std::numeric_limits::max()); total_uses += inst->UseCount(); - max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType())); + max_bit_width = std::max(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType()))); } void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index df1619d552..160c2ef098 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -78,7 +78,7 @@ public: return std::find(values.begin(), values.end(), inst) != values.end(); } inline size_t GetMaxBitWidth() const noexcept { - return max_bit_width; + return 1 << max_bit_width; } void AddValue(IR::Inst* inst) noexcept; void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; @@ -94,10 +94,10 @@ private: uint16_t is_being_used_count = 0; //8 uint16_t current_references = 0; //8 // Value state - uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 + uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6) + uint8_t lru_counter : 2 = 0; //1 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From 7243704623a8ed862a38927a4a1a4b14b3cacf06 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:22 +0000 Subject: [PATCH 05/17] [dynarmic] fix tests_reader and tests_generator Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 2 +- src/dynarmic/tests/A32/testenv.h | 1 - src/dynarmic/tests/A64/testenv.h | 1 - src/dynarmic/tests/CMakeLists.txt | 28 ++++++++++++++++++++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index 6948bf39fb..dbd5926ee9 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -371,7 +371,7 @@ target_link_libraries(dynarmic ) if (BOOST_NO_HEADERS) -target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) + target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) else() target_link_libraries(dynarmic PRIVATE Boost::headers) endif() diff --git a/src/dynarmic/tests/A32/testenv.h b/src/dynarmic/tests/A32/testenv.h index a6df2017ce..72eaafce14 100644 --- a/src/dynarmic/tests/A32/testenv.h +++ b/src/dynarmic/tests/A32/testenv.h @@ -17,7 +17,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A32/a32.h" -#include "../native/testenv.h" template class A32TestEnv : public Dynarmic::A32::UserCallbacks { diff --git a/src/dynarmic/tests/A64/testenv.h b/src/dynarmic/tests/A64/testenv.h index 31e338b138..fcdadb23e6 100644 --- a/src/dynarmic/tests/A64/testenv.h +++ b/src/dynarmic/tests/A64/testenv.h @@ -12,7 +12,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A64/a64.h" -#include "../native/testenv.h" using Vector = Dynarmic::A64::Vector; diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 29a8f23478..f8f420e2d6 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -72,7 +72,12 @@ add_executable(dynarmic_print_info print_info.cpp ) create_target_directory_groups(dynarmic_print_info) -target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_print_info PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_print_info PRIVATE . ../src) target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -88,7 +93,12 @@ add_executable(dynarmic_test_generator create_target_directory_groups(dynarmic_test_generator) -target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_generator PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_generator PRIVATE . ../src) target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -100,7 +110,12 @@ add_executable(dynarmic_test_reader test_reader.cpp ) create_target_directory_groups(dynarmic_test_reader) -target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_reader PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_reader PRIVATE . ../src) target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -108,7 +123,12 @@ target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LIT # create_target_directory_groups(dynarmic_tests) -target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) +target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_tests PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_tests PRIVATE . ../src) target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) From 75c9ba2adb20326d4f23b9bd173d065709b15fa5 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 09:10:18 +0000 Subject: [PATCH 06/17] [docs] fastmem draft Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 5 +++++ src/dynarmic/docs/Fastmem.svg | 4 ++++ src/dynarmic/docs/HostToGuest.svg | 4 ++++ 3 files changed, 13 insertions(+) create mode 100644 src/dynarmic/docs/FastMemory.md create mode 100644 src/dynarmic/docs/Fastmem.svg create mode 100644 src/dynarmic/docs/HostToGuest.svg diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md new file mode 100644 index 0000000000..2a7c1a2583 --- /dev/null +++ b/src/dynarmic/docs/FastMemory.md @@ -0,0 +1,5 @@ +# Fast memory (Fastmem) + +![Host to guest translation](./HostToGuest.svg) + +![Fastmem translation](./Fastmem.svg) diff --git a/src/dynarmic/docs/Fastmem.svg b/src/dynarmic/docs/Fastmem.svg new file mode 100644 index 0000000000..a3ed0bb68b --- /dev/null +++ b/src/dynarmic/docs/Fastmem.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
SIGSEGV Trap
Fastmem
Only needs to linearly offset from fastmem arena
Less codegen (SIGSEGV traps)
Is fast only if SIGSEGV handlers are sufficiently fast
\ No newline at end of file diff --git a/src/dynarmic/docs/HostToGuest.svg b/src/dynarmic/docs/HostToGuest.svg new file mode 100644 index 0000000000..6a15a44b46 --- /dev/null +++ b/src/dynarmic/docs/HostToGuest.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
Resolver
Host to Guest translation
Looks up correct PTE
Translates each address 
Is slow
\ No newline at end of file From c21eea1304bbdf78090b9affc42bae90a5dcfeca Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 10:16:22 +0000 Subject: [PATCH 07/17] [dynarmic] (prolly makes MSVC crash) - use 128MiB code cache Signed-off-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 6 +- .../frontend/decoder/decoder_detail.h | 65 ++++++++----------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 2c2c54a1ad..3f7bf7f967 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -210,12 +210,8 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 438b7b691c..682b0cfbf6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -269,12 +269,8 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index bd76efda2a..4050033ef5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -70,11 +70,9 @@ struct detail { return std::make_tuple(mask, expect); } - /** - * Generates the masks and shifts for each argument. - * A '-' in a bitstring indicates that we don't care about that value. - * An argument is specified by a continuous string of the same character. - */ + /// @brief Generates the masks and shifts for each argument. + /// A '-' in a bitstring indicates that we don't care about that value. + /// An argument is specified by a continuous string of the same character. template static consteval auto GetArgInfo(std::array bitstring) { std::array masks = {}; @@ -98,7 +96,6 @@ struct detail { if constexpr (N > 0) { const size_t bit_position = opcode_bitsize - i - 1; - if (arg_index >= N) throw std::out_of_range("Unexpected field"); @@ -109,20 +106,16 @@ struct detail { } } } - #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) // Avoids a MSVC ICE, and avoids Android NDK issue. ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; })); #endif - return std::make_tuple(masks, shifts); } - /** - * This struct's Make member function generates a lambda which decodes an instruction based on - * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a - * template argument. - */ + /// @brief This struct's Make member function generates a lambda which decodes an instruction + /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is + /// provided as a template argument. template struct VisitorCaller; @@ -130,36 +123,36 @@ struct detail { # pragma warning(push) # pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning) #endif - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...), + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...), const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...) const, + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...) const, const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; @@ -167,22 +160,16 @@ struct detail { # pragma warning(pop) #endif - /** - * Creates a matcher that can match and parse instructions based on bitstring. - * See also: GetMaskAndExpect and GetArgInfo for format of bitstring. - */ - template - static auto GetMatcher(FnT fn, const char* const name) { - constexpr size_t args_count = mcl::parameter_count_v; - + /// @brief Creates a matcher that can match and parse instructions based on bitstring. + /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. + template + static constexpr auto GetMatcher(F fn, const char* const name) { + constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); - - using Iota = std::make_index_sequence; - - const auto proxy_fn = VisitorCaller::Make(Iota(), fn, arg_masks, arg_shifts); + const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); return MatcherT(name, mask, expect, proxy_fn); } }; From 8a79bba0d483deeb3a0d6e0c23eeacf29e94dced Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 4 Sep 2025 07:24:49 +0000 Subject: [PATCH 08/17] [dynarmic] reduce matcher table noise and cache misses Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/a32_types.h | 21 +++++---------- .../src/dynarmic/frontend/A32/decoder/asimd.h | 27 +++++++++---------- .../src/dynarmic/frontend/A32/decoder/vfp.h | 12 +++------ .../src/dynarmic/frontend/A64/decoder/a64.h | 23 +++++++--------- .../frontend/decoder/decoder_detail.h | 6 ++--- .../src/dynarmic/frontend/decoder/matcher.h | 11 ++------ 6 files changed, 38 insertions(+), 62 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h index fdadef8257..2a0cc25751 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h @@ -9,12 +9,9 @@ #pragma once #include -#include - #include #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" - #include "dynarmic/interface/A32/coprocessor_util.h" #include "dynarmic/ir/cond.h" @@ -89,23 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) { inline size_t RegNumber(Reg reg) { ASSERT(reg != Reg::INVALID_REG); - return static_cast(reg); + return size_t(reg); } inline size_t RegNumber(ExtReg reg) { if (IsSingleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::S0); + return size_t(reg) - size_t(ExtReg::S0); + } else if (IsDoubleExtReg(reg)) { + return size_t(reg) - size_t(ExtReg::D0); } - - if (IsDoubleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::D0); - } - - if (IsQuadExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::Q0); - } - - ASSERT_MSG(false, "Invalid extended register"); + ASSERT(IsQuadExtReg(reg)); + return size_t(reg) - size_t(ExtReg::Q0); } inline Reg operator+(Reg reg, size_t number) { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f2e206695b..f63816d2b1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher; template std::vector> GetASIMDDecodeTable() { - std::vector> table = { - -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> table = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" #undef INST - }; - // Exceptions to the rule of thumb. const std::set comes_first{ "VBIC, VMOV, VMVN, VORR (immediate)", @@ -53,19 +50,21 @@ std::vector> GetASIMDDecodeTable() { "VQDMULH (scalar)", "VQRDMULH (scalar)", }; - const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_first.count(matcher.GetName()) > 0; + const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_first.count(e.first) > 0; }); - const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_last.count(matcher.GetName()) == 0; + const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_last.count(e.first) == 0; }); - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) { - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - - return table; + std::vector> final_table; + std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + return e.second; + }); + return final_table; } template diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index 5fcacd2bda..a969570547 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -30,22 +30,18 @@ std::optional>> DecodeVFP(u32 instruc static const struct Tables { Table unconditional; Table conditional; - } tables = [] { + } tables = []() { Table list = { - #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./vfp.inc" #undef INST - }; - - const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { return (matcher.GetMask() & 0xF0000000) == 0xF0000000; }); - return Tables{ - Table{list.begin(), division}, - Table{division, list.end()}, + Table{list.begin(), it}, + Table{it, list.end()}, }; }(); diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c651dd7cde..c7bc981ac6 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) { template constexpr DecodeTable GetDecodeTable() { - std::vector> list = { -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./a64.inc" #undef INST }; - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { + std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { // If a matcher has more bits in its mask it is more specific, so it should come first. - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - // Exceptions to the above rule of thumb. - std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + std::stable_partition(list.begin(), list.end(), [&](const auto& e) { return std::set{ "MOVI, MVNI, ORR, BIC (vector, immediate)", "FMOV (vector, immediate)", "Unallocated SIMD modified immediate", - }.count(matcher.GetName()) > 0; + }.count(e.first) > 0; }); - DecodeTable table{}; for (size_t i = 0; i < table.size(); ++i) { - for (auto matcher : list) { - const auto expect = detail::ToFastLookupIndex(matcher.GetExpected()); - const auto mask = detail::ToFastLookupIndex(matcher.GetMask()); + for (auto const& e : list) { + const auto expect = detail::ToFastLookupIndex(e.second.GetExpected()); + const auto mask = detail::ToFastLookupIndex(e.second.GetMask()); if ((i & mask) == expect) { - table[i].push_back(matcher); + table[i].push_back(e.second); } } } diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index 4050033ef5..0d9da6169c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -163,18 +163,18 @@ struct detail { /// @brief Creates a matcher that can match and parse instructions based on bitstring. /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. template - static constexpr auto GetMatcher(F fn, const char* const name) { + static constexpr auto GetMatcher(F fn) { constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); - return MatcherT(name, mask, expect, proxy_fn); + return MatcherT(mask, expect, proxy_fn); } }; -#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn, name) +#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn) } // namespace detail } // namespace Dynarmic::Decoder diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 7e5c9c5a8f..5a2afe57c5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -31,14 +31,8 @@ public: using visitor_type = Visitor; using handler_return_type = typename Visitor::instruction_return_type; using handler_function = std::function; - - Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func) - : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {} - - /// Gets the name of this type of instruction. - const char* GetName() const { - return name; - } + Matcher(opcode_type mask, opcode_type expected, handler_function func) + : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. opcode_type GetMask() const { @@ -70,7 +64,6 @@ public: } private: - const char* name; opcode_type mask; opcode_type expected; handler_function fn; From 718891d11f53a8496ce1462ce37a3c0d4083ba33 Mon Sep 17 00:00:00 2001 From: Maufeat Date: Fri, 5 Sep 2025 00:04:37 +0200 Subject: [PATCH 09/17] [fs] temporarely disable nca verification (#298) This adds a passthrough to basically disable nca verification for newer NCAs, this fixes (tested) Pokemon 4.0.0 update and other newer SDK games and updates (as reported on the discord) This is implemented as toggle that is default enabled, this needs proper implementation in the future. Co-authored-by: crueter Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/298 Reviewed-by: MaranBr Reviewed-by: crueter Co-authored-by: Maufeat Co-committed-by: Maufeat --- src/android/app/build.gradle.kts | 4 +- .../org/yuzu/yuzu_emu/adapters/GameAdapter.kt | 22 ++ .../features/settings/model/BooleanSetting.kt | 1 + .../features/settings/model/Settings.kt | 1 + .../settings/model/view/SettingsItem.kt | 8 +- .../settings/ui/SettingsFragmentPresenter.kt | 1 + .../org/yuzu/yuzu_emu/model/HomeViewModel.kt | 7 - .../org/yuzu/yuzu_emu/ui/main/MainActivity.kt | 34 --- src/android/app/src/main/jni/native.cpp | 2 + .../app/src/main/res/values-ar/strings.xml | 2 + .../app/src/main/res/values-ckb/strings.xml | 2 + .../app/src/main/res/values-cs/strings.xml | 2 + .../app/src/main/res/values-de/strings.xml | 2 + .../app/src/main/res/values-es/strings.xml | 2 + .../app/src/main/res/values-fa/strings.xml | 2 + .../app/src/main/res/values-fr/strings.xml | 2 + .../app/src/main/res/values-he/strings.xml | 2 + .../app/src/main/res/values-hu/strings.xml | 2 + .../app/src/main/res/values-id/strings.xml | 2 + .../app/src/main/res/values-it/strings.xml | 2 + .../app/src/main/res/values-ja/strings.xml | 2 + .../app/src/main/res/values-ko/strings.xml | 2 + .../app/src/main/res/values-nb/strings.xml | 2 + .../app/src/main/res/values-pl/strings.xml | 2 + .../src/main/res/values-pt-rBR/strings.xml | 2 + .../src/main/res/values-pt-rPT/strings.xml | 2 + .../app/src/main/res/values-ru/strings.xml | 2 + .../app/src/main/res/values-sr/strings.xml | 2 + .../app/src/main/res/values-uk/strings.xml | 2 + .../app/src/main/res/values-vi/strings.xml | 2 + .../src/main/res/values-zh-rCN/strings.xml | 2 + .../src/main/res/values-zh-rTW/strings.xml | 2 + .../app/src/main/res/values/strings.xml | 5 + src/audio_core/common/feature_support.h | 2 +- src/common/settings.h | 9 +- src/core/CMakeLists.txt | 3 + .../fssystem/fssystem_bucket_tree.cpp | 9 +- .../fssystem_hierarchical_sha256_storage.cpp | 20 +- .../fssystem_hierarchical_sha3_storage.cpp | 57 +++++ .../fssystem_hierarchical_sha3_storage.h | 44 ++++ .../fssystem_nca_file_system_driver.cpp | 221 +++++++++++++----- .../fssystem_nca_file_system_driver.h | 7 + .../file_sys/fssystem/fssystem_nca_header.cpp | 9 +- .../fssystem/fssystem_passthrough_storage.h | 32 +++ src/core/hle/service/am/applet.cpp | 9 +- src/core/hle/service/am/applet.h | 4 + .../all_system_applet_proxies_service.cpp | 10 + .../all_system_applet_proxies_service.h | 1 + .../am/service/applet_common_functions.cpp | 7 + .../am/service/applet_common_functions.h | 1 + .../am/service/application_functions.cpp | 11 + .../am/service/application_functions.h | 4 + .../am/service/library_applet_creator.cpp | 4 +- .../ns/application_manager_interface.cpp | 16 ++ .../ns/application_manager_interface.h | 2 + .../service/pctl/parental_control_service.cpp | 14 +- .../service/pctl/parental_control_service.h | 2 + src/yuzu/applets/qt_web_browser.cpp | 9 +- src/yuzu/configuration/shared_translation.cpp | 6 + src/yuzu/main.cpp | 41 +++- src/yuzu/main.h | 3 + 61 files changed, 559 insertions(+), 129 deletions(-) create mode 100644 src/core/file_sys/fssystem/fssystem_hierarchical_sha3_storage.cpp create mode 100644 src/core/file_sys/fssystem/fssystem_hierarchical_sha3_storage.h create mode 100644 src/core/file_sys/fssystem/fssystem_passthrough_storage.h diff --git a/src/android/app/build.gradle.kts b/src/android/app/build.gradle.kts index e91d2e8c52..3f1a7c102b 100644 --- a/src/android/app/build.gradle.kts +++ b/src/android/app/build.gradle.kts @@ -30,8 +30,8 @@ val autoVersion = (((System.currentTimeMillis() / 1000) - 1451606400) / 10).toIn android { namespace = "org.yuzu.yuzu_emu" - compileSdkVersion = "android-35" - ndkVersion = "26.1.10909125" + compileSdkVersion = "android-36" + ndkVersion = "28.2.13676358" buildFeatures { viewBinding = true diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/adapters/GameAdapter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/adapters/GameAdapter.kt index 11b81a01a6..98f342c274 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/adapters/GameAdapter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/adapters/GameAdapter.kt @@ -36,6 +36,9 @@ import androidx.core.net.toUri import androidx.core.content.edit import com.google.android.material.dialog.MaterialAlertDialogBuilder import org.yuzu.yuzu_emu.NativeLibrary +import org.yuzu.yuzu_emu.features.settings.model.BooleanSetting +import org.yuzu.yuzu_emu.features.settings.model.Settings +import org.yuzu.yuzu_emu.utils.NativeConfig class GameAdapter(private val activity: AppCompatActivity) : AbstractDiffAdapter(exact = false) { @@ -229,6 +232,8 @@ class GameAdapter(private val activity: AppCompatActivity) : binding.root.findNavController().navigate(action) } + val preferences = PreferenceManager.getDefaultSharedPreferences(YuzuApplication.appContext) + if (NativeLibrary.gameRequiresFirmware(game.programId) && !NativeLibrary.isFirmwareAvailable()) { MaterialAlertDialogBuilder(activity) .setTitle(R.string.loader_requires_firmware) @@ -243,6 +248,23 @@ class GameAdapter(private val activity: AppCompatActivity) : } .setNegativeButton(android.R.string.cancel) { _, _ -> } .show() + } else if (BooleanSetting.DISABLE_NCA_VERIFICATION.getBoolean(false) && !preferences.getBoolean( + Settings.PREF_HIDE_NCA_POPUP, false)) { + MaterialAlertDialogBuilder(activity) + .setTitle(R.string.nca_verification_disabled) + .setMessage(activity.getString(R.string.nca_verification_disabled_description)) + .setPositiveButton(android.R.string.ok) { _, _ -> + launch() + } + .setNeutralButton(R.string.dont_show_again) { _, _ -> + preferences.edit { + putBoolean(Settings.PREF_HIDE_NCA_POPUP, true) + } + + launch() + } + .setNegativeButton(android.R.string.cancel) { _, _ -> } + .show() } else { launch() } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt index 3c5b9003de..6d4bfd97ac 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt @@ -35,6 +35,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting { RENDERER_SAMPLE_SHADING("sample_shading"), PICTURE_IN_PICTURE("picture_in_picture"), USE_CUSTOM_RTC("custom_rtc_enabled"), + DISABLE_NCA_VERIFICATION("disable_nca_verification"), BLACK_BACKGROUNDS("black_backgrounds"), JOYSTICK_REL_CENTER("joystick_rel_center"), DPAD_SLIDE("dpad_slide"), diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt index a52f582031..2564849ef4 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt @@ -37,6 +37,7 @@ object Settings { const val PREF_SHOULD_SHOW_PRE_ALPHA_WARNING = "ShouldShowPreAlphaWarning" const val PREF_SHOULD_SHOW_EDENS_VEIL_DIALOG = "ShouldShowEdensVeilDialog" const val PREF_MEMORY_WARNING_SHOWN = "MemoryWarningShown" + const val PREF_HIDE_NCA_POPUP = "HideNCAVerificationPopup" const val SECTION_STATS_OVERLAY = "Stats Overlay" // Deprecated input overlay preference keys diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt index a689b6ce76..883d8efaef 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt @@ -297,7 +297,13 @@ abstract class SettingsItem( descriptionId = R.string.use_custom_rtc_description ) ) - + put( + SwitchSetting( + BooleanSetting.DISABLE_NCA_VERIFICATION, + titleId = R.string.disable_nca_verification, + descriptionId = R.string.disable_nca_verification_description + ) + ) put( StringInputSetting( StringSetting.WEB_TOKEN, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt index 14d62ceec3..630bcb0d74 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt @@ -210,6 +210,7 @@ class SettingsFragmentPresenter( add(IntSetting.LANGUAGE_INDEX.key) add(BooleanSetting.USE_CUSTOM_RTC.key) add(LongSetting.CUSTOM_RTC.key) + add(BooleanSetting.DISABLE_NCA_VERIFICATION.key) add(HeaderSetting(R.string.network)) add(StringSetting.WEB_TOKEN.key) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/HomeViewModel.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/HomeViewModel.kt index 97a60ee184..a06abb394f 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/HomeViewModel.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/HomeViewModel.kt @@ -31,9 +31,6 @@ class HomeViewModel : ViewModel() { private val _checkKeys = MutableStateFlow(false) val checkKeys = _checkKeys.asStateFlow() - private val _checkFirmware = MutableStateFlow(false) - val checkFirmware = _checkFirmware.asStateFlow() - var navigatedToSetup = false fun setStatusBarShadeVisibility(visible: Boolean) { @@ -66,8 +63,4 @@ class HomeViewModel : ViewModel() { fun setCheckKeys(value: Boolean) { _checkKeys.value = value } - - fun setCheckFirmware(value: Boolean) { - _checkFirmware.value = value - } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt index e8dd566f79..cfed4d08ec 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt @@ -142,16 +142,6 @@ class MainActivity : AppCompatActivity(), ThemeProvider { checkedDecryption = true } - if (!checkedFirmware) { - val firstTimeSetup = PreferenceManager.getDefaultSharedPreferences(applicationContext) - .getBoolean(Settings.PREF_FIRST_APP_LAUNCH, true) - if (!firstTimeSetup) { - checkFirmware() - showPreAlphaWarningDialog() - } - checkedFirmware = true - } - WindowCompat.setDecorFitsSystemWindows(window, false) window.setSoftInputMode(WindowManager.LayoutParams.SOFT_INPUT_ADJUST_NOTHING) @@ -198,13 +188,6 @@ class MainActivity : AppCompatActivity(), ThemeProvider { if (it) checkKeys() } - homeViewModel.checkFirmware.collect( - this, - resetState = { homeViewModel.setCheckFirmware(false) } - ) { - if (it) checkFirmware() - } - setInsets() } @@ -243,21 +226,6 @@ class MainActivity : AppCompatActivity(), ThemeProvider { ).show(supportFragmentManager, MessageDialogFragment.TAG) } } - - private fun checkFirmware() { - val resultCode: Int = NativeLibrary.verifyFirmware() - if (resultCode == 0) return - - val resultString: String = - resources.getStringArray(R.array.verifyFirmwareResults)[resultCode] - - MessageDialogFragment.newInstance( - titleId = R.string.firmware_invalid, - descriptionString = resultString, - helpLinkId = R.string.firmware_missing_help - ).show(supportFragmentManager, MessageDialogFragment.TAG) - } - override fun onSaveInstanceState(outState: Bundle) { super.onSaveInstanceState(outState) outState.putBoolean(CHECKED_DECRYPTION, checkedDecryption) @@ -434,7 +402,6 @@ class MainActivity : AppCompatActivity(), ThemeProvider { cacheFirmwareDir.copyRecursively(firmwarePath, true) NativeLibrary.initializeSystem(true) homeViewModel.setCheckKeys(true) - homeViewModel.setCheckFirmware(true) getString(R.string.save_file_imported_success) } } catch (e: Exception) { @@ -464,7 +431,6 @@ class MainActivity : AppCompatActivity(), ThemeProvider { // Optionally reinitialize the system or perform other necessary steps NativeLibrary.initializeSystem(true) homeViewModel.setCheckKeys(true) - homeViewModel.setCheckFirmware(true) messageToShow = getString(R.string.firmware_uninstalled_success) } else { messageToShow = getString(R.string.firmware_uninstalled_failure) diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp index f8f175d313..306b7e2a4c 100644 --- a/src/android/app/src/main/jni/native.cpp +++ b/src/android/app/src/main/jni/native.cpp @@ -596,6 +596,8 @@ jstring Java_org_yuzu_yuzu_1emu_utils_GpuDriverHelper_getGpuModel(JNIEnv *env, j const std::string model_name{device.GetModelName()}; + window.release(); + return Common::Android::ToJString(env, model_name); } diff --git a/src/android/app/src/main/res/values-ar/strings.xml b/src/android/app/src/main/res/values-ar/strings.xml index ed3fc76f3b..c705ae08f1 100644 --- a/src/android/app/src/main/res/values-ar/strings.xml +++ b/src/android/app/src/main/res/values-ar/strings.xml @@ -498,6 +498,8 @@ ساعة مخصصة في الوقت الحقيقي يسمح لك بتعيين ساعة مخصصة في الوقت الفعلي منفصلة عن وقت النظام الحالي لديك تعيين ساعة مخصصة في الوقت الحقيقي + تعطيل التحقق من NCA + يعطل التحقق من سلامة أرشيفات محتوى NCA. قد يحسن هذا من سرعة التحميل لكنه يخاطر بتلف البيانات أو تمرير ملفات غير صالحة دون اكتشاف. ضروري لجعل الألعاب والتحديثات التي تتطلب نظامًا أساسيًا 20+ تعمل. توليد diff --git a/src/android/app/src/main/res/values-ckb/strings.xml b/src/android/app/src/main/res/values-ckb/strings.xml index 34b1ae6252..af0eeeaa45 100644 --- a/src/android/app/src/main/res/values-ckb/strings.xml +++ b/src/android/app/src/main/res/values-ckb/strings.xml @@ -482,6 +482,8 @@ RTCی تایبەتمەند ڕێگەت پێدەدات کاتژمێرێکی کاتی ڕاستەقینەی تایبەتمەند دابنێیت کە جیاوازە لە کاتی ئێستای سیستەمەکەت. دانانی RTCی تایبەتمەند + ناچالاککردنی پشکنینی NCA + پشکنینی پێکهاتەی ئارشیڤەکانی ناوەڕۆکی NCA ناچالاک دەکات. ئەمە لەوانەیە خێرایی بارکردن به‌ره‌وپێش ببات، بەڵام مەترسی لەناوچوونی داتا یان ئەوەی فایلە نادروستەکان بەبێ ئەوەی دۆزرایەوە تێپەڕبن زیاتر دەکات. بۆ ئەوەی یاری و نوێکردنەوەکان کار بکەن کە پێویستی بە فریموێری 20+ هەیە زۆر پێویستە. بەرهەم هێنان diff --git a/src/android/app/src/main/res/values-cs/strings.xml b/src/android/app/src/main/res/values-cs/strings.xml index 293524271e..8d42b8303f 100644 --- a/src/android/app/src/main/res/values-cs/strings.xml +++ b/src/android/app/src/main/res/values-cs/strings.xml @@ -458,6 +458,8 @@ Vlastní RTC Vlastní nastavení času Nastavit vlastní RTC + Zakázat ověřování NCA + Zakáže ověřování integrity archivů obsahu NCA. To může zlepšit rychlost načítání, ale hrozí poškození dat nebo neodhalení neplatných souborů. Je nutné, aby fungovaly hry a aktualizace vyžadující firmware 20+. Generovat diff --git a/src/android/app/src/main/res/values-de/strings.xml b/src/android/app/src/main/res/values-de/strings.xml index 46ae9ba7fe..907b114388 100644 --- a/src/android/app/src/main/res/values-de/strings.xml +++ b/src/android/app/src/main/res/values-de/strings.xml @@ -486,6 +486,8 @@ Wird der Handheld-Modus verwendet, verringert es die Auflösung und erhöht die RTC-Datum auswählen RTC-Zeit auswählen Benutzerdefinierte Echtzeituhr + NCA-Verifizierung deaktivieren + Deaktiviert die Integritätsprüfung von NCA-Inhaltsarchiven. Dies kann die Ladegeschwindigkeit verbessern, riskiert jedoch Datenbeschädigung oder dass ungültige Dateien unentdeckt bleiben. Ist notwendig, um Spiele und Updates, die Firmware 20+ benötigen, zum Laufen zu bringen. Generieren diff --git a/src/android/app/src/main/res/values-es/strings.xml b/src/android/app/src/main/res/values-es/strings.xml index 8712f455de..12b7183cae 100644 --- a/src/android/app/src/main/res/values-es/strings.xml +++ b/src/android/app/src/main/res/values-es/strings.xml @@ -506,6 +506,8 @@ RTC personalizado Te permite tener un reloj personalizado en tiempo real diferente del tiempo del propio sistema. Configurar RTC personalizado + Desactivar verificación NCA + Desactiva la verificación de integridad de los archivos de contenido NCA. Esto puede mejorar la velocidad de carga, pero arriesga corrupción de datos o que archivos inválidos pasen desapercibidos. Es necesario para que funcionen juegos y actualizaciones que requieren firmware 20+. Generar diff --git a/src/android/app/src/main/res/values-fa/strings.xml b/src/android/app/src/main/res/values-fa/strings.xml index 07ff8ff4e0..d7ac1b770a 100644 --- a/src/android/app/src/main/res/values-fa/strings.xml +++ b/src/android/app/src/main/res/values-fa/strings.xml @@ -504,6 +504,8 @@ زمان سفارشی به شما امکان می‌دهد یک ساعت سفارشی جدا از زمان فعلی سیستم خود تنظیم کنید. تنظیم زمان سفارشی + غیرفعال کردن تأیید اعتبار NCA + بررسی صحت آرشیوهای محتوای NCA را غیرفعال می‌کند. این ممکن است سرعت بارگذاری را بهبود بخشد اما خطر خرابی داده یا تشخیص داده نشدن فایل‌های نامعتبر را به همراه دارد. برای کار کردن بازی‌ها و به‌روزرسانی‌هایی که به فرمور ۲۰+ نیاز دارند، ضروری است. تولید diff --git a/src/android/app/src/main/res/values-fr/strings.xml b/src/android/app/src/main/res/values-fr/strings.xml index 2e06ac98e1..62e67a6fee 100644 --- a/src/android/app/src/main/res/values-fr/strings.xml +++ b/src/android/app/src/main/res/values-fr/strings.xml @@ -506,6 +506,8 @@ RTC personnalisé Vous permet de définir une horloge en temps réel personnalisée distincte de l\'heure actuelle de votre système. Définir l\'horloge RTC personnalisée + Désactiver la vérification NCA + Désactive la vérification d\'intégrité des archives de contenu NCA. Cela peut améliorer la vitesse de chargement mais risque une corruption des données ou que des fichiers invalides ne soient pas détectés. Est nécessaire pour faire fonctionner les jeux et mises à jour nécessitant un firmware 20+. Générer diff --git a/src/android/app/src/main/res/values-he/strings.xml b/src/android/app/src/main/res/values-he/strings.xml index c0c835d633..ec5b526ce0 100644 --- a/src/android/app/src/main/res/values-he/strings.xml +++ b/src/android/app/src/main/res/values-he/strings.xml @@ -505,6 +505,8 @@ RTC מותאם אישית מאפשר לך לקבוע שעון זמן אמת נפרד משעון המערכת שלך. קבע RTC מותאם אישית + השבת אימות NCA + משבית את אימות השלמות של ארכיוני התוכן של NCA. זה עשוי לשפר את מהירות הטעינה אך מסתכן בשחיקת נתונים או שמא קבצים לא חוקיים יעברו ללא זיהוי. זה הכרחי כדי לגרום למשחקים ועדכונים הדורשים firmware 20+ לעבוד. יצירה diff --git a/src/android/app/src/main/res/values-hu/strings.xml b/src/android/app/src/main/res/values-hu/strings.xml index 46a5ac7cce..b45692f147 100644 --- a/src/android/app/src/main/res/values-hu/strings.xml +++ b/src/android/app/src/main/res/values-hu/strings.xml @@ -501,6 +501,8 @@ Egyéni RTC Megadhatsz egy valós idejű órát, amely eltér a rendszer által használt órától. Egyéni RTC beállítása + NCA ellenőrzés letiltása + Letiltja az NCA tartalomarchívumok integritás-ellenőrzését. Ez javíthatja a betöltési sebességet, de az adatsérülés vagy az érvénytelen fájlok észrevétlen maradásának kockázatával jár. Elengedhetetlen a 20+ firmware-et igénylő játékok és frissítések működtetéséhez. Generálás diff --git a/src/android/app/src/main/res/values-id/strings.xml b/src/android/app/src/main/res/values-id/strings.xml index cffb526ad5..1817fc654a 100644 --- a/src/android/app/src/main/res/values-id/strings.xml +++ b/src/android/app/src/main/res/values-id/strings.xml @@ -502,6 +502,8 @@ RTC Kustom Memungkinkan Anda untuk mengatur jam waktu nyata kustom yang terpisah dari waktu sistem saat ini Anda. Setel RTC Kustom + Nonaktifkan Verifikasi NCA + Menonaktifkan verifikasi integritas arsip konten NCA. Ini dapat meningkatkan kecepatan pemuatan tetapi berisiko kerusakan data atau file yang tidak valid tidak terdeteksi. Diperlukan untuk membuat game dan pembaruan yang membutuhkan firmware 20+ bekerja. Hasilkan diff --git a/src/android/app/src/main/res/values-it/strings.xml b/src/android/app/src/main/res/values-it/strings.xml index cb234cf61e..ff3706dd40 100644 --- a/src/android/app/src/main/res/values-it/strings.xml +++ b/src/android/app/src/main/res/values-it/strings.xml @@ -505,6 +505,8 @@ RTC Personalizzato Ti permette di impostare un orologio in tempo reale personalizzato, completamente separato da quello di sistema. Imposta un orologio in tempo reale personalizzato + Disabilita verifica NCA + Disabilita la verifica dell\'integrità degli archivi di contenuto NCA. Può migliorare la velocità di caricamento ma rischia il danneggiamento dei dati o che file non validi passino inosservati. Necessario per far funzionare giochi e aggiornamenti che richiedono il firmware 20+. Genera diff --git a/src/android/app/src/main/res/values-ja/strings.xml b/src/android/app/src/main/res/values-ja/strings.xml index abedb1e0bc..a6c1ebf8ab 100644 --- a/src/android/app/src/main/res/values-ja/strings.xml +++ b/src/android/app/src/main/res/values-ja/strings.xml @@ -491,6 +491,8 @@ カスタム RTC 現在のシステム時間とは別に、任意のリアルタイムクロックを設定できます。 カスタムRTCを設定 + NCA検証を無効化 + NCAコンテンツアーカイブの整合性検証を無効にします。読み込み速度が向上する可能性がありますが、データ破損や不正なファイルが検出されないリスクがあります。ファームウェア20以上が必要なゲームや更新を動作させるために必要です。 生成 diff --git a/src/android/app/src/main/res/values-ko/strings.xml b/src/android/app/src/main/res/values-ko/strings.xml index c6d9457744..01e2c5f4c0 100644 --- a/src/android/app/src/main/res/values-ko/strings.xml +++ b/src/android/app/src/main/res/values-ko/strings.xml @@ -501,6 +501,8 @@ 사용자 지정 RTC 현재 시스템 시간과 별도로 사용자 지정 RTC를 설정할 수 있습니다. 사용자 지정 RTC 설정 + NCA 검증 비활성화 + NCA 콘텐츠 아카이브의 무결성 검증을 비활성화합니다. 로딩 속도를 향상시킬 수 있지만 데이터 손상이나 유효하지 않은 파일이 미검증될 위험이 있습니다. 펌웨어 20+가 필요한 게임 및 업데이트를 실행하려면 필요합니다. 생성 diff --git a/src/android/app/src/main/res/values-nb/strings.xml b/src/android/app/src/main/res/values-nb/strings.xml index 3cc4c6d12c..90c0dbf05e 100644 --- a/src/android/app/src/main/res/values-nb/strings.xml +++ b/src/android/app/src/main/res/values-nb/strings.xml @@ -482,6 +482,8 @@ Tilpasset Sannhetstidsklokke Gjør det mulig å stille inn en egendefinert sanntidsklokke separat fra den gjeldende systemtiden. Angi tilpasset RTC + Deaktiver NCA-verifisering + Deaktiverer integritetsverifisering av NCA-innholdsarkiv. Dette kan forbedre lastehastigheten, men medfører risiko for datakorrupsjon eller at ugyldige filer ikke oppdages. Er nødvendig for å få spill og oppdateringer som trenger firmware 20+ til å fungere. Generer diff --git a/src/android/app/src/main/res/values-pl/strings.xml b/src/android/app/src/main/res/values-pl/strings.xml index b9858838e8..080064edaf 100644 --- a/src/android/app/src/main/res/values-pl/strings.xml +++ b/src/android/app/src/main/res/values-pl/strings.xml @@ -482,6 +482,8 @@ Niestandardowy RTC Ta opcja pozwala na wybranie własnych ustawień czasu używanych w czasie emulacji, innych niż czas systemu Android. Ustaw niestandardowy czas RTC + Wyłącz weryfikację NCA + Wyłącza weryfikację integralności archiwów zawartości NCA. Może to poprawić szybkość ładowania, ale niesie ryzyko uszkodzenia danych lub niezauważenia nieprawidłowych plików. Konieczne, aby działały gry i aktualizacje wymagające firmware\'u 20+. Generuj diff --git a/src/android/app/src/main/res/values-pt-rBR/strings.xml b/src/android/app/src/main/res/values-pt-rBR/strings.xml index 1296fad889..3dc1f83e6e 100644 --- a/src/android/app/src/main/res/values-pt-rBR/strings.xml +++ b/src/android/app/src/main/res/values-pt-rBR/strings.xml @@ -506,6 +506,8 @@ Data e hora personalizadas Permite a você configurar um relógio em tempo real separado do relógio do seu dispositivo. Definir um relógio em tempo real personalizado + Desativar verificação NCA + Desativa a verificação de integridade de arquivos de conteúdo NCA. Pode melhorar a velocidade de carregamento, mas arrisica corromper dados ou que arquivos inválidos passem despercebidos. É necessário para fazer jogos e atualizações que exigem firmware 20+ funcionarem. Gerar diff --git a/src/android/app/src/main/res/values-pt-rPT/strings.xml b/src/android/app/src/main/res/values-pt-rPT/strings.xml index a166907877..feb4950fcb 100644 --- a/src/android/app/src/main/res/values-pt-rPT/strings.xml +++ b/src/android/app/src/main/res/values-pt-rPT/strings.xml @@ -506,6 +506,8 @@ RTC personalizado Permite a você configurar um relógio em tempo real separado do relógio do seu dispositivo. Defina um relógio em tempo real personalizado + Desativar verificação NCA + Desativa a verificação de integridade dos arquivos de conteúdo NCA. Pode melhorar a velocidade de carregamento, mas arrisca a corrupção de dados ou que ficheiros inválidos passem despercebidos. É necessário para que jogos e atualizações que necessitam de firmware 20+ funcionem. Gerar diff --git a/src/android/app/src/main/res/values-ru/strings.xml b/src/android/app/src/main/res/values-ru/strings.xml index dc68c7b817..d56c94f10b 100644 --- a/src/android/app/src/main/res/values-ru/strings.xml +++ b/src/android/app/src/main/res/values-ru/strings.xml @@ -508,6 +508,8 @@ Пользовательский RTC Позволяет установить пользовательские часы реального времени отдельно от текущего системного времени. Установить пользовательский RTC + Отключить проверку NCA + Отключает проверку целостности архивов содержимого NCA. Может улучшить скорость загрузки, но есть риск повреждения данных или того, что недействительные файлы останутся незамеченными. Необходимо для работы игр и обновлений, требующих прошивку 20+. Создать diff --git a/src/android/app/src/main/res/values-sr/strings.xml b/src/android/app/src/main/res/values-sr/strings.xml index c547b3f761..5b444f6187 100644 --- a/src/android/app/src/main/res/values-sr/strings.xml +++ b/src/android/app/src/main/res/values-sr/strings.xml @@ -457,6 +457,8 @@ Цустом РТЦ Омогућава вам да поставите прилагођени сат у реалном времену одвојено од тренутног времена система. Подесите прилагођени РТЦ + Искључи верификацију НЦА + Искључује верификацију интегритета НЦА архива садржаја. Ово може побољшати брзину учитавања, али ризикује оштећење података или да неважећи фајлови прођу незапажено. Неопходно је да би игре и ажурирања која захтевају firmware 20+ радили. Генериши diff --git a/src/android/app/src/main/res/values-uk/strings.xml b/src/android/app/src/main/res/values-uk/strings.xml index b48a8a4a58..4ae61340dc 100644 --- a/src/android/app/src/main/res/values-uk/strings.xml +++ b/src/android/app/src/main/res/values-uk/strings.xml @@ -495,6 +495,8 @@ Свій RTC Дозволяє встановити власний час (Real-time clock, або RTC), відмінний від системного. Встановити RTC + Вимкнути перевірку NCA + Вимкає перевірку цілісності архівів вмісту NCA. Може покращити швидкість завантаження, але ризикує пошкодженням даних або тим, що недійсні файли залишаться непоміченими. Необхідно для роботи ігор та оновлень, які вимагають прошивки 20+. Створити diff --git a/src/android/app/src/main/res/values-vi/strings.xml b/src/android/app/src/main/res/values-vi/strings.xml index b19d437ceb..dd64fbca55 100644 --- a/src/android/app/src/main/res/values-vi/strings.xml +++ b/src/android/app/src/main/res/values-vi/strings.xml @@ -482,6 +482,8 @@ RTC tuỳ chỉnh Cho phép bạn thiết lập một đồng hồ thời gian thực tùy chỉnh riêng biệt so với thời gian hệ thống hiện tại. Thiết lập RTC tùy chỉnh + Tắt xác minh NCA + Tắt xác minh tính toàn vẹn của kho lưu trữ nội dung NCA. Có thể cải thiện tốc độ tải nhưng có nguy cơ hỏng dữ liệu hoặc các tệp không hợp lệ không bị phát hiện. Cần thiết để các trò chơi và bản cập nhật yêu cầu firmware 20+ hoạt động. Tạo diff --git a/src/android/app/src/main/res/values-zh-rCN/strings.xml b/src/android/app/src/main/res/values-zh-rCN/strings.xml index 95ab14abd0..a12c00063f 100644 --- a/src/android/app/src/main/res/values-zh-rCN/strings.xml +++ b/src/android/app/src/main/res/values-zh-rCN/strings.xml @@ -500,6 +500,8 @@ 自定义系统时间 此选项允许您设置与目前系统时间相独立的自定义系统时钟。 设置自定义系统时间 +禁用NCA验证 +禁用NCA内容存档的完整性验证。可能会提高加载速度,但存在数据损坏或无效文件未被检测到的风险。对于需要固件20+的游戏和更新是必需的。 生成 diff --git a/src/android/app/src/main/res/values-zh-rTW/strings.xml b/src/android/app/src/main/res/values-zh-rTW/strings.xml index 8640875f2c..a125553102 100644 --- a/src/android/app/src/main/res/values-zh-rTW/strings.xml +++ b/src/android/app/src/main/res/values-zh-rTW/strings.xml @@ -505,6 +505,8 @@ 自訂 RTC 允許您設定與您的目前系統時間相互獨立的自訂即時時鐘。 設定自訂 RTC + 停用NCA驗證 + 停用NCA內容存檔的完整性驗證。可能會提高載入速度,但存在資料損毀或無效檔案未被偵測到的風險。對於需要韌體20+的遊戲和更新是必需的。 生成 diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index 7124ba41b4..2c7923d5a3 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -474,6 +474,8 @@ Custom RTC Allows you to set a custom real-time clock separate from your current system time. Set custom RTC + Disable NCA Verification + Disables integrity verification of NCA content archives. This may improve loading speed but risks data corruption or invalid files going undetected. Some games that require firmware versions 20+ may need this as well. Generate @@ -782,6 +784,9 @@ Game Requires Firmware dump and install firmware, or press "OK" to launch anyways.]]> + NCA Verification Disabled + This is required to run new games and updates, but may cause instability or crashes if NCA files are corrupt, modified, or tampered with. If unsure, re-enable verification in Advanced Settings -> System, and use firmware versions of 19.0.1 or below. + Searching for game... Game not found for Title ID: %1$s diff --git a/src/audio_core/common/feature_support.h b/src/audio_core/common/feature_support.h index eef2a844ba..cd83df3832 100644 --- a/src/audio_core/common/feature_support.h +++ b/src/audio_core/common/feature_support.h @@ -13,7 +13,7 @@ #include "common/polyfill_ranges.h" namespace AudioCore { -constexpr u32 CurrentRevision = 13; +constexpr u32 CurrentRevision = 15; enum class SupportTags { CommandProcessingTimeEstimatorVersion4, diff --git a/src/common/settings.h b/src/common/settings.h index b657dc8658..047dfc800a 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -217,7 +217,8 @@ struct Values { true, true, &use_speed_limit}; - SwitchableSetting sync_core_speed{linkage, false, "sync_core_speed", Category::Core, Specialization::Default}; + SwitchableSetting sync_core_speed{linkage, false, "sync_core_speed", Category::Core, + Specialization::Default}; // Memory #ifdef HAS_NCE @@ -624,7 +625,11 @@ struct Values { linkage, 0, "rng_seed", Category::System, Specialization::Hex, true, true, &rng_seed_enabled}; Setting device_name{ - linkage, "Eden", "device_name", Category::System, Specialization::Default, true, true}; + linkage, "Eden", "device_name", Category::System, Specialization::Default, true, true}; + SwitchableSetting disable_nca_verification{linkage, true, "disable_nca_verification", + Category::System, Specialization::Default}; + Setting hide_nca_verification_popup{ + linkage, false, "hide_nca_verification_popup", Category::System, Specialization::Default}; Setting current_user{linkage, 0, "current_user", Category::System}; diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index eab506f194..33990d61a5 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -88,6 +88,8 @@ add_library(core STATIC file_sys/fssystem/fssystem_crypto_configuration.h file_sys/fssystem/fssystem_hierarchical_integrity_verification_storage.cpp file_sys/fssystem/fssystem_hierarchical_integrity_verification_storage.h + file_sys/fssystem/fssystem_hierarchical_sha3_storage.cpp + file_sys/fssystem/fssystem_hierarchical_sha3_storage.h file_sys/fssystem/fssystem_hierarchical_sha256_storage.cpp file_sys/fssystem/fssystem_hierarchical_sha256_storage.h file_sys/fssystem/fssystem_indirect_storage.cpp @@ -102,6 +104,7 @@ add_library(core STATIC file_sys/fssystem/fssystem_nca_header.cpp file_sys/fssystem/fssystem_nca_header.h file_sys/fssystem/fssystem_nca_reader.cpp + file_sys/fssystem/fssystem_passthrough_storage.h file_sys/fssystem/fssystem_pooled_buffer.cpp file_sys/fssystem/fssystem_pooled_buffer.h file_sys/fssystem/fssystem_sparse_storage.cpp diff --git a/src/core/file_sys/fssystem/fssystem_bucket_tree.cpp b/src/core/file_sys/fssystem/fssystem_bucket_tree.cpp index af8541009e..615a624f4f 100644 --- a/src/core/file_sys/fssystem/fssystem_bucket_tree.cpp +++ b/src/core/file_sys/fssystem/fssystem_bucket_tree.cpp @@ -1,6 +1,10 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/settings.h" #include "core/file_sys/errors.h" #include "core/file_sys/fssystem/fssystem_bucket_tree.h" #include "core/file_sys/fssystem/fssystem_bucket_tree_utils.h" @@ -233,7 +237,10 @@ Result BucketTree::Initialize(VirtualFile node_storage, VirtualFile entry_storag void BucketTree::Initialize(size_t node_size, s64 end_offset) { ASSERT(NodeSizeMin <= node_size && node_size <= NodeSizeMax); ASSERT(Common::IsPowerOfTwo(node_size)); - ASSERT(end_offset > 0); + + if (!Settings::values.disable_nca_verification.GetValue()) { + ASSERT(end_offset > 0); + } ASSERT(!this->IsInitialized()); m_node_size = node_size; diff --git a/src/core/file_sys/fssystem/fssystem_hierarchical_sha256_storage.cpp b/src/core/file_sys/fssystem/fssystem_hierarchical_sha256_storage.cpp index a68fd973c9..e8669a4a7d 100644 --- a/src/core/file_sys/fssystem/fssystem_hierarchical_sha256_storage.cpp +++ b/src/core/file_sys/fssystem/fssystem_hierarchical_sha256_storage.cpp @@ -5,23 +5,10 @@ #include "common/scope_exit.h" #include "core/file_sys/fssystem/fssystem_hierarchical_sha256_storage.h" +#include + namespace FileSys { -namespace { - -s32 Log2(s32 value) { - ASSERT(value > 0); - ASSERT(Common::IsPowerOfTwo(value)); - - s32 log = 0; - while ((value >>= 1) > 0) { - ++log; - } - return log; -} - -} // namespace - Result HierarchicalSha256Storage::Initialize(VirtualFile* base_storages, s32 layer_count, size_t htbs, void* hash_buf, size_t hash_buf_size) { // Validate preconditions. @@ -31,7 +18,8 @@ Result HierarchicalSha256Storage::Initialize(VirtualFile* base_storages, s32 lay // Set size tracking members. m_hash_target_block_size = static_cast(htbs); - m_log_size_ratio = Log2(m_hash_target_block_size / HashSize); + m_log_size_ratio = + static_cast(std::log2(static_cast(m_hash_target_block_size) / HashSize)); // Get the base storage size. m_base_storage_size = base_storages[2]->GetSize(); diff --git a/src/core/file_sys/fssystem/fssystem_hierarchical_sha3_storage.cpp b/src/core/file_sys/fssystem/fssystem_hierarchical_sha3_storage.cpp new file mode 100644 index 0000000000..d58f2ee9be --- /dev/null +++ b/src/core/file_sys/fssystem/fssystem_hierarchical_sha3_storage.cpp @@ -0,0 +1,57 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common/alignment.h" +#include "common/scope_exit.h" +#include "core/file_sys/fssystem/fssystem_hierarchical_sha3_storage.h" + +#include + +namespace FileSys { + +Result HierarchicalSha3Storage::Initialize(VirtualFile* base_storages, s32 layer_count, size_t htbs, + void* hash_buf, size_t hash_buf_size) { + ASSERT(layer_count == LayerCount); + ASSERT(Common::IsPowerOfTwo(htbs)); + ASSERT(hash_buf != nullptr); + + m_hash_target_block_size = static_cast(htbs); + m_log_size_ratio = + static_cast(std::log2(static_cast(m_hash_target_block_size) / HashSize)); + + m_base_storage_size = base_storages[2]->GetSize(); + { + auto size_guard = SCOPE_GUARD { + m_base_storage_size = 0; + }; + R_UNLESS(m_base_storage_size <= static_cast(HashSize) + << m_log_size_ratio << m_log_size_ratio, + ResultHierarchicalSha256BaseStorageTooLarge); + size_guard.Cancel(); + } + + m_base_storage = base_storages[2]; + m_hash_buffer = static_cast(hash_buf); + m_hash_buffer_size = hash_buf_size; + + std::array master_hash{}; + base_storages[0]->ReadObject(std::addressof(master_hash)); + + s64 hash_storage_size = base_storages[1]->GetSize(); + ASSERT(Common::IsAligned(hash_storage_size, HashSize)); + ASSERT(hash_storage_size <= m_hash_target_block_size); + ASSERT(hash_storage_size <= static_cast(m_hash_buffer_size)); + + base_storages[1]->Read(reinterpret_cast(m_hash_buffer), + static_cast(hash_storage_size), 0); + R_SUCCEED(); +} + +size_t HierarchicalSha3Storage::Read(u8* buffer, size_t size, size_t offset) const { + if (size == 0) + return size; + ASSERT(buffer != nullptr); + return m_base_storage->Read(buffer, size, offset); +} + +} // namespace FileSys diff --git a/src/core/file_sys/fssystem/fssystem_hierarchical_sha3_storage.h b/src/core/file_sys/fssystem/fssystem_hierarchical_sha3_storage.h new file mode 100644 index 0000000000..2db7bb28e1 --- /dev/null +++ b/src/core/file_sys/fssystem/fssystem_hierarchical_sha3_storage.h @@ -0,0 +1,44 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include + +#include "core/file_sys/errors.h" +#include "core/file_sys/fssystem/fs_i_storage.h" +#include "core/file_sys/vfs/vfs.h" + +namespace FileSys { + +class HierarchicalSha3Storage : public IReadOnlyStorage { + YUZU_NON_COPYABLE(HierarchicalSha3Storage); + YUZU_NON_MOVEABLE(HierarchicalSha3Storage); + +public: + static constexpr s32 LayerCount = 3; + static constexpr size_t HashSize = 256 / 8; // SHA3-256 + +public: + HierarchicalSha3Storage() : m_mutex() {} + + Result Initialize(VirtualFile* base_storages, s32 layer_count, size_t htbs, void* hash_buf, + size_t hash_buf_size); + + virtual size_t GetSize() const override { + return m_base_storage->GetSize(); + } + + virtual size_t Read(u8* buffer, size_t length, size_t offset) const override; + +private: + VirtualFile m_base_storage; + s64 m_base_storage_size{}; + char* m_hash_buffer{}; + size_t m_hash_buffer_size{}; + s32 m_hash_target_block_size{}; + s32 m_log_size_ratio{}; + std::mutex m_mutex; +}; + +} // namespace FileSys diff --git a/src/core/file_sys/fssystem/fssystem_nca_file_system_driver.cpp b/src/core/file_sys/fssystem/fssystem_nca_file_system_driver.cpp index ab5a7984e3..1bc7039318 100644 --- a/src/core/file_sys/fssystem/fssystem_nca_file_system_driver.cpp +++ b/src/core/file_sys/fssystem/fssystem_nca_file_system_driver.cpp @@ -1,6 +1,10 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/settings.h" #include "core/file_sys/fssystem/fssystem_aes_ctr_counter_extended_storage.h" #include "core/file_sys/fssystem/fssystem_aes_ctr_storage.h" #include "core/file_sys/fssystem/fssystem_aes_xts_storage.h" @@ -10,10 +14,12 @@ #include "core/file_sys/fssystem/fssystem_hierarchical_sha256_storage.h" #include "core/file_sys/fssystem/fssystem_indirect_storage.h" #include "core/file_sys/fssystem/fssystem_integrity_romfs_storage.h" +#include "core/file_sys/fssystem/fssystem_passthrough_storage.h" #include "core/file_sys/fssystem/fssystem_memory_resource_buffer_hold_storage.h" #include "core/file_sys/fssystem/fssystem_nca_file_system_driver.h" #include "core/file_sys/fssystem/fssystem_sparse_storage.h" #include "core/file_sys/fssystem/fssystem_switch_storage.h" +#include "core/file_sys/fssystem/fssystem_hierarchical_sha3_storage.h" #include "core/file_sys/vfs/vfs_offset.h" #include "core/file_sys/vfs/vfs_vector.h" @@ -299,18 +305,24 @@ Result NcaFileSystemDriver::CreateStorageByRawStorage(VirtualFile* out, // Process hash/integrity layer. switch (header_reader->GetHashType()) { case NcaFsHeader::HashType::HierarchicalSha256Hash: - R_TRY(this->CreateSha256Storage(std::addressof(storage), std::move(storage), - header_reader->GetHashData().hierarchical_sha256_data)); + R_TRY(CreateSha256Storage(&storage, std::move(storage), + header_reader->GetHashData().hierarchical_sha256_data)); break; case NcaFsHeader::HashType::HierarchicalIntegrityHash: - R_TRY(this->CreateIntegrityVerificationStorage( - std::addressof(storage), std::move(storage), - header_reader->GetHashData().integrity_meta_info)); + R_TRY(CreateIntegrityVerificationStorage(&storage, std::move(storage), + header_reader->GetHashData().integrity_meta_info)); + break; + case NcaFsHeader::HashType::HierarchicalSha3256Hash: + R_TRY(CreateSha3Storage(&storage, std::move(storage), + header_reader->GetHashData().hierarchical_sha256_data)); break; default: + LOG_ERROR(Loader, "Unhandled Fs HashType enum={}", + static_cast(header_reader->GetHashType())); R_THROW(ResultInvalidNcaFsHeaderHashType); } + // Process compression layer. if (header_reader->ExistsCompressionLayer()) { R_TRY(this->CreateCompressedStorage( @@ -679,6 +691,7 @@ Result NcaFileSystemDriver::CreateSparseStorageMetaStorageWithVerification( // Create the verification storage. VirtualFile integrity_storage; + Result rc = this->CreateIntegrityVerificationStorageForMeta( std::addressof(integrity_storage), out_layer_info_storage, std::move(decrypted_storage), meta_offset, meta_data_hash_data_info); @@ -734,8 +747,26 @@ Result NcaFileSystemDriver::CreateSparseStorageWithVerification( NcaHeader::CtrBlockSize))); // Check the meta data hash type. - R_UNLESS(meta_data_hash_type == NcaFsHeader::MetaDataHashType::HierarchicalIntegrity, - ResultRomNcaInvalidSparseMetaDataHashType); + if (meta_data_hash_type != NcaFsHeader::MetaDataHashType::HierarchicalIntegrity) { + LOG_ERROR(Loader, "Sparse meta hash type {} not supported for verification; mounting sparse data WITHOUT verification (temporary).", static_cast(meta_data_hash_type)); + + R_TRY(this->CreateBodySubStorage(std::addressof(body_substorage), + sparse_info.physical_offset, + sparse_info.GetPhysicalSize())); + + // Create sparse core directly (no meta verification) + std::shared_ptr sparse_storage_fallback; + R_TRY(this->CreateSparseStorageCore(std::addressof(sparse_storage_fallback), + body_substorage, sparse_info.GetPhysicalSize(), + /*meta_storage*/ body_substorage, // dummy; not used + sparse_info, false)); + + if (out_sparse_storage) + *out_sparse_storage = sparse_storage_fallback; + *out_fs_data_offset = fs_offset; + *out = std::move(sparse_storage_fallback); + R_SUCCEED(); + } // Create the meta storage. VirtualFile meta_storage; @@ -1093,6 +1124,56 @@ Result NcaFileSystemDriver::CreatePatchMetaStorage( R_SUCCEED(); } +Result NcaFileSystemDriver::CreateSha3Storage( + VirtualFile* out, VirtualFile base_storage, + const NcaFsHeader::HashData::HierarchicalSha256Data& hash_data) { + ASSERT(out != nullptr); + ASSERT(base_storage != nullptr); + + using VerificationStorage = HierarchicalSha3Storage; + + R_UNLESS(Common::IsPowerOfTwo(hash_data.hash_block_size), + ResultInvalidHierarchicalSha256BlockSize); + R_UNLESS(hash_data.hash_layer_count == VerificationStorage::LayerCount - 1, + ResultInvalidHierarchicalSha256LayerCount); + + const auto& hash_region = hash_data.hash_layer_region[0]; + const auto& data_region = hash_data.hash_layer_region[1]; + + constexpr s32 CacheBlockCount = 2; + const auto hash_buffer_size = static_cast(hash_region.size); + const auto cache_buffer_size = CacheBlockCount * hash_data.hash_block_size; + const auto total_buffer_size = hash_buffer_size + cache_buffer_size; + + auto buffer_hold_storage = std::make_shared( + std::move(base_storage), total_buffer_size); + R_UNLESS(buffer_hold_storage != nullptr, ResultAllocationMemoryFailedAllocateShared); + R_UNLESS(buffer_hold_storage->IsValid(), ResultAllocationMemoryFailedInNcaFileSystemDriverI); + + s64 base_size = buffer_hold_storage->GetSize(); + R_UNLESS(hash_region.offset + hash_region.size <= base_size, ResultNcaBaseStorageOutOfRangeC); + R_UNLESS(data_region.offset + data_region.size <= base_size, ResultNcaBaseStorageOutOfRangeC); + + auto master_hash_storage = + std::make_shared>(hash_data.fs_data_master_hash.value); + + auto verification_storage = std::make_shared(); + R_UNLESS(verification_storage != nullptr, ResultAllocationMemoryFailedAllocateShared); + + std::array layer_storages{ + std::make_shared(master_hash_storage, sizeof(Hash), 0), + std::make_shared(buffer_hold_storage, hash_region.size, hash_region.offset), + std::make_shared(buffer_hold_storage, data_region.size, data_region.offset), + }; + + R_TRY(verification_storage->Initialize(layer_storages.data(), VerificationStorage::LayerCount, + hash_data.hash_block_size, + buffer_hold_storage->GetBuffer(), hash_buffer_size)); + + *out = std::move(verification_storage); + R_SUCCEED(); +} + Result NcaFileSystemDriver::CreateSha256Storage( VirtualFile* out, VirtualFile base_storage, const NcaFsHeader::HashData::HierarchicalSha256Data& hash_data) { @@ -1160,6 +1241,7 @@ Result NcaFileSystemDriver::CreateSha256Storage( Result NcaFileSystemDriver::CreateIntegrityVerificationStorage( VirtualFile* out, VirtualFile base_storage, const NcaFsHeader::HashData::IntegrityMetaInfo& meta_info) { + R_RETURN(this->CreateIntegrityVerificationStorageImpl( out, base_storage, meta_info, 0, IntegrityDataCacheCount, IntegrityHashCacheCount, HierarchicalIntegrityVerificationStorage::GetDefaultDataCacheBufferLevel( @@ -1209,63 +1291,96 @@ Result NcaFileSystemDriver::CreateIntegrityVerificationStorageImpl( VirtualFile* out, VirtualFile base_storage, const NcaFsHeader::HashData::IntegrityMetaInfo& meta_info, s64 layer_info_offset, int max_data_cache_entries, int max_hash_cache_entries, s8 buffer_level) { - // Validate preconditions. + // Preconditions ASSERT(out != nullptr); ASSERT(base_storage != nullptr); ASSERT(layer_info_offset >= 0); - // Define storage types. - using VerificationStorage = HierarchicalIntegrityVerificationStorage; - using StorageInfo = VerificationStorage::HierarchicalStorageInformation; + if (!Settings::values.disable_nca_verification.GetValue()) { + // Define storage types. + using VerificationStorage = HierarchicalIntegrityVerificationStorage; + using StorageInfo = VerificationStorage::HierarchicalStorageInformation; - // Validate the meta info. - HierarchicalIntegrityVerificationInformation level_hash_info; - std::memcpy(std::addressof(level_hash_info), std::addressof(meta_info.level_hash_info), - sizeof(level_hash_info)); + // Validate the meta info. + HierarchicalIntegrityVerificationInformation level_hash_info; + std::memcpy(std::addressof(level_hash_info), std::addressof(meta_info.level_hash_info), + sizeof(level_hash_info)); - R_UNLESS(IntegrityMinLayerCount <= level_hash_info.max_layers, - ResultInvalidNcaHierarchicalIntegrityVerificationLayerCount); - R_UNLESS(level_hash_info.max_layers <= IntegrityMaxLayerCount, - ResultInvalidNcaHierarchicalIntegrityVerificationLayerCount); + R_UNLESS(IntegrityMinLayerCount <= level_hash_info.max_layers, + ResultInvalidNcaHierarchicalIntegrityVerificationLayerCount); + R_UNLESS(level_hash_info.max_layers <= IntegrityMaxLayerCount, + ResultInvalidNcaHierarchicalIntegrityVerificationLayerCount); - // Get the base storage size. - s64 base_storage_size = base_storage->GetSize(); + // Get the base storage size. + s64 base_storage_size = base_storage->GetSize(); - // Create storage info. - StorageInfo storage_info; - for (s32 i = 0; i < static_cast(level_hash_info.max_layers - 2); ++i) { - const auto& layer_info = level_hash_info.info[i]; - R_UNLESS(layer_info_offset + layer_info.offset + layer_info.size <= base_storage_size, + // Create storage info. + StorageInfo storage_info; + for (s32 i = 0; i < static_cast(level_hash_info.max_layers - 2); ++i) { + const auto& layer_info = level_hash_info.info[i]; + R_UNLESS(layer_info_offset + layer_info.offset + layer_info.size <= base_storage_size, + ResultNcaBaseStorageOutOfRangeD); + + storage_info[i + 1] = std::make_shared( + base_storage, layer_info.size, layer_info_offset + layer_info.offset); + } + + // Set the last layer info. + const auto& layer_info = level_hash_info.info[level_hash_info.max_layers - 2]; + const s64 last_layer_info_offset = layer_info_offset > 0 ? 0LL : layer_info.offset.Get(); + R_UNLESS(last_layer_info_offset + layer_info.size <= base_storage_size, ResultNcaBaseStorageOutOfRangeD); + if (layer_info_offset > 0) { + R_UNLESS(last_layer_info_offset + layer_info.size <= layer_info_offset, + ResultRomNcaInvalidIntegrityLayerInfoOffset); + } + storage_info.SetDataStorage(std::make_shared( + std::move(base_storage), layer_info.size, last_layer_info_offset)); - storage_info[i + 1] = std::make_shared( - base_storage, layer_info.size, layer_info_offset + layer_info.offset); + // Make the integrity romfs storage. + auto integrity_storage = std::make_shared(); + R_UNLESS(integrity_storage != nullptr, ResultAllocationMemoryFailedAllocateShared); + + // Initialize the integrity storage. + R_TRY(integrity_storage->Initialize(level_hash_info, meta_info.master_hash, storage_info, + max_data_cache_entries, max_hash_cache_entries, + buffer_level)); + + // Set the output. + *out = std::move(integrity_storage); + R_SUCCEED(); + } else { + // Read IVFC layout + HierarchicalIntegrityVerificationInformation lhi{}; + std::memcpy(std::addressof(lhi), std::addressof(meta_info.level_hash_info), sizeof(lhi)); + + R_UNLESS(IntegrityMinLayerCount <= lhi.max_layers, + ResultInvalidNcaHierarchicalIntegrityVerificationLayerCount); + R_UNLESS(lhi.max_layers <= IntegrityMaxLayerCount, + ResultInvalidNcaHierarchicalIntegrityVerificationLayerCount); + + const auto& data_li = lhi.info[lhi.max_layers - 2]; + + const s64 base_size = base_storage->GetSize(); + + // Compute the data layer window + const s64 data_off = (layer_info_offset > 0) ? 0LL : data_li.offset.Get(); + R_UNLESS(data_off + data_li.size <= base_size, ResultNcaBaseStorageOutOfRangeD); + if (layer_info_offset > 0) { + R_UNLESS(data_off + data_li.size <= layer_info_offset, + ResultRomNcaInvalidIntegrityLayerInfoOffset); + } + + // TODO: Passthrough (temporary compatibility: integrity disabled) + auto data_view = std::make_shared(base_storage, data_li.size, data_off); + R_UNLESS(data_view != nullptr, ResultAllocationMemoryFailedAllocateShared); + + auto passthrough = std::make_shared(std::move(data_view)); + R_UNLESS(passthrough != nullptr, ResultAllocationMemoryFailedAllocateShared); + + *out = std::move(passthrough); + R_SUCCEED(); } - - // Set the last layer info. - const auto& layer_info = level_hash_info.info[level_hash_info.max_layers - 2]; - const s64 last_layer_info_offset = layer_info_offset > 0 ? 0LL : layer_info.offset.Get(); - R_UNLESS(last_layer_info_offset + layer_info.size <= base_storage_size, - ResultNcaBaseStorageOutOfRangeD); - if (layer_info_offset > 0) { - R_UNLESS(last_layer_info_offset + layer_info.size <= layer_info_offset, - ResultRomNcaInvalidIntegrityLayerInfoOffset); - } - storage_info.SetDataStorage(std::make_shared( - std::move(base_storage), layer_info.size, last_layer_info_offset)); - - // Make the integrity romfs storage. - auto integrity_storage = std::make_shared(); - R_UNLESS(integrity_storage != nullptr, ResultAllocationMemoryFailedAllocateShared); - - // Initialize the integrity storage. - R_TRY(integrity_storage->Initialize(level_hash_info, meta_info.master_hash, storage_info, - max_data_cache_entries, max_hash_cache_entries, - buffer_level)); - - // Set the output. - *out = std::move(integrity_storage); - R_SUCCEED(); } Result NcaFileSystemDriver::CreateRegionSwitchStorage(VirtualFile* out, diff --git a/src/core/file_sys/fssystem/fssystem_nca_file_system_driver.h b/src/core/file_sys/fssystem/fssystem_nca_file_system_driver.h index 5bc838de64..e09bfc588a 100644 --- a/src/core/file_sys/fssystem/fssystem_nca_file_system_driver.h +++ b/src/core/file_sys/fssystem/fssystem_nca_file_system_driver.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -329,6 +332,10 @@ private: const NcaPatchInfo& patch_info, const NcaMetaDataHashDataInfo& meta_data_hash_data_info); + + Result CreateSha3Storage(VirtualFile* out, VirtualFile base_storage, + const NcaFsHeader::HashData::HierarchicalSha256Data& hash_data); + Result CreateSha256Storage(VirtualFile* out, VirtualFile base_storage, const NcaFsHeader::HashData::HierarchicalSha256Data& sha256_data); diff --git a/src/core/file_sys/fssystem/fssystem_nca_header.cpp b/src/core/file_sys/fssystem/fssystem_nca_header.cpp index bf5742d39f..cef0f0bb94 100644 --- a/src/core/file_sys/fssystem/fssystem_nca_header.cpp +++ b/src/core/file_sys/fssystem/fssystem_nca_header.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -10,11 +13,13 @@ u8 NcaHeader::GetProperKeyGeneration() const { } bool NcaPatchInfo::HasIndirectTable() const { - return this->indirect_size != 0; + static constexpr unsigned char BKTR[4] = {'B', 'K', 'T', 'R'}; + return std::memcmp(indirect_header.data(), BKTR, sizeof(BKTR)) == 0; } bool NcaPatchInfo::HasAesCtrExTable() const { - return this->aes_ctr_ex_size != 0; + static constexpr unsigned char BKTR[4] = {'B', 'K', 'T', 'R'}; + return std::memcmp(aes_ctr_ex_header.data(), BKTR, sizeof(BKTR)) == 0; } } // namespace FileSys diff --git a/src/core/file_sys/fssystem/fssystem_passthrough_storage.h b/src/core/file_sys/fssystem/fssystem_passthrough_storage.h new file mode 100644 index 0000000000..8fc6f4962a --- /dev/null +++ b/src/core/file_sys/fssystem/fssystem_passthrough_storage.h @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once +#include "core/file_sys/fssystem/fs_i_storage.h" +#include "core/file_sys/vfs/vfs.h" + +namespace FileSys { + +//TODO: No integrity verification. +class PassthroughStorage final : public IReadOnlyStorage { + YUZU_NON_COPYABLE(PassthroughStorage); + YUZU_NON_MOVEABLE(PassthroughStorage); + +public: + explicit PassthroughStorage(VirtualFile base) : base_(std::move(base)) {} + ~PassthroughStorage() override = default; + + size_t Read(u8* buffer, size_t size, size_t offset) const override { + if (!base_ || size == 0) + return 0; + return base_->Read(buffer, size, offset); + } + size_t GetSize() const override { + return base_ ? base_->GetSize() : 0; + } + +private: + VirtualFile base_{}; +}; + +} // namespace FileSys diff --git a/src/core/hle/service/am/applet.cpp b/src/core/hle/service/am/applet.cpp index 59ade29c8e..aa355b06d5 100644 --- a/src/core/hle/service/am/applet.cpp +++ b/src/core/hle/service/am/applet.cpp @@ -1,5 +1,8 @@ -// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator +// Project// SPDX-License-Identifier: GPL-2.0-or-later #include "core/core.h" #include "core/hle/service/am/applet.h" @@ -12,7 +15,7 @@ Applet::Applet(Core::System& system, std::unique_ptr process_, bool is_ process(std::move(process_)), hid_registration(system, *process), gpu_error_detected_event(context), friend_invitation_storage_channel_event(context), notification_storage_channel_event(context), health_warning_disappeared_system_event(context), - acquired_sleep_lock_event(context), pop_from_general_channel_event(context), + unknown_event(context), acquired_sleep_lock_event(context), pop_from_general_channel_event(context), library_applet_launchable_event(context), accumulated_suspended_tick_changed_event(context), sleep_lock_event(context), state_changed_event(context) { diff --git a/src/core/hle/service/am/applet.h b/src/core/hle/service/am/applet.h index 835cfe6ec8..6cc8cdf741 100644 --- a/src/core/hle/service/am/applet.h +++ b/src/core/hle/service/am/applet.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -120,6 +123,7 @@ struct Applet { Event friend_invitation_storage_channel_event; Event notification_storage_channel_event; Event health_warning_disappeared_system_event; + Event unknown_event; Event acquired_sleep_lock_event; Event pop_from_general_channel_event; Event library_applet_launchable_event; diff --git a/src/core/hle/service/am/service/all_system_applet_proxies_service.cpp b/src/core/hle/service/am/service/all_system_applet_proxies_service.cpp index 5a787494a8..d99482a45c 100644 --- a/src/core/hle/service/am/service/all_system_applet_proxies_service.cpp +++ b/src/core/hle/service/am/service/all_system_applet_proxies_service.cpp @@ -18,6 +18,7 @@ IAllSystemAppletProxiesService::IAllSystemAppletProxiesService(Core::System& sys // clang-format off static const FunctionInfo functions[] = { {100, D<&IAllSystemAppletProxiesService::OpenSystemAppletProxy>, "OpenSystemAppletProxy"}, + {110, D<&IAllSystemAppletProxiesService::OpenSystemAppletProxy>, "OpenSystemAppletProxyEx"}, {200, D<&IAllSystemAppletProxiesService::OpenLibraryAppletProxyOld>, "OpenLibraryAppletProxyOld"}, {201, D<&IAllSystemAppletProxiesService::OpenLibraryAppletProxy>, "OpenLibraryAppletProxy"}, {300, nullptr, "OpenOverlayAppletProxy"}, @@ -25,6 +26,7 @@ IAllSystemAppletProxiesService::IAllSystemAppletProxiesService(Core::System& sys {400, nullptr, "CreateSelfLibraryAppletCreatorForDevelop"}, {410, nullptr, "GetSystemAppletControllerForDebug"}, {450, D<&IAllSystemAppletProxiesService::GetSystemProcessCommonFunctions>, "GetSystemProcessCommonFunctions"}, // 19.0.0+ + {460, D<&IAllSystemAppletProxiesService::GetAppletAlternativeFunctions>, "GetAppletAlternativeFunctions"}, // 20.0.0+ {1000, nullptr, "GetDebugFunctions"}, }; // clang-format on @@ -99,6 +101,14 @@ Result IAllSystemAppletProxiesService::GetSystemProcessCommonFunctions() { R_SUCCEED(); } +Result IAllSystemAppletProxiesService::GetAppletAlternativeFunctions() { + LOG_DEBUG(Service_AM, "(STUBBED) called."); + + // TODO (maufeat) + + R_SUCCEED(); +} + std::shared_ptr IAllSystemAppletProxiesService::GetAppletFromProcessId( ProcessId process_id) { return m_window_system.GetByAppletResourceUserId(process_id.pid); diff --git a/src/core/hle/service/am/service/all_system_applet_proxies_service.h b/src/core/hle/service/am/service/all_system_applet_proxies_service.h index a3111c4c9b..525525c795 100644 --- a/src/core/hle/service/am/service/all_system_applet_proxies_service.h +++ b/src/core/hle/service/am/service/all_system_applet_proxies_service.h @@ -39,6 +39,7 @@ private: InCopyHandle process_handle, InLargeData attribute); Result GetSystemProcessCommonFunctions(); + Result GetAppletAlternativeFunctions(); private: std::shared_ptr GetAppletFromProcessId(ProcessId pid); diff --git a/src/core/hle/service/am/service/applet_common_functions.cpp b/src/core/hle/service/am/service/applet_common_functions.cpp index 1c9cd74533..6a73a896f9 100644 --- a/src/core/hle/service/am/service/applet_common_functions.cpp +++ b/src/core/hle/service/am/service/applet_common_functions.cpp @@ -35,6 +35,7 @@ IAppletCommonFunctions::IAppletCommonFunctions(Core::System& system_, {310, nullptr, "IsSystemAppletHomeMenu"}, //19.0.0+ {320, nullptr, "SetGpuTimeSliceBoost"}, //19.0.0+ {321, nullptr, "SetGpuTimeSliceBoostDueToApplication"}, //19.0.0+ + {350, D<&IAppletCommonFunctions::Unknown350>, "Unknown350"} //20.0.0+ }; // clang-format on @@ -70,4 +71,10 @@ Result IAppletCommonFunctions::GetCurrentApplicationId(Out out_application_ R_SUCCEED(); } +Result IAppletCommonFunctions::Unknown350(Out out_unknown) { + LOG_WARNING(Service_AM, "(STUBBED) called"); + *out_unknown = 0; + R_SUCCEED(); +} + } // namespace Service::AM diff --git a/src/core/hle/service/am/service/applet_common_functions.h b/src/core/hle/service/am/service/applet_common_functions.h index 376f85acf7..623efdb7fc 100644 --- a/src/core/hle/service/am/service/applet_common_functions.h +++ b/src/core/hle/service/am/service/applet_common_functions.h @@ -20,6 +20,7 @@ private: Result GetHomeButtonDoubleClickEnabled(Out out_home_button_double_click_enabled); Result SetCpuBoostRequestPriority(s32 priority); Result GetCurrentApplicationId(Out out_application_id); + Result Unknown350(Out out_unknown); const std::shared_ptr applet; }; diff --git a/src/core/hle/service/am/service/application_functions.cpp b/src/core/hle/service/am/service/application_functions.cpp index 560244c714..b736e2821b 100644 --- a/src/core/hle/service/am/service/application_functions.cpp +++ b/src/core/hle/service/am/service/application_functions.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -85,6 +88,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_, std::shared_ {181, nullptr, "UpgradeLaunchRequiredVersion"}, {190, nullptr, "SendServerMaintenanceOverlayNotification"}, {200, nullptr, "GetLastApplicationExitReason"}, + {210, D<&IApplicationFunctions::GetUnknownEvent210>, "Unknown210"}, {500, nullptr, "StartContinuousRecordingFlushForDebug"}, {1000, nullptr, "CreateMovieMaker"}, {1001, D<&IApplicationFunctions::PrepareForJit>, "PrepareForJit"}, @@ -487,6 +491,13 @@ Result IApplicationFunctions::GetHealthWarningDisappearedSystemEvent( R_SUCCEED(); } +Result IApplicationFunctions::GetUnknownEvent210( + OutCopyHandle out_event) { + LOG_DEBUG(Service_AM, "called"); + *out_event = m_applet->unknown_event.GetHandle(); + R_SUCCEED(); +} + Result IApplicationFunctions::PrepareForJit() { LOG_WARNING(Service_AM, "(STUBBED) called"); diff --git a/src/core/hle/service/am/service/application_functions.h b/src/core/hle/service/am/service/application_functions.h index 10025a152b..35b3e9505d 100644 --- a/src/core/hle/service/am/service/application_functions.h +++ b/src/core/hle/service/am/service/application_functions.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -76,6 +79,7 @@ private: Result TryPopFromFriendInvitationStorageChannel(Out> out_storage); Result GetNotificationStorageChannelEvent(OutCopyHandle out_event); Result GetHealthWarningDisappearedSystemEvent(OutCopyHandle out_event); + Result GetUnknownEvent210(OutCopyHandle out_event); Result PrepareForJit(); const std::shared_ptr m_applet; diff --git a/src/core/hle/service/am/service/library_applet_creator.cpp b/src/core/hle/service/am/service/library_applet_creator.cpp index 413388d40a..54790838e0 100644 --- a/src/core/hle/service/am/service/library_applet_creator.cpp +++ b/src/core/hle/service/am/service/library_applet_creator.cpp @@ -113,9 +113,11 @@ std::shared_ptr CreateGuestApplet(Core::System& system, Firmware1700 = 17, Firmware1800 = 18, Firmware1900 = 19, + Firmware2000 = 20, + Firmware2100 = 21, }; - auto process = CreateProcess(system, program_id, Firmware1400, Firmware1900); + auto process = CreateProcess(system, program_id, Firmware1400, Firmware2100); if (!process) { // Couldn't initialize the guest process return {}; diff --git a/src/core/hle/service/ns/application_manager_interface.cpp b/src/core/hle/service/ns/application_manager_interface.cpp index f1ddba8231..517ec75743 100644 --- a/src/core/hle/service/ns/application_manager_interface.cpp +++ b/src/core/hle/service/ns/application_manager_interface.cpp @@ -306,6 +306,9 @@ IApplicationManagerInterface::IApplicationManagerInterface(Core::System& system_ {3013, nullptr, "IsGameCardEnabled"}, {3014, nullptr, "IsLocalContentShareEnabled"}, {3050, nullptr, "ListAssignELicenseTaskResult"}, + {4022, D<&IApplicationManagerInterface::Unknown4022>, "Unknown4022"}, + {4023, D<&IApplicationManagerInterface::Unknown4023>, "Unknown4023"}, + {4088, D<&IApplicationManagerInterface::Unknown4022>, "Unknown4088"}, {9999, nullptr, "GetApplicationCertificate"}, }; // clang-format on @@ -523,4 +526,17 @@ Result IApplicationManagerInterface::GetApplicationTerminateResult(Out o R_SUCCEED(); } +Result IApplicationManagerInterface::Unknown4022( + OutCopyHandle out_event) { + LOG_WARNING(Service_NS, "(STUBBED) called"); + *out_event = gamecard_update_detection_event.GetHandle(); + R_SUCCEED(); +} + +Result IApplicationManagerInterface::Unknown4023(Out out_result) { + LOG_WARNING(Service_NS, "(STUBBED) called."); + *out_result = 0; + R_SUCCEED(); +} + } // namespace Service::NS diff --git a/src/core/hle/service/ns/application_manager_interface.h b/src/core/hle/service/ns/application_manager_interface.h index 2def50bd5c..251f93ee06 100644 --- a/src/core/hle/service/ns/application_manager_interface.h +++ b/src/core/hle/service/ns/application_manager_interface.h @@ -53,6 +53,8 @@ public: u64 application_id); Result CheckApplicationLaunchVersion(u64 application_id); Result GetApplicationTerminateResult(Out out_result, u64 application_id); + Result Unknown4022(OutCopyHandle out_event); + Result Unknown4023(Out out_result); private: KernelHelpers::ServiceContext service_context; diff --git a/src/core/hle/service/pctl/parental_control_service.cpp b/src/core/hle/service/pctl/parental_control_service.cpp index 1d990e66d7..82c65ac1fd 100644 --- a/src/core/hle/service/pctl/parental_control_service.cpp +++ b/src/core/hle/service/pctl/parental_control_service.cpp @@ -80,11 +80,12 @@ IParentalControlService::IParentalControlService(Core::System& system_, Capabili {1451, D<&IParentalControlService::StartPlayTimer>, "StartPlayTimer"}, {1452, D<&IParentalControlService::StopPlayTimer>, "StopPlayTimer"}, {1453, D<&IParentalControlService::IsPlayTimerEnabled>, "IsPlayTimerEnabled"}, - {1454, nullptr, "GetPlayTimerRemainingTime"}, + {1454, D<&IParentalControlService::GetPlayTimerRemainingTime>, "GetPlayTimerRemainingTime"}, {1455, D<&IParentalControlService::IsRestrictedByPlayTimer>, "IsRestrictedByPlayTimer"}, {1456, D<&IParentalControlService::GetPlayTimerSettingsOld>, "GetPlayTimerSettingsOld"}, {1457, D<&IParentalControlService::GetPlayTimerEventToRequestSuspension>, "GetPlayTimerEventToRequestSuspension"}, {1458, D<&IParentalControlService::IsPlayTimerAlarmDisabled>, "IsPlayTimerAlarmDisabled"}, + {1459, D<&IParentalControlService::GetPlayTimerRemainingTimeDisplayInfo>, "GetPlayTimerRemainingTimeDisplayInfo"}, {1471, nullptr, "NotifyWrongPinCodeInputManyTimes"}, {1472, nullptr, "CancelNetworkRequest"}, {1473, D<&IParentalControlService::GetUnlinkedEvent>, "GetUnlinkedEvent"}, @@ -378,6 +379,12 @@ Result IParentalControlService::IsPlayTimerEnabled(Out out_is_play_timer_e R_SUCCEED(); } +Result IParentalControlService::GetPlayTimerRemainingTime(Out out_remaining_time) { + LOG_WARNING(Service_PCTL, "(STUBBED) called"); + *out_remaining_time = std::numeric_limits::max(); + R_SUCCEED(); +} + Result IParentalControlService::IsRestrictedByPlayTimer(Out out_is_restricted_by_play_timer) { *out_is_restricted_by_play_timer = false; LOG_WARNING(Service_PCTL, "(STUBBED) called, restricted={}", *out_is_restricted_by_play_timer); @@ -412,6 +419,11 @@ Result IParentalControlService::IsPlayTimerAlarmDisabled(Out out_play_time R_SUCCEED(); } +Result IParentalControlService::GetPlayTimerRemainingTimeDisplayInfo(/* Out 0x18 */) { + LOG_INFO(Service_PCTL, "called"); + R_SUCCEED(); +} + Result IParentalControlService::GetUnlinkedEvent(OutCopyHandle out_event) { LOG_INFO(Service_PCTL, "called"); *out_event = unlinked_event.GetHandle(); diff --git a/src/core/hle/service/pctl/parental_control_service.h b/src/core/hle/service/pctl/parental_control_service.h index 1b1884c4de..9d143fe2e2 100644 --- a/src/core/hle/service/pctl/parental_control_service.h +++ b/src/core/hle/service/pctl/parental_control_service.h @@ -49,10 +49,12 @@ private: Result StartPlayTimer(); Result StopPlayTimer(); Result IsPlayTimerEnabled(Out out_is_play_timer_enabled); + Result GetPlayTimerRemainingTime(Out out_remaining_time); Result IsRestrictedByPlayTimer(Out out_is_restricted_by_play_timer); Result GetPlayTimerSettingsOld(Out out_play_timer_settings); Result GetPlayTimerEventToRequestSuspension(OutCopyHandle out_event); Result IsPlayTimerAlarmDisabled(Out out_play_timer_alarm_disabled); + Result GetPlayTimerRemainingTimeDisplayInfo(); Result GetUnlinkedEvent(OutCopyHandle out_event); Result GetStereoVisionRestriction(Out out_stereo_vision_restriction); Result SetStereoVisionRestriction(bool stereo_vision_restriction); diff --git a/src/yuzu/applets/qt_web_browser.cpp b/src/yuzu/applets/qt_web_browser.cpp index 8245c12ba2..a287ea16df 100644 --- a/src/yuzu/applets/qt_web_browser.cpp +++ b/src/yuzu/applets/qt_web_browser.cpp @@ -17,15 +17,16 @@ #include "yuzu/applets/qt_web_browser_scripts.h" #endif +#include "yuzu/applets/qt_web_browser.h" +#include "yuzu/main.h" + +#ifdef YUZU_USE_QT_WEB_ENGINE + #include "common/fs/path_util.h" #include "core/core.h" #include "input_common/drivers/keyboard.h" -#include "yuzu/applets/qt_web_browser.h" -#include "yuzu/main.h" #include "yuzu/util/url_request_interceptor.h" -#ifdef YUZU_USE_QT_WEB_ENGINE - namespace { constexpr int HIDButtonToKey(Core::HID::NpadButton button) { diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp index fca4c94893..1137145659 100644 --- a/src/yuzu/configuration/shared_translation.cpp +++ b/src/yuzu/configuration/shared_translation.cpp @@ -409,6 +409,12 @@ std::unique_ptr InitializeTranslations(QWidget* parent) "their resolution, details and supported controllers and depending on this setting.\n" "Setting to Handheld can help improve performance for low end systems.")); INSERT(Settings, current_user, QString(), QString()); + INSERT(Settings, disable_nca_verification, tr("Disable NCA Verification"), + tr("Disables integrity verification of NCA content archives." + "\nThis may improve loading speed but risks data corruption or invalid files going " + "undetected.\n" + "Is necessary to make games and updates work that needs firmware 20+.")); + INSERT(Settings, hide_nca_verification_popup, QString(), QString()); // Controls diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 4c6b176c56..4604a7b904 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -2036,6 +2036,10 @@ bool GMainWindow::LoadROM(const QString& filename, Service::AM::FrontendAppletPa } } + if (!OnCheckNcaVerification()) { + return false; + } + /** Exec */ const Core::SystemResultStatus result{ system->Load(*render_window, filename.toStdString(), params)}; @@ -5265,6 +5269,41 @@ void GMainWindow::OnCheckFirmwareDecryption() { UpdateMenuState(); } +bool GMainWindow::OnCheckNcaVerification() { + if (!Settings::values.disable_nca_verification.GetValue()) + return true; + + const bool currently_hidden = Settings::values.hide_nca_verification_popup.GetValue(); + LOG_INFO(Frontend, "NCA Verification is disabled. Popup State={}", currently_hidden); + if (currently_hidden) + return true; + + QMessageBox msgbox(this); + msgbox.setWindowTitle(tr("NCA Verification Disabled")); + msgbox.setText(tr("NCA Verification is disabled.\n" + "This is required to run new games and updates.\n" + "Running without verification can cause instability or crashes if NCA files " + "are corrupt, modified, or tampered.\n" + "If unsure, re-enable verification in Eden's Settings and use firmware " + "version 19.0.1 or below.")); + msgbox.setIcon(QMessageBox::Warning); + msgbox.setStandardButtons(QMessageBox::Ok | QMessageBox::Cancel); + msgbox.setDefaultButton(QMessageBox::Ok); + + QCheckBox* cb = new QCheckBox(tr("Don't show again"), &msgbox); + cb->setChecked(currently_hidden); + msgbox.setCheckBox(cb); + + int result = msgbox.exec(); + + const bool hide = cb->isChecked(); + if (hide != currently_hidden) { + Settings::values.hide_nca_verification_popup.SetValue(hide); + } + + return result == static_cast(QMessageBox::Ok); +} + bool GMainWindow::CheckFirmwarePresence() { return FirmwareManager::CheckFirmwarePresence(*system.get()); } @@ -5285,7 +5324,7 @@ void GMainWindow::SetFirmwareVersion() { const std::string display_version(firmware_data.display_version.data()); const std::string display_title(firmware_data.display_title.data()); - LOG_INFO(Frontend, "Installed firmware: {}", display_title); + LOG_INFO(Frontend, "Installed firmware: {}", display_version); firmware_label->setText(QString::fromStdString(display_version)); firmware_label->setToolTip(QString::fromStdString(display_title)); diff --git a/src/yuzu/main.h b/src/yuzu/main.h index b1c5669a41..7857788fcf 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -485,6 +485,9 @@ private: const std::filesystem::path& command, const std::string& arguments, const std::string& categories, const std::string& keywords, const std::string& name); + + bool OnCheckNcaVerification(); + /** * Mimic the behavior of QMessageBox::question but link controller navigation to the dialog * The only difference is that it returns a boolean. From 211fefde20d4acbc8aafebbfc6b7f2fd434b88ff Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:54:53 +0000 Subject: [PATCH 10/17] [dynarmic] reduce opt pass latency Signed-off-by: lizzie --- src/dynarmic/CMakeLists.txt | 3 - src/dynarmic/docs/RegisterAllocator.md | 40 +- .../docs/ReturnStackBufferOptimization.md | 162 +- src/dynarmic/src/dynarmic/CMakeLists.txt | 199 +-- .../backend/arm64/a32_address_space.cpp | 18 +- .../backend/arm64/a64_address_space.cpp | 19 +- .../backend/riscv64/a32_address_space.cpp | 16 +- .../dynarmic/backend/x64/a32_interface.cpp | 16 +- .../dynarmic/backend/x64/a64_interface.cpp | 18 +- .../src/dynarmic/common/memory_pool.cpp | 13 - .../src/dynarmic/common/memory_pool.h | 61 - src/dynarmic/src/dynarmic/ir/basic_block.cpp | 7 +- src/dynarmic/src/dynarmic/ir/basic_block.h | 3 - src/dynarmic/src/dynarmic/ir/ir_emitter.cpp | 21 - .../ir/opt/a32_constant_memory_reads_pass.cpp | 70 - .../ir/opt/a32_get_set_elimination_pass.cpp | 382 ----- .../ir/opt/a64_callback_config_pass.cpp | 57 - .../ir/opt/a64_get_set_elimination_pass.cpp | 165 -- .../ir/opt/a64_merge_interpret_blocks.cpp | 57 - .../ir/opt/constant_propagation_pass.cpp | 559 ------ .../ir/opt/dead_code_elimination_pass.cpp | 23 - .../dynarmic/ir/opt/identity_removal_pass.cpp | 44 - src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h | 127 -- .../src/dynarmic/ir/opt/naming_pass.cpp | 18 - src/dynarmic/src/dynarmic/ir/opt/passes.h | 47 - .../src/dynarmic/ir/opt/polyfill_pass.cpp | 218 --- .../src/dynarmic/ir/opt/verification_pass.cpp | 51 - src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 1502 +++++++++++++++++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 34 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 10 +- src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 2 +- src/dynarmic/tests/CMakeLists.txt | 146 +- src/dynarmic/tests/print_info.cpp | 2 +- 33 files changed, 1808 insertions(+), 2302 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.cpp delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.h delete mode 100644 src/dynarmic/src/dynarmic/ir/ir_emitter.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/passes.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.h diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 5c28435f72..471a08c703 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -23,9 +23,6 @@ option(DYNARMIC_USE_PRECOMPILED_HEADERS "Use precompiled headers" ON) option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF) option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. cross-compiling)" OFF) option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT}) -if (NOT DEFINED DYNARMIC_FRONTENDS) - set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable") -endif() # Default to a Release build if (NOT CMAKE_BUILD_TYPE) diff --git a/src/dynarmic/docs/RegisterAllocator.md b/src/dynarmic/docs/RegisterAllocator.md index fea6f19e6a..5a7210c791 100644 --- a/src/dynarmic/docs/RegisterAllocator.md +++ b/src/dynarmic/docs/RegisterAllocator.md @@ -16,19 +16,31 @@ Note that `Use`ing a value decrements its `use_count` by one. When the `use_coun The member functions on `RegAlloc` are just a combination of the above concepts. +The following registers are reserved for internal use and should NOT participate in register allocation: +- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access. +- `%rsp`: Stack pointer. + +The layout convenes `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate. + +Do NEVER modify `%r15`, we must make it clear that this register is "immutable" for the entirety of the JIT block duration. + ### `Scratch` - Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr) - Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm) +```c++ +Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr); +Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm); +``` At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope. ### Pure `Use` - Xbyak::Reg64 UseGpr(Argument& arg); - Xbyak::Xmm UseXmm(Argument& arg); - OpArg UseOpArg(Argument& arg); - void Use(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseGpr(Argument& arg); +Xbyak::Xmm UseXmm(Argument& arg); +OpArg UseOpArg(Argument& arg); +void Use(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it @@ -39,9 +51,11 @@ This register **must not** have it's value changed. ### `UseScratch` - Xbyak::Reg64 UseScratchGpr(Argument& arg); - Xbyak::Xmm UseScratchXmm(Argument& arg); - void UseScratch(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseScratchGpr(Argument& arg); +Xbyak::Xmm UseScratchXmm(Argument& arg); +void UseScratch(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it @@ -55,7 +69,9 @@ You are free to modify the value in the register. The register is discarded at t A `Define` is the defintion of a value. This is the only time when a value may be set. - void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +```c++ +void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +``` By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the value to the specified register `reg`. @@ -64,7 +80,9 @@ value to the specified register `reg`. Adding a `Define` to an existing value. - void DefineValue(IR::Inst* inst, Argument& arg); +```c++ +void DefineValue(IR::Inst* inst, Argument& arg); +``` You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are emitted. diff --git a/src/dynarmic/docs/ReturnStackBufferOptimization.md b/src/dynarmic/docs/ReturnStackBufferOptimization.md index 6ffe41bcc6..0e72c3bce8 100644 --- a/src/dynarmic/docs/ReturnStackBufferOptimization.md +++ b/src/dynarmic/docs/ReturnStackBufferOptimization.md @@ -23,15 +23,17 @@ One complication dynarmic has is that a compiled block is not uniquely identifia the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block. - u64 LocationDescriptor::UniqueHash() const { - // This value MUST BE UNIQUE. - // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint - u64 pc_u64 = u64(arm_pc) << 32; - u64 fpscr_u64 = u64(fpscr.Value()); - u64 t_u64 = cpsr.T() ? 1 : 0; - u64 e_u64 = cpsr.E() ? 2 : 0; - return pc_u64 | fpscr_u64 | t_u64 | e_u64; - } +```c++ +u64 LocationDescriptor::UniqueHash() const { + // This value MUST BE UNIQUE. + // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint + u64 pc_u64 = u64(arm_pc) << 32; + u64 fpscr_u64 = u64(fpscr.Value()); + u64 t_u64 = cpsr.T() ? 1 : 0; + u64 e_u64 = cpsr.E() ? 2 : 0; + return pc_u64 | fpscr_u64 | t_u64 | e_u64; +} +``` ## Our implementation isn't actually a stack @@ -49,97 +51,107 @@ host addresses for the corresponding the compiled blocks. size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8 showed degraded performance. - struct JitState { - // ... +```c++ +struct JitState { + // ... - static constexpr size_t RSBSize = 8; // MUST be a power of 2. - u32 rsb_ptr = 0; - std::array rsb_location_descriptors; - std::array rsb_codeptrs; - void ResetRSB(); + static constexpr size_t RSBSize = 8; // MUST be a power of 2. + u32 rsb_ptr = 0; + std::array rsb_location_descriptors; + std::array rsb_codeptrs; + void ResetRSB(); - // ... - }; + // ... +}; +``` ### RSB Push We insert our prediction at the insertion point iff the RSB doesn't already contain a prediction with the same `UniqueHash`. - void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { + using namespace Xbyak::util; - ASSERT(inst->GetArg(0).IsImmediate()); - u64 imm64 = inst->GetArg(0).GetU64(); + ASSERT(inst->GetArg(0).IsImmediate()); + u64 imm64 = inst->GetArg(0).GetU64(); - Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); - Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); - Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); - u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() - ? u64(unique_hash_to_code_ptr[imm64]) - : u64(code->GetReturnFromRunCodeAddress()); + Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); + Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); + Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); + u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() + ? u64(unique_hash_to_code_ptr[imm64]) + : u64(code->GetReturnFromRunCodeAddress()); - code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); - code->add(index_reg, 1); - code->and_(index_reg, u32(JitState::RSBSize - 1)); + code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); + code->add(index_reg, 1); + code->and_(index_reg, u32(JitState::RSBSize - 1)); - code->mov(loc_desc_reg, u64(imm64)); - CodePtr patch_location = code->getCurr(); - patch_unique_hash_locations[imm64].emplace_back(patch_location); - code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. - code->EnsurePatchLocationSize(patch_location, 10); + code->mov(loc_desc_reg, u64(imm64)); + CodePtr patch_location = code->getCurr(); + patch_unique_hash_locations[imm64].emplace_back(patch_location); + code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. + code->EnsurePatchLocationSize(patch_location, 10); - Xbyak::Label label; - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->je(label, code->T_SHORT); - } - - code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); - code->L(label); + Xbyak::Label label; + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->je(label, code->T_SHORT); } + code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); + code->L(label); +} +``` + In pseudocode: - for (i := 0 .. RSBSize-1) - if (rsb_location_descriptors[i] == imm64) - goto label; - rsb_ptr++; - rsb_ptr %= RSBSize; - rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash - rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; - label: +```c++ + for (i := 0 .. RSBSize-1) + if (rsb_location_descriptors[i] == imm64) + goto label; + rsb_ptr++; + rsb_ptr %= RSBSize; + rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash + rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; +label: +``` ## RSB Pop To check if a predicition is in the RSB, we linearly scan the RSB. - void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { + using namespace Xbyak::util; - // This calculation has to match up with IREmitter::PushRSB - code->mov(ecx, MJitStateReg(Arm::Reg::PC)); - code->shl(rcx, 32); - code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); - code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); - code->or_(rbx, rcx); + // This calculation has to match up with IREmitter::PushRSB + code->mov(ecx, MJitStateReg(Arm::Reg::PC)); + code->shl(rcx, 32); + code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); + code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); + code->or_(rbx, rcx); - code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); - } - - code->jmp(rax); + code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); } + code->jmp(rax); +} +``` + In pseudocode: - rbx := ComputeUniqueHash() - rax := ReturnToDispatch - for (i := 0 .. RSBSize-1) - if (rbx == rsb_location_descriptors[i]) - rax = rsb_codeptrs[i] - goto rax \ No newline at end of file +```c++ +rbx := ComputeUniqueHash() +rax := ReturnToDispatch +for (i := 0 .. RSBSize-1) + if (rbx == rsb_location_descriptors[i]) + rax = rsb_codeptrs[i] +goto rax +``` diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index e060989f82..6948bf39fb 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -56,8 +56,6 @@ add_library(dynarmic common/lut_from_list.h common/math_util.cpp common/math_util.h - common/memory_pool.cpp - common/memory_pool.h common/safe_ops.h common/spin_lock.h common/string_util.h @@ -78,7 +76,6 @@ add_library(dynarmic ir/basic_block.cpp ir/basic_block.h ir/cond.h - ir/ir_emitter.cpp ir/ir_emitter.h ir/location_descriptor.cpp ir/location_descriptor.h @@ -87,78 +84,59 @@ add_library(dynarmic ir/opcodes.cpp ir/opcodes.h ir/opcodes.inc - ir/opt/constant_propagation_pass.cpp - ir/opt/dead_code_elimination_pass.cpp - ir/opt/identity_removal_pass.cpp - ir/opt/ir_matcher.h - ir/opt/naming_pass.cpp - ir/opt/passes.h - ir/opt/polyfill_pass.cpp - ir/opt/verification_pass.cpp + ir/opt_passes.cpp + ir/opt_passes.h ir/terminal.h ir/type.cpp ir/type.h ir/value.cpp ir/value.h + # A32 + frontend/A32/a32_ir_emitter.cpp + frontend/A32/a32_ir_emitter.h + frontend/A32/a32_location_descriptor.cpp + frontend/A32/a32_location_descriptor.h + frontend/A32/decoder/arm.h + frontend/A32/decoder/arm.inc + frontend/A32/decoder/asimd.h + frontend/A32/decoder/asimd.inc + frontend/A32/decoder/thumb16.h + frontend/A32/decoder/thumb16.inc + frontend/A32/decoder/thumb32.h + frontend/A32/decoder/thumb32.inc + frontend/A32/decoder/vfp.h + frontend/A32/decoder/vfp.inc + frontend/A32/disassembler/disassembler.h + frontend/A32/disassembler/disassembler_arm.cpp + frontend/A32/disassembler/disassembler_thumb.cpp + frontend/A32/FPSCR.h + frontend/A32/ITState.h + frontend/A32/PSR.h + frontend/A32/translate/a32_translate.cpp + frontend/A32/translate/a32_translate.h + frontend/A32/translate/conditional_state.cpp + frontend/A32/translate/conditional_state.h + frontend/A32/translate/translate_arm.cpp + frontend/A32/translate/translate_thumb.cpp + interface/A32/a32.h + interface/A32/arch_version.h + interface/A32/config.h + interface/A32/coprocessor.h + interface/A32/coprocessor_util.h + interface/A32/disassembler.h + # A64 + frontend/A64/a64_ir_emitter.cpp + frontend/A64/a64_ir_emitter.h + frontend/A64/a64_location_descriptor.cpp + frontend/A64/a64_location_descriptor.h + frontend/A64/decoder/a64.h + frontend/A64/decoder/a64.inc + frontend/A64/translate/a64_translate.cpp + frontend/A64/translate/a64_translate.h + interface/A64/a64.h + interface/A64/config.h ) -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A32/a32_ir_emitter.cpp - frontend/A32/a32_ir_emitter.h - frontend/A32/a32_location_descriptor.cpp - frontend/A32/a32_location_descriptor.h - frontend/A32/decoder/arm.h - frontend/A32/decoder/arm.inc - frontend/A32/decoder/asimd.h - frontend/A32/decoder/asimd.inc - frontend/A32/decoder/thumb16.h - frontend/A32/decoder/thumb16.inc - frontend/A32/decoder/thumb32.h - frontend/A32/decoder/thumb32.inc - frontend/A32/decoder/vfp.h - frontend/A32/decoder/vfp.inc - frontend/A32/disassembler/disassembler.h - frontend/A32/disassembler/disassembler_arm.cpp - frontend/A32/disassembler/disassembler_thumb.cpp - frontend/A32/FPSCR.h - frontend/A32/ITState.h - frontend/A32/PSR.h - frontend/A32/translate/a32_translate.cpp - frontend/A32/translate/a32_translate.h - frontend/A32/translate/conditional_state.cpp - frontend/A32/translate/conditional_state.h - frontend/A32/translate/translate_arm.cpp - frontend/A32/translate/translate_thumb.cpp - interface/A32/a32.h - interface/A32/arch_version.h - interface/A32/config.h - interface/A32/coprocessor.h - interface/A32/coprocessor_util.h - interface/A32/disassembler.h - ir/opt/a32_constant_memory_reads_pass.cpp - ir/opt/a32_get_set_elimination_pass.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A64/a64_ir_emitter.cpp - frontend/A64/a64_ir_emitter.h - frontend/A64/a64_location_descriptor.cpp - frontend/A64/a64_location_descriptor.h - frontend/A64/decoder/a64.h - frontend/A64/decoder/a64.inc - frontend/A64/translate/a64_translate.cpp - frontend/A64/translate/a64_translate.h - interface/A64/a64.h - interface/A64/config.h - ir/opt/a64_callback_config_pass.cpp - ir/opt/a64_get_set_elimination_pass.cpp - ir/opt/a64_merge_interpret_blocks.cpp - ) -endif() - if ("x86_64" IN_LIST ARCHITECTURE) target_compile_definitions(dynarmic PRIVATE XBYAK_OLD_DISP_CHECK=1) target_link_libraries(dynarmic @@ -211,29 +189,21 @@ if ("x86_64" IN_LIST ARCHITECTURE) common/spin_lock_x64.h common/x64_disassemble.cpp common/x64_disassemble.h + # A32 + backend/x64/a32_emit_x64.cpp + backend/x64/a32_emit_x64.h + backend/x64/a32_emit_x64_memory.cpp + backend/x64/a32_interface.cpp + backend/x64/a32_jitstate.cpp + backend/x64/a32_jitstate.h + # A64 + backend/x64/a64_emit_x64.cpp + backend/x64/a64_emit_x64.h + backend/x64/a64_emit_x64_memory.cpp + backend/x64/a64_interface.cpp + backend/x64/a64_jitstate.cpp + backend/x64/a64_jitstate.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a32_emit_x64.cpp - backend/x64/a32_emit_x64.h - backend/x64/a32_emit_x64_memory.cpp - backend/x64/a32_interface.cpp - backend/x64/a32_jitstate.cpp - backend/x64/a32_jitstate.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a64_emit_x64.cpp - backend/x64/a64_emit_x64.h - backend/x64/a64_emit_x64_memory.cpp - backend/x64/a64_interface.cpp - backend/x64/a64_jitstate.cpp - backend/x64/a64_jitstate.h - ) - endif() endif() if ("arm64" IN_LIST ARCHITECTURE) @@ -277,25 +247,17 @@ if ("arm64" IN_LIST ARCHITECTURE) backend/arm64/verbose_debugging_output.h common/spin_lock_arm64.cpp common/spin_lock_arm64.h + # A32 + backend/arm64/a32_address_space.cpp + backend/arm64/a32_address_space.h + backend/arm64/a32_core.h + backend/arm64/a32_interface.cpp + # A64 + backend/arm64/a64_address_space.cpp + backend/arm64/a64_address_space.h + backend/arm64/a64_core.h + backend/arm64/a64_interface.cpp ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a32_address_space.cpp - backend/arm64/a32_address_space.h - backend/arm64/a32_core.h - backend/arm64/a32_interface.cpp - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a64_address_space.cpp - backend/arm64/a64_address_space.h - backend/arm64/a64_core.h - backend/arm64/a64_interface.cpp - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -324,21 +286,14 @@ if ("riscv" IN_LIST ARCHITECTURE) backend/riscv64/reg_alloc.cpp backend/riscv64/reg_alloc.h backend/riscv64/stack_layout.h + # A32 + backend/riscv64/a32_address_space.cpp + backend/riscv64/a32_address_space.h + backend/riscv64/a32_core.h + backend/riscv64/a32_interface.cpp + backend/riscv64/code_block.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - backend/riscv64/a32_address_space.cpp - backend/riscv64/a32_address_space.h - backend/riscv64/a32_core.h - backend/riscv64/a32_interface.cpp - backend/riscv64/code_block.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") - endif() + message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") endif() if (WIN32) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index bbdf925ff4..8e48fa3687 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -16,7 +16,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -163,21 +163,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index d4fe9a9cb7..2b50ad9ea3 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -331,22 +331,7 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp index efa211618b..0bb9591411 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/backend/riscv64/stack_layout.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::RV64 { @@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp index b116ec180e..382eb70f3f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp @@ -29,7 +29,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/location_descriptor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A32 { @@ -217,19 +217,7 @@ private: block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE); IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index ddd2327395..c65b582982 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -25,7 +25,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/a64.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A64 { @@ -275,21 +275,7 @@ private: const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block).entrypoint; } diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.cpp b/src/dynarmic/src/dynarmic/common/memory_pool.cpp deleted file mode 100644 index f41dd92af5..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.cpp +++ /dev/null @@ -1,13 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/common/memory_pool.h" - -#include - -namespace Dynarmic::Common { - - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.h b/src/dynarmic/src/dynarmic/common/memory_pool.h deleted file mode 100644 index c99316e107..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -namespace Dynarmic::Common { - -/// @tparam object_size Byte-size of objects to construct -/// @tparam slab_size Number of objects to have per slab -template -class Pool { -public: - inline Pool() noexcept { - AllocateNewSlab(); - } - inline ~Pool() noexcept { - std::free(current_slab); - for (char* slab : slabs) { - std::free(slab); - } - } - - Pool(const Pool&) = delete; - Pool(Pool&&) = delete; - - Pool& operator=(const Pool&) = delete; - Pool& operator=(Pool&&) = delete; - - /// @brief Returns a pointer to an `object_size`-bytes block of memory. - [[nodiscard]] void* Alloc() noexcept { - if (remaining == 0) { - slabs.push_back(current_slab); - AllocateNewSlab(); - } - void* ret = static_cast(current_ptr); - current_ptr += object_size; - remaining--; - return ret; - } -private: - /// @brief Allocates a completely new memory slab. - /// Used when an entirely new slab is needed - /// due the current one running out of usable space. - void AllocateNewSlab() noexcept { - current_slab = static_cast(std::malloc(object_size * slab_size)); - current_ptr = current_slab; - remaining = slab_size; - } - - std::vector slabs; - char* current_slab = nullptr; - char* current_ptr = nullptr; - size_t remaining = 0; -}; - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index b00ab3cb20..a13a7ebfc9 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -15,8 +15,6 @@ #include #include "dynarmic/common/assert.h" - -#include "dynarmic/common/memory_pool.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A64/a64_types.h" #include "dynarmic/ir/cond.h" @@ -27,8 +25,7 @@ namespace Dynarmic::IR { Block::Block(const LocationDescriptor& location) : location{location}, end_location{location}, - cond{Cond::AL}, - instruction_alloc_pool{std::make_unique>()} + cond{Cond::AL} { } @@ -40,7 +37,7 @@ Block::Block(const LocationDescriptor& location) /// @param args A sequence of Value instances used as arguments for the instruction. /// @returns Iterator to the newly created instruction. Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) noexcept { - IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode); + IR::Inst* inst = new IR::Inst(opcode); DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index e88dc92fc4..2f2d9ab6de 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -21,7 +21,6 @@ #include "dynarmic/ir/terminal.h" #include "dynarmic/ir/value.h" #include "dynarmic/ir/dense_list.h" -#include "dynarmic/common/memory_pool.h" namespace Dynarmic::IR { @@ -174,8 +173,6 @@ private: LocationDescriptor end_location; /// Conditional to pass in order to execute this block Cond cond; - /// Memory pool for instruction list - std::unique_ptr> instruction_alloc_pool; /// Terminal instruction of this block. Terminal terminal = Term::Invalid{}; /// Number of cycles this block takes to execute if the conditional fails. diff --git a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp deleted file mode 100644 index a8ef7e2989..0000000000 --- a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/ir_emitter.h" - -#include - -#include "dynarmic/common/assert.h" -#include - -#include "dynarmic/ir/opcodes.h" - -namespace Dynarmic::IR { - - -} // namespace Dynarmic::IR diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp deleted file mode 100644 index 9699f18345..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/interface/A32/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { - for (auto& inst : block) { - switch (inst.GetOpcode()) { - case IR::Opcode::A32ReadMemory8: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u8 value_from_memory = cb->MemoryRead8(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory16: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u16 value_from_memory = cb->MemoryRead16(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory32: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u32 value_from_memory = cb->MemoryRead32(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory64: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u64 value_from_memory = cb->MemoryRead64(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp deleted file mode 100644 index 499b38b120..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp +++ /dev/null @@ -1,382 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A32/a32_ir_emitter.h" -#include "dynarmic/frontend/A32/a32_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -namespace { - -void FlagsPass(IR::Block& block) { - using Iterator = std::reverse_iterator; - - struct FlagInfo { - bool set_not_required = false; - bool has_value_request = false; - Iterator value_request = {}; - }; - struct ValuelessFlagInfo { - bool set_not_required = false; - }; - ValuelessFlagInfo nzcvq; - ValuelessFlagInfo nzcv; - ValuelessFlagInfo nz; - FlagInfo c_flag; - FlagInfo ge; - - auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(value); - } - info.has_value_request = false; - - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_get = [](FlagInfo& info, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(IR::Value{&*inst}); - } - info.has_value_request = true; - info.value_request = inst; - }; - - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetCFlag: { - do_get(c_flag, inst); - break; - } - case IR::Opcode::A32SetCpsrNZCV: { - if (c_flag.has_value_request) { - ir.SetInsertionPointBefore(inst.base()); // base is one ahead - IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); - c_flag.value_request->ReplaceUsesWith(c); - c_flag.has_value_request = false; - break; // This case will be executed again because of the above - } - - do_set_valueless(nzcv, inst); - - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVRaw: { - if (c_flag.has_value_request) { - nzcv.set_not_required = false; - } - - do_set_valueless(nzcv, inst); - - nzcvq = {}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVQ: { - if (c_flag.has_value_request) { - nzcvq.set_not_required = false; - } - - do_set_valueless(nzcvq, inst); - - nzcv = {.set_not_required = true}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZ: { - do_set_valueless(nz, inst); - - nzcvq = {}; - nzcv = {}; - break; - } - case IR::Opcode::A32SetCpsrNZC: { - if (c_flag.has_value_request) { - c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); - c_flag.has_value_request = false; - } - - if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { - const auto nz_value = inst->GetArg(0); - - inst->Invalidate(); - - ir.SetInsertionPointBefore(inst.base()); - ir.SetCpsrNZ(IR::NZCV{nz_value}); - - nzcvq = {}; - nzcv = {}; - nz = {.set_not_required = true}; - break; - } - - if (nz.set_not_required && c_flag.set_not_required) { - inst->Invalidate(); - } else if (nz.set_not_required) { - inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); - } - nz.set_not_required = true; - c_flag.set_not_required = true; - - nzcv = {}; - nzcvq = {}; - break; - } - case IR::Opcode::A32SetGEFlags: { - do_set(ge, inst->GetArg(0), inst); - break; - } - case IR::Opcode::A32GetGEFlags: { - do_get(ge, inst); - break; - } - case IR::Opcode::A32SetGEFlagsCompressed: { - ge = {.set_not_required = true}; - break; - } - case IR::Opcode::A32OrQFlag: { - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcvq = {}; - nzcv = {}; - nz = {}; - c_flag = {}; - ge = {}; - } - break; - } - } - } -} - -void RegisterPass(IR::Block& block) { - using Iterator = IR::Block::iterator; - - struct RegInfo { - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array reg_info; - - const auto do_get = [](RegInfo& info, Iterator get_inst) { - if (info.register_value.IsEmpty()) { - info.register_value = IR::Value(&*get_inst); - return; - } - get_inst->ReplaceUsesWith(info.register_value); - }; - - const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { - if (info.last_set_instruction) { - (*info.last_set_instruction)->Invalidate(); - } - info = { - .register_value = value, - .last_set_instruction = set_inst, - }; - }; - - enum class ExtValueType { - Empty, - Single, - Double, - VectorDouble, - VectorQuad, - }; - struct ExtRegInfo { - ExtValueType value_type = {}; - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array ext_reg_info; - - const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { - if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = IR::Value(&*get_inst), - .last_set_instruction = std::nullopt, - }; - } - return; - } - get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); - }; - - const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { - if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - if (std::data(infos)[0].get().last_set_instruction) { - (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); - } - } - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = value, - .last_set_instruction = set_inst, - }; - } - }; - - // Location and version don't matter here. - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - ASSERT(reg != A32::Reg::PC); - const size_t reg_index = static_cast(reg); - do_get(reg_info[reg_index], inst); - break; - } - case IR::Opcode::A32SetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - if (reg == A32::Reg::PC) { - break; - } - const auto reg_index = static_cast(reg); - do_set(reg_info[reg_index], inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); - break; - } - case IR::Opcode::A32SetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - break; - } - case IR::Opcode::A32SetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst->GetArg(1), - inst); - break; - } - case IR::Opcode::A32GetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - do_ext_get(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_get(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst); - } - break; - } - case IR::Opcode::A32SetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - ir.SetInsertionPointAfter(inst); - const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); - do_ext_set(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - stored_value, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_set(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst->GetArg(1), - inst); - } - break; - } - default: { - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - ext_reg_info = {}; - } - break; - } - } - } -} - -} // namespace - -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { - FlagsPass(block); - RegisterPass(block); -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp deleted file mode 100644 index 79d9769520..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/frontend/A64/a64_ir_emitter.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { - if (conf.hook_data_cache_operations) { - return; - } - - for (auto& inst : block) { - if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { - continue; - } - - const auto op = static_cast(inst.GetArg(1).GetU64()); - if (op == A64::DataCacheOperation::ZeroByVA) { - A64::IREmitter ir{block}; - ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; - ir.SetInsertionPointBefore(&inst); - - size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); - IR::U64 addr{inst.GetArg(2)}; - - const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); - while (bytes >= 16) { - ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(16)); - bytes -= 16; - } - - while (bytes >= 8) { - ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(8)); - bytes -= 8; - } - - while (bytes >= 4) { - ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(4)); - bytes -= 4; - } - } - inst.Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp deleted file mode 100644 index 53e3b27176..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -void A64GetSetElimination(IR::Block& block) { - using Iterator = IR::Block::iterator; - - enum class TrackingType { - W, - X, - S, - D, - Q, - SP, - NZCV, - NZCVRaw, - }; - struct RegisterInfo { - IR::Value register_value; - TrackingType tracking_type; - bool set_instruction_present = false; - Iterator last_set_instruction; - }; - std::array reg_info; - std::array vec_info; - RegisterInfo sp_info; - RegisterInfo nzcv_info; - - const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { - if (info.set_instruction_present) { - info.last_set_instruction->Invalidate(); - block.Instructions().erase(info.last_set_instruction); - } - - info.register_value = value; - info.tracking_type = tracking_type; - info.set_instruction_present = true; - info.last_set_instruction = set_inst; - }; - - const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { - const auto do_nothing = [&] { - info = {}; - info.register_value = IR::Value(&*get_inst); - info.tracking_type = tracking_type; - }; - - if (info.register_value.IsEmpty()) { - do_nothing(); - return; - } - - if (info.tracking_type == tracking_type) { - get_inst->ReplaceUsesWith(info.register_value); - return; - } - - do_nothing(); - }; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A64GetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::W); - break; - } - case IR::Opcode::A64GetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::X); - break; - } - case IR::Opcode::A64GetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::S); - break; - } - case IR::Opcode::A64GetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::D); - break; - } - case IR::Opcode::A64GetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64GetSP: { - do_get(sp_info, inst, TrackingType::SP); - break; - } - case IR::Opcode::A64GetNZCVRaw: { - do_get(nzcv_info, inst, TrackingType::NZCVRaw); - break; - } - case IR::Opcode::A64SetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); - break; - } - case IR::Opcode::A64SetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); - break; - } - case IR::Opcode::A64SetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); - break; - } - case IR::Opcode::A64SetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); - break; - } - case IR::Opcode::A64SetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64SetSP: { - do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); - break; - } - case IR::Opcode::A64SetNZCV: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); - break; - } - case IR::Opcode::A64SetNZCVRaw: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcv_info = {}; - } - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - vec_info = {}; - sp_info = {}; - } - break; - } - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp deleted file mode 100644 index 25b7ef0ff1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_location_descriptor.h" -#include "dynarmic/frontend/A64/translate/a64_translate.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { - const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { - const auto instruction = cb->MemoryReadCode(location.PC()); - if (!instruction) - return false; - - IR::Block new_block{location}; - A64::TranslateSingleInstruction(new_block, location, *instruction); - - if (!new_block.Instructions().empty()) - return false; - - const IR::Terminal terminal = new_block.GetTerminal(); - if (auto term = boost::get(&terminal)) { - return term->next == location; - } - - return false; - }; - - IR::Terminal terminal = block.GetTerminal(); - auto term = boost::get(&terminal); - if (!term) - return; - - A64::LocationDescriptor location{term->next}; - size_t num_instructions = 1; - - while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { - num_instructions++; - } - - term->num_instructions = num_instructions; - block.ReplaceTerminal(terminal); - block.CycleCount() += num_instructions - 1; -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp deleted file mode 100644 index 86ebca87d2..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp +++ /dev/null @@ -1,559 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/assert.h" -#include -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/common/safe_ops.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -using Op = Dynarmic::IR::Opcode; - -namespace { - -// Tiny helper to avoid the need to store based off the opcode -// bit size all over the place within folding functions. -void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { - if (is_32_bit) { - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); - } else { - inst.ReplaceUsesWith(IR::Value{value}); - } -} - -IR::Value Value(bool is_32_bit, u64 value) { - return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; -} - -template -bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - - const bool is_lhs_immediate = lhs.IsImmediate(); - const bool is_rhs_immediate = rhs.IsImmediate(); - - if (is_lhs_immediate && is_rhs_immediate) { - const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); - ReplaceUsesWith(inst, is_32_bit, result); - return false; - } - - if (is_lhs_immediate && !is_rhs_immediate) { - const IR::Inst* rhs_inst = rhs.GetInstRecursive(); - if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, rhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } else { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - } - } - - if (!is_lhs_immediate && is_rhs_immediate) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } - } - - return true; -} - -void FoldAdd(IR::Inst& inst, bool is_32_bit) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - if (lhs.IsImmediate() && !rhs.IsImmediate()) { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - FoldAdd(inst, is_32_bit); - return; - } - - if (inst.HasAssociatedPseudoOperation()) { - return; - } - - if (!lhs.IsImmediate() && rhs.IsImmediate()) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { - const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); - if (combined == 0) { - inst.ReplaceUsesWith(lhs_inst->GetArg(0)); - return; - } - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - return; - } - if (rhs.IsZero() && carry.IsZero()) { - inst.ReplaceUsesWith(lhs); - return; - } - } - - if (inst.AreAllArgsImmediates()) { - const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); - return; - } -} - -/// Folds AND operations based on the following: -/// -/// 1. imm_x & imm_y -> result -/// 2. x & 0 -> 0 -/// 3. 0 & y -> 0 -/// 4. x & y -> y (where x has all bits set to 1) -/// 5. x & y -> x (where y has all bits set to 1) -/// -void FoldAND(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.HasAllBitsSet()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -/// Folds byte reversal opcodes based on the following: -/// -/// 1. imm -> swap(imm) -/// -void FoldByteReverse(IR::Inst& inst, Op op) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - if (op == Op::ByteReverseWord) { - const u32 result = mcl::bit::swap_bytes_32(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else if (op == Op::ByteReverseHalf) { - const u16 result = mcl::bit::swap_bytes_16(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else { - const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); - inst.ReplaceUsesWith(IR::Value{result}); - } -} - -/// Folds division operations based on the following: -/// -/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) -/// 2. imm_x / imm_y -> result -/// 3. x / 1 -> x -/// -void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { - const auto rhs = inst.GetArg(1); - - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - return; - } - - const auto lhs = inst.GetArg(0); - if (lhs.IsImmediate() && rhs.IsImmediate()) { - if (is_signed) { - const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); - ReplaceUsesWith(inst, is_32_bit, static_cast(result)); - } else { - const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); - } - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(IR::Value{lhs}); - } -} - -// Folds EOR operations based on the following: -// -// 1. imm_x ^ imm_y -> result -// 2. x ^ 0 -> x -// 3. 0 ^ y -> y -// -void FoldEOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -void FoldLeastSignificantByte(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantHalf(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldMostSignificantBit(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); -} - -void FoldMostSignificantWord(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - if (carry_inst) { - carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); - } - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); -} - -// Folds multiplication operations based on the following: -// -// 1. imm_x * imm_y -> result -// 2. x * 0 -> 0 -// 3. 0 * y -> 0 -// 4. x * 1 -> x -// 5. 1 * y -> y -// -void FoldMultiply(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -// Folds NOT operations if the contained value is an immediate. -void FoldNOT(IR::Inst& inst, bool is_32_bit) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - const u64 result = ~operand.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -// Folds OR operations based on the following: -// -// 1. imm_x | imm_y -> result -// 2. x | 0 -> x -// 3. 0 | y -> y -// -void FoldOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -bool FoldShifts(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - // The 32-bit variants can contain 3 arguments, while the - // 64-bit variants only contain 2. - if (inst.NumArgs() == 3 && !carry_inst) { - inst.SetArg(2, IR::Value(false)); - } - - const auto shift_amount = inst.GetArg(1); - - if (shift_amount.IsZero()) { - if (carry_inst) { - carry_inst->ReplaceUsesWith(inst.GetArg(2)); - } - inst.ReplaceUsesWith(inst.GetArg(0)); - return false; - } - - if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { - inst.SetArg(2, IR::Value(false)); - } - - if (!inst.AreAllArgsImmediates() || carry_inst) { - return false; - } - - return true; -} - -void FoldSignExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSignExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSub(IR::Inst& inst, bool is_32_bit) { - if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { - return; - } - - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -void FoldZeroExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldZeroExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{value}); -} -} // Anonymous namespace - -void ConstantPropagation(IR::Block& block) { - for (auto& inst : block) { - const auto opcode = inst.GetOpcode(); - - switch (opcode) { - case Op::LeastSignificantWord: - FoldLeastSignificantWord(inst); - break; - case Op::MostSignificantWord: - FoldMostSignificantWord(inst); - break; - case Op::LeastSignificantHalf: - FoldLeastSignificantHalf(inst); - break; - case Op::LeastSignificantByte: - FoldLeastSignificantByte(inst); - break; - case Op::MostSignificantBit: - FoldMostSignificantBit(inst); - break; - case Op::IsZero32: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); - } - break; - case Op::IsZero64: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); - } - break; - case Op::LogicalShiftLeft32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeft64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeftMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftLeftMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::LogicalShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::ArithmeticShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::ArithmeticShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::RotateRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); - } - break; - case Op::RotateRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); - } - break; - case Op::Add32: - case Op::Add64: - FoldAdd(inst, opcode == Op::Add32); - break; - case Op::Sub32: - case Op::Sub64: - FoldSub(inst, opcode == Op::Sub32); - break; - case Op::Mul32: - case Op::Mul64: - FoldMultiply(inst, opcode == Op::Mul32); - break; - case Op::SignedDiv32: - case Op::SignedDiv64: - FoldDivide(inst, opcode == Op::SignedDiv32, true); - break; - case Op::UnsignedDiv32: - case Op::UnsignedDiv64: - FoldDivide(inst, opcode == Op::UnsignedDiv32, false); - break; - case Op::And32: - case Op::And64: - FoldAND(inst, opcode == Op::And32); - break; - case Op::Eor32: - case Op::Eor64: - FoldEOR(inst, opcode == Op::Eor32); - break; - case Op::Or32: - case Op::Or64: - FoldOR(inst, opcode == Op::Or32); - break; - case Op::Not32: - case Op::Not64: - FoldNOT(inst, opcode == Op::Not32); - break; - case Op::SignExtendByteToWord: - case Op::SignExtendHalfToWord: - FoldSignExtendXToWord(inst); - break; - case Op::SignExtendByteToLong: - case Op::SignExtendHalfToLong: - case Op::SignExtendWordToLong: - FoldSignExtendXToLong(inst); - break; - case Op::ZeroExtendByteToWord: - case Op::ZeroExtendHalfToWord: - FoldZeroExtendXToWord(inst); - break; - case Op::ZeroExtendByteToLong: - case Op::ZeroExtendHalfToLong: - case Op::ZeroExtendWordToLong: - FoldZeroExtendXToLong(inst); - break; - case Op::ByteReverseWord: - case Op::ByteReverseHalf: - case Op::ByteReverseDual: - FoldByteReverse(inst, opcode); - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp deleted file mode 100644 index bda9f6efd1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void DeadCodeElimination(IR::Block& block) { - // We iterate over the instructions in reverse order. - // This is because removing an instruction reduces the number of uses for earlier instructions. - for (auto& inst : mcl::iterator::reverse(block)) { - if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { - inst.Invalidate(); - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp deleted file mode 100644 index e87fcc335b..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void IdentityRemovalPass(IR::Block& block) { - std::vector to_invalidate; - - auto iter = block.begin(); - while (iter != block.end()) { - IR::Inst& inst = *iter; - - const size_t num_args = inst.NumArgs(); - for (size_t i = 0; i < num_args; i++) { - while (true) { - IR::Value arg = inst.GetArg(i); - if (!arg.IsIdentity()) - break; - inst.SetArg(i, arg.GetInst()->GetArg(0)); - } - } - - if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { - iter = block.Instructions().erase(inst); - to_invalidate.push_back(&inst); - } else { - ++iter; - } - } - - for (IR::Inst* inst : to_invalidate) { - inst->Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h b/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h deleted file mode 100644 index 5eb1a55100..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h +++ /dev/null @@ -1,127 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization::IRMatcher { - -struct CaptureValue { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value); - } -}; - -struct CaptureInst { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return std::tuple(value.GetInstRecursive()); - } -}; - -struct CaptureUImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsU64()); - } -}; - -struct CaptureSImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsS64()); - } -}; - -template -struct UImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsU64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct SImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsS64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct Inst { -public: - using ReturnType = mp::concat, typename Args::ReturnType...>; - - static std::optional Match(const IR::Inst& inst) { - if (inst.GetOpcode() != Opcode) - return std::nullopt; - if (inst.HasAssociatedPseudoOperation()) - return std::nullopt; - return MatchArgs<0>(inst); - } - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return Match(*value.GetInstRecursive()); - } - -private: - template - static auto MatchArgs(const IR::Inst& inst) -> std::optional>, std::tuple<>>>> { - if constexpr (I >= sizeof...(Args)) { - return std::tuple(); - } else { - using Arg = mp::get>; - - if (const auto arg = Arg::Match(inst.GetArg(I))) { - if (const auto rest = MatchArgs(inst)) { - return std::tuple_cat(*arg, *rest); - } - } - - return std::nullopt; - } - } -}; - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t); -} - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t); -} - -} // namespace Dynarmic::Optimization::IRMatcher diff --git a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp deleted file mode 100644 index a766bdc83f..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2023 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" - -namespace Dynarmic::Optimization { - -void NamingPass(IR::Block& block) { - unsigned name = 1; - for (auto& inst : block) { - inst.SetName(name++); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/passes.h b/src/dynarmic/src/dynarmic/ir/opt/passes.h deleted file mode 100644 index 703145b556..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/passes.h +++ /dev/null @@ -1,47 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -namespace Dynarmic::A32 { -struct UserCallbacks; -} - -namespace Dynarmic::A64 { -struct UserCallbacks; -struct UserConfig; -} // namespace Dynarmic::A64 - -namespace Dynarmic::IR { -class Block; -} - -namespace Dynarmic::Optimization { - -struct PolyfillOptions { - bool sha256 = false; - bool vector_multiply_widen = false; - - bool operator==(const PolyfillOptions&) const = default; -}; - -struct A32GetSetEliminationOptions { - bool convert_nzc_to_nz = false; - bool convert_nz_to_nzc = false; -}; - -void PolyfillPass(IR::Block& block, const PolyfillOptions& opt); -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb); -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt); -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf); -void A64GetSetElimination(IR::Block& block); -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb); -void ConstantPropagation(IR::Block& block); -void DeadCodeElimination(IR::Block& block); -void IdentityRemovalPass(IR::Block& block); -void VerificationPass(const IR::Block& block); -void NamingPass(IR::Block& block); - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp deleted file mode 100644 index 1aa3aea91e..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2022 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -namespace { - -void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - - const IR::U128 t = ir.VectorExtract(x, y, 32); - - IR::U128 result = ir.ZeroVector(); - for (size_t i = 0; i < 4; i++) { - const IR::U32 modified_element = [&] { - const IR::U32 element = ir.VectorGetElement(32, t, i); - const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); - const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); - const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); - }(); - - result = ir.VectorSetElement(32, result, i, modified_element); - } - result = ir.VectorAdd(32, result, x); - - inst.ReplaceUsesWith(result); -} - -void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 z = (IR::U128)inst.GetArg(2); - - const IR::U128 T0 = ir.VectorExtract(y, z, 32); - - const IR::U128 lower_half = [&] { - const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); - const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); - return ir.VectorZeroUpper(tmp5); - }(); - - const IR::U64 upper_half = [&] { - const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - - // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] - const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); - const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); - - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); - return ir.VectorGetElement(64, tmp5, 0); - }(); - - const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); - - inst.ReplaceUsesWith(result); -} - -IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Eor(ir.And(ir.Eor(y, z), x), z); -} - -IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); -} - -IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 x = (IR::U128)inst.GetArg(0); - IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 w = (IR::U128)inst.GetArg(2); - const bool part1 = inst.GetArg(3).GetU1(); - - for (size_t i = 0; i < 4; i++) { - const IR::U32 low_x = ir.VectorGetElement(32, x, 0); - const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); - const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); - const IR::U32 high_x = ir.VectorGetElement(32, x, 3); - - const IR::U32 low_y = ir.VectorGetElement(32, y, 0); - const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); - const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); - const IR::U32 high_y = ir.VectorGetElement(32, y, 3); - - const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); - const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); - - const IR::U32 t = [&] { - const IR::U32 w_element = ir.VectorGetElement(32, w, i); - const IR::U32 sig = SHAhashSIGMA1(ir, low_y); - - return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); - }(); - - const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); - const IR::U32 new_low_y = ir.Add(t, high_x); - - // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] - const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); - const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); - - x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); - y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); - } - - inst.ReplaceUsesWith(part1 ? x : y); -} - -template -void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 n = (IR::U128)inst.GetArg(0); - IR::U128 m = (IR::U128)inst.GetArg(1); - - const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); - const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); - - const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); - - inst.ReplaceUsesWith(result); -} - -} // namespace - -void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { - if (polyfill == PolyfillOptions{}) { - return; - } - - IR::IREmitter ir{block}; - - for (auto& inst : block) { - ir.SetInsertionPointBefore(&inst); - - switch (inst.GetOpcode()) { - case IR::Opcode::SHA256MessageSchedule0: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule0(ir, inst); - } - break; - case IR::Opcode::SHA256MessageSchedule1: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule1(ir, inst); - } - break; - case IR::Opcode::SHA256Hash: - if (polyfill.sha256) { - PolyfillSHA256Hash(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, false>(ir, inst); - } - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp deleted file mode 100644 index c6c2cff231..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/type.h" - -namespace Dynarmic::Optimization { - -void VerificationPass(const IR::Block& block) { - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const IR::Type t1 = inst.GetArg(i).GetType(); - const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); - if (!IR::AreTypesCompatible(t1, t2)) { - std::puts(IR::DumpBlock(block).c_str()); - ASSERT_FALSE("above block failed validation"); - } - } - } - - ankerl::unordered_dense::map actual_uses; - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const auto arg = inst.GetArg(i); - if (!arg.IsImmediate()) { - actual_uses[arg.GetInst()]++; - } - } - } - - for (const auto& pair : actual_uses) { - ASSERT(pair.first->UseCount() == pair.second); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp new file mode 100644 index 0000000000..383b915839 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -0,0 +1,1502 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include +#include + +#include +#include "boost/container/small_vector.hpp" +#include "dynarmic/frontend/A32/a32_ir_emitter.h" +#include "dynarmic/frontend/A32/a32_location_descriptor.h" +#include "dynarmic/frontend/A32/a32_types.h" +#include "dynarmic/frontend/A64/a64_ir_emitter.h" +#include "dynarmic/frontend/A64/a64_location_descriptor.h" +#include "dynarmic/frontend/A64/translate/a64_translate.h" +#include "dynarmic/interface/A32/config.h" +#include "dynarmic/interface/A64/config.h" +#include "dynarmic/interface/optimization_flags.h" +#include "dynarmic/common/safe_ops.h" +#include "dynarmic/ir/basic_block.h" +#include "dynarmic/ir/microinstruction.h" +#include "dynarmic/ir/opcodes.h" +#include "dynarmic/ir/opt_passes.h" +#include "dynarmic/ir/type.h" +#include "mcl/bit/swap.hpp" +#include "mcl/bit/rotate.hpp" +#include "mcl/iterator/reverse.hpp" + +namespace Dynarmic::Optimization { + +static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { + for (auto& inst : block) { + switch (inst.GetOpcode()) { + case IR::Opcode::A32ReadMemory8: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u8 value_from_memory = cb->MemoryRead8(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory16: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u16 value_from_memory = cb->MemoryRead16(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory32: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u32 value_from_memory = cb->MemoryRead32(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory64: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u64 value_from_memory = cb->MemoryRead64(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + default: + break; + } + } +} + +static void FlagsPass(IR::Block& block) { + using Iterator = std::reverse_iterator; + + struct FlagInfo { + bool set_not_required = false; + bool has_value_request = false; + Iterator value_request = {}; + }; + struct ValuelessFlagInfo { + bool set_not_required = false; + }; + ValuelessFlagInfo nzcvq; + ValuelessFlagInfo nzcv; + ValuelessFlagInfo nz; + FlagInfo c_flag; + FlagInfo ge; + + auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(value); + } + info.has_value_request = false; + + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_get = [](FlagInfo& info, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(IR::Value{&*inst}); + } + info.has_value_request = true; + info.value_request = inst; + }; + + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetCFlag: { + do_get(c_flag, inst); + break; + } + case IR::Opcode::A32SetCpsrNZCV: { + if (c_flag.has_value_request) { + ir.SetInsertionPointBefore(inst.base()); // base is one ahead + IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); + c_flag.value_request->ReplaceUsesWith(c); + c_flag.has_value_request = false; + break; // This case will be executed again because of the above + } + + do_set_valueless(nzcv, inst); + + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVRaw: { + if (c_flag.has_value_request) { + nzcv.set_not_required = false; + } + + do_set_valueless(nzcv, inst); + + nzcvq = {}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVQ: { + if (c_flag.has_value_request) { + nzcvq.set_not_required = false; + } + + do_set_valueless(nzcvq, inst); + + nzcv = {.set_not_required = true}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZ: { + do_set_valueless(nz, inst); + + nzcvq = {}; + nzcv = {}; + break; + } + case IR::Opcode::A32SetCpsrNZC: { + if (c_flag.has_value_request) { + c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); + c_flag.has_value_request = false; + } + + if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { + const auto nz_value = inst->GetArg(0); + + inst->Invalidate(); + + ir.SetInsertionPointBefore(inst.base()); + ir.SetCpsrNZ(IR::NZCV{nz_value}); + + nzcvq = {}; + nzcv = {}; + nz = {.set_not_required = true}; + break; + } + + if (nz.set_not_required && c_flag.set_not_required) { + inst->Invalidate(); + } else if (nz.set_not_required) { + inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); + } + nz.set_not_required = true; + c_flag.set_not_required = true; + + nzcv = {}; + nzcvq = {}; + break; + } + case IR::Opcode::A32SetGEFlags: { + do_set(ge, inst->GetArg(0), inst); + break; + } + case IR::Opcode::A32GetGEFlags: { + do_get(ge, inst); + break; + } + case IR::Opcode::A32SetGEFlagsCompressed: { + ge = {.set_not_required = true}; + break; + } + case IR::Opcode::A32OrQFlag: { + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcvq = {}; + nzcv = {}; + nz = {}; + c_flag = {}; + ge = {}; + } + break; + } + } + } +} + +static void RegisterPass(IR::Block& block) { + using Iterator = IR::Block::iterator; + + struct RegInfo { + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array reg_info; + + const auto do_get = [](RegInfo& info, Iterator get_inst) { + if (info.register_value.IsEmpty()) { + info.register_value = IR::Value(&*get_inst); + return; + } + get_inst->ReplaceUsesWith(info.register_value); + }; + + const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { + if (info.last_set_instruction) { + (*info.last_set_instruction)->Invalidate(); + } + info = { + .register_value = value, + .last_set_instruction = set_inst, + }; + }; + + enum class ExtValueType { + Empty, + Single, + Double, + VectorDouble, + VectorQuad, + }; + struct ExtRegInfo { + ExtValueType value_type = {}; + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array ext_reg_info; + + const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { + if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = IR::Value(&*get_inst), + .last_set_instruction = std::nullopt, + }; + } + return; + } + get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); + }; + + const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { + if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + if (std::data(infos)[0].get().last_set_instruction) { + (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); + } + } + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = value, + .last_set_instruction = set_inst, + }; + } + }; + + // Location and version don't matter here. + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + ASSERT(reg != A32::Reg::PC); + const size_t reg_index = size_t(reg); + do_get(reg_info[reg_index], inst); + break; + } + case IR::Opcode::A32SetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + if (reg == A32::Reg::PC) { + break; + } + const auto reg_index = size_t(reg); + do_set(reg_info[reg_index], inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); + break; + } + case IR::Opcode::A32SetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + break; + } + case IR::Opcode::A32SetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst->GetArg(1), + inst); + break; + } + case IR::Opcode::A32GetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + do_ext_get(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_get(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst); + } + break; + } + case IR::Opcode::A32SetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + ir.SetInsertionPointAfter(inst); + const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); + do_ext_set(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + stored_value, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_set(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst->GetArg(1), + inst); + } + break; + } + default: { + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + ext_reg_info = {}; + } + break; + } + } + } +} + +struct A32GetSetEliminationOptions { + bool convert_nzc_to_nz = false; + bool convert_nz_to_nzc = false; +}; + +static void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { + FlagsPass(block); + RegisterPass(block); +} + +static void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { + if (conf.hook_data_cache_operations) { + return; + } + + for (auto& inst : block) { + if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { + continue; + } + + const auto op = static_cast(inst.GetArg(1).GetU64()); + if (op == A64::DataCacheOperation::ZeroByVA) { + A64::IREmitter ir{block}; + ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; + ir.SetInsertionPointBefore(&inst); + + size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); + IR::U64 addr{inst.GetArg(2)}; + + const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); + while (bytes >= 16) { + ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(16)); + bytes -= 16; + } + + while (bytes >= 8) { + ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(8)); + bytes -= 8; + } + + while (bytes >= 4) { + ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(4)); + bytes -= 4; + } + } + inst.Invalidate(); + } +} + +static void A64GetSetElimination(IR::Block& block) { + using Iterator = IR::Block::iterator; + + enum class TrackingType { + W, + X, + S, + D, + Q, + SP, + NZCV, + NZCVRaw, + }; + struct RegisterInfo { + IR::Value register_value; + TrackingType tracking_type; + bool set_instruction_present = false; + Iterator last_set_instruction; + }; + std::array reg_info; + std::array vec_info; + RegisterInfo sp_info; + RegisterInfo nzcv_info; + + const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { + if (info.set_instruction_present) { + info.last_set_instruction->Invalidate(); + block.Instructions().erase(info.last_set_instruction); + } + + info.register_value = value; + info.tracking_type = tracking_type; + info.set_instruction_present = true; + info.last_set_instruction = set_inst; + }; + + const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { + const auto do_nothing = [&] { + info = {}; + info.register_value = IR::Value(&*get_inst); + info.tracking_type = tracking_type; + }; + + if (info.register_value.IsEmpty()) { + do_nothing(); + return; + } + + if (info.tracking_type == tracking_type) { + get_inst->ReplaceUsesWith(info.register_value); + return; + } + + do_nothing(); + }; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A64GetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::W); + break; + } + case IR::Opcode::A64GetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::X); + break; + } + case IR::Opcode::A64GetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::S); + break; + } + case IR::Opcode::A64GetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::D); + break; + } + case IR::Opcode::A64GetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64GetSP: { + do_get(sp_info, inst, TrackingType::SP); + break; + } + case IR::Opcode::A64GetNZCVRaw: { + do_get(nzcv_info, inst, TrackingType::NZCVRaw); + break; + } + case IR::Opcode::A64SetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); + break; + } + case IR::Opcode::A64SetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); + break; + } + case IR::Opcode::A64SetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); + break; + } + case IR::Opcode::A64SetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); + break; + } + case IR::Opcode::A64SetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64SetSP: { + do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); + break; + } + case IR::Opcode::A64SetNZCV: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); + break; + } + case IR::Opcode::A64SetNZCVRaw: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcv_info = {}; + } + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + vec_info = {}; + sp_info = {}; + } + break; + } + } + } +} + +static void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { + const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { + const auto instruction = cb->MemoryReadCode(location.PC()); + if (!instruction) + return false; + + IR::Block new_block{location}; + A64::TranslateSingleInstruction(new_block, location, *instruction); + + if (!new_block.Instructions().empty()) + return false; + + const IR::Terminal terminal = new_block.GetTerminal(); + if (auto term = boost::get(&terminal)) { + return term->next == location; + } + + return false; + }; + + IR::Terminal terminal = block.GetTerminal(); + auto term = boost::get(&terminal); + if (!term) + return; + + A64::LocationDescriptor location{term->next}; + size_t num_instructions = 1; + + while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { + num_instructions++; + } + + term->num_instructions = num_instructions; + block.ReplaceTerminal(terminal); + block.CycleCount() += num_instructions - 1; +} + +using Op = Dynarmic::IR::Opcode; + +// Tiny helper to avoid the need to store based off the opcode +// bit size all over the place within folding functions. +static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { + if (is_32_bit) { + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); + } else { + inst.ReplaceUsesWith(IR::Value{value}); + } +} + +static IR::Value Value(bool is_32_bit, u64 value) { + return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; +} + +template +static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + + const bool is_lhs_immediate = lhs.IsImmediate(); + const bool is_rhs_immediate = rhs.IsImmediate(); + + if (is_lhs_immediate && is_rhs_immediate) { + const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); + ReplaceUsesWith(inst, is_32_bit, result); + return false; + } + + if (is_lhs_immediate && !is_rhs_immediate) { + const IR::Inst* rhs_inst = rhs.GetInstRecursive(); + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, rhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } else { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + } + } + + if (!is_lhs_immediate && is_rhs_immediate) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } + } + + return true; +} + +static void FoldAdd(IR::Inst& inst, bool is_32_bit) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + if (lhs.IsImmediate() && !rhs.IsImmediate()) { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + FoldAdd(inst, is_32_bit); + return; + } + + if (inst.HasAssociatedPseudoOperation()) { + return; + } + + if (!lhs.IsImmediate() && rhs.IsImmediate()) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { + const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); + if (combined == 0) { + inst.ReplaceUsesWith(lhs_inst->GetArg(0)); + return; + } + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + return; + } + if (rhs.IsZero() && carry.IsZero()) { + inst.ReplaceUsesWith(lhs); + return; + } + } + + if (inst.AreAllArgsImmediates()) { + const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); + return; + } +} + +/// Folds AND operations based on the following: +/// +/// 1. imm_x & imm_y -> result +/// 2. x & 0 -> 0 +/// 3. 0 & y -> 0 +/// 4. x & y -> y (where x has all bits set to 1) +/// 5. x & y -> x (where y has all bits set to 1) +/// +static void FoldAND(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.HasAllBitsSet()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +/// Folds byte reversal opcodes based on the following: +/// +/// 1. imm -> swap(imm) +/// +static void FoldByteReverse(IR::Inst& inst, Op op) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + if (op == Op::ByteReverseWord) { + const u32 result = mcl::bit::swap_bytes_32(u32(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else if (op == Op::ByteReverseHalf) { + const u16 result = mcl::bit::swap_bytes_16(u16(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else { + const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); + inst.ReplaceUsesWith(IR::Value{result}); + } +} + +/// Folds division operations based on the following: +/// +/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) +/// 2. imm_x / imm_y -> result +/// 3. x / 1 -> x +/// +static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { + const auto rhs = inst.GetArg(1); + + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + return; + } + + const auto lhs = inst.GetArg(0); + if (lhs.IsImmediate() && rhs.IsImmediate()) { + if (is_signed) { + const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); + ReplaceUsesWith(inst, is_32_bit, static_cast(result)); + } else { + const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); + } + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(IR::Value{lhs}); + } +} + +// Folds EOR operations based on the following: +// +// 1. imm_x ^ imm_y -> result +// 2. x ^ 0 -> x +// 3. 0 ^ y -> y +// +static void FoldEOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static void FoldLeastSignificantByte(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantHalf(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldMostSignificantBit(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); +} + +static void FoldMostSignificantWord(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + if (carry_inst) { + carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); + } + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); +} + +// Folds multiplication operations based on the following: +// +// 1. imm_x * imm_y -> result +// 2. x * 0 -> 0 +// 3. 0 * y -> 0 +// 4. x * 1 -> x +// 5. 1 * y -> y +// +static void FoldMultiply(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +// Folds NOT operations if the contained value is an immediate. +static void FoldNOT(IR::Inst& inst, bool is_32_bit) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + const u64 result = ~operand.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +// Folds OR operations based on the following: +// +// 1. imm_x | imm_y -> result +// 2. x | 0 -> x +// 3. 0 | y -> y +// +static void FoldOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static bool FoldShifts(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + // The 32-bit variants can contain 3 arguments, while the + // 64-bit variants only contain 2. + if (inst.NumArgs() == 3 && !carry_inst) { + inst.SetArg(2, IR::Value(false)); + } + + const auto shift_amount = inst.GetArg(1); + + if (shift_amount.IsZero()) { + if (carry_inst) { + carry_inst->ReplaceUsesWith(inst.GetArg(2)); + } + inst.ReplaceUsesWith(inst.GetArg(0)); + return false; + } + + if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { + inst.SetArg(2, IR::Value(false)); + } + + if (!inst.AreAllArgsImmediates() || carry_inst) { + return false; + } + + return true; +} + +static void FoldSignExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSignExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSub(IR::Inst& inst, bool is_32_bit) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return; + } + + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +static void FoldZeroExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldZeroExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{value}); +} + +static void ConstantPropagation(IR::Block& block) { + for (auto& inst : block) { + const auto opcode = inst.GetOpcode(); + + switch (opcode) { + case Op::LeastSignificantWord: + FoldLeastSignificantWord(inst); + break; + case Op::MostSignificantWord: + FoldMostSignificantWord(inst); + break; + case Op::LeastSignificantHalf: + FoldLeastSignificantHalf(inst); + break; + case Op::LeastSignificantByte: + FoldLeastSignificantByte(inst); + break; + case Op::MostSignificantBit: + FoldMostSignificantBit(inst); + break; + case Op::IsZero32: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); + } + break; + case Op::IsZero64: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); + } + break; + case Op::LogicalShiftLeft32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeft64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeftMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftLeftMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::LogicalShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::ArithmeticShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::ArithmeticShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::RotateRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); + } + break; + case Op::RotateRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); + } + break; + case Op::Add32: + case Op::Add64: + FoldAdd(inst, opcode == Op::Add32); + break; + case Op::Sub32: + case Op::Sub64: + FoldSub(inst, opcode == Op::Sub32); + break; + case Op::Mul32: + case Op::Mul64: + FoldMultiply(inst, opcode == Op::Mul32); + break; + case Op::SignedDiv32: + case Op::SignedDiv64: + FoldDivide(inst, opcode == Op::SignedDiv32, true); + break; + case Op::UnsignedDiv32: + case Op::UnsignedDiv64: + FoldDivide(inst, opcode == Op::UnsignedDiv32, false); + break; + case Op::And32: + case Op::And64: + FoldAND(inst, opcode == Op::And32); + break; + case Op::Eor32: + case Op::Eor64: + FoldEOR(inst, opcode == Op::Eor32); + break; + case Op::Or32: + case Op::Or64: + FoldOR(inst, opcode == Op::Or32); + break; + case Op::Not32: + case Op::Not64: + FoldNOT(inst, opcode == Op::Not32); + break; + case Op::SignExtendByteToWord: + case Op::SignExtendHalfToWord: + FoldSignExtendXToWord(inst); + break; + case Op::SignExtendByteToLong: + case Op::SignExtendHalfToLong: + case Op::SignExtendWordToLong: + FoldSignExtendXToLong(inst); + break; + case Op::ZeroExtendByteToWord: + case Op::ZeroExtendHalfToWord: + FoldZeroExtendXToWord(inst); + break; + case Op::ZeroExtendByteToLong: + case Op::ZeroExtendHalfToLong: + case Op::ZeroExtendWordToLong: + FoldZeroExtendXToLong(inst); + break; + case Op::ByteReverseWord: + case Op::ByteReverseHalf: + case Op::ByteReverseDual: + FoldByteReverse(inst, opcode); + break; + default: + break; + } + } +} + +static void DeadCodeElimination(IR::Block& block) { + // We iterate over the instructions in reverse order. + // This is because removing an instruction reduces the number of uses for earlier instructions. + for (auto& inst : mcl::iterator::reverse(block)) { + if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { + inst.Invalidate(); + } + } +} + +static void IdentityRemovalPass(IR::Block& block) { + boost::container::small_vector to_invalidate; + + auto iter = block.begin(); + while (iter != block.end()) { + IR::Inst& inst = *iter; + + const size_t num_args = inst.NumArgs(); + for (size_t i = 0; i < num_args; i++) { + while (true) { + IR::Value arg = inst.GetArg(i); + if (!arg.IsIdentity()) + break; + inst.SetArg(i, arg.GetInst()->GetArg(0)); + } + } + + if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { + iter = block.Instructions().erase(inst); + to_invalidate.push_back(&inst); + } else { + ++iter; + } + } + for (IR::Inst* inst : to_invalidate) { + inst->Invalidate(); + } +} + +static void NamingPass(IR::Block& block) { + u32 name = 1; + for (auto& inst : block) + inst.SetName(name++); +} + +static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + + const IR::U128 t = ir.VectorExtract(x, y, 32); + + IR::U128 result = ir.ZeroVector(); + for (size_t i = 0; i < 4; i++) { + const IR::U32 modified_element = [&] { + const IR::U32 element = ir.VectorGetElement(32, t, i); + const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); + const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); + const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); + }(); + + result = ir.VectorSetElement(32, result, i, modified_element); + } + result = ir.VectorAdd(32, result, x); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 z = (IR::U128)inst.GetArg(2); + + const IR::U128 T0 = ir.VectorExtract(y, z, 32); + + const IR::U128 lower_half = [&] { + const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); + const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); + return ir.VectorZeroUpper(tmp5); + }(); + + const IR::U64 upper_half = [&] { + const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + + // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] + const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); + const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); + + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); + return ir.VectorGetElement(64, tmp5, 0); + }(); + + const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); + + inst.ReplaceUsesWith(result); +} + +static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Eor(ir.And(ir.Eor(y, z), x), z); +} + +static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); +} + +static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 x = (IR::U128)inst.GetArg(0); + IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 w = (IR::U128)inst.GetArg(2); + const bool part1 = inst.GetArg(3).GetU1(); + + for (size_t i = 0; i < 4; i++) { + const IR::U32 low_x = ir.VectorGetElement(32, x, 0); + const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); + const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); + const IR::U32 high_x = ir.VectorGetElement(32, x, 3); + + const IR::U32 low_y = ir.VectorGetElement(32, y, 0); + const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); + const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); + const IR::U32 high_y = ir.VectorGetElement(32, y, 3); + + const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); + const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); + + const IR::U32 t = [&] { + const IR::U32 w_element = ir.VectorGetElement(32, w, i); + const IR::U32 sig = SHAhashSIGMA1(ir, low_y); + + return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); + }(); + + const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); + const IR::U32 new_low_y = ir.Add(t, high_x); + + // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] + const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); + const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); + + x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); + y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); + } + + inst.ReplaceUsesWith(part1 ? x : y); +} + +template +static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 n = (IR::U128)inst.GetArg(0); + IR::U128 m = (IR::U128)inst.GetArg(1); + + const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); + const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); + + const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { + if (polyfill == PolyfillOptions{}) { + return; + } + + IR::IREmitter ir{block}; + + for (auto& inst : block) { + ir.SetInsertionPointBefore(&inst); + + switch (inst.GetOpcode()) { + case IR::Opcode::SHA256MessageSchedule0: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule0(ir, inst); + } + break; + case IR::Opcode::SHA256MessageSchedule1: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule1(ir, inst); + } + break; + case IR::Opcode::SHA256Hash: + if (polyfill.sha256) { + PolyfillSHA256Hash(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, false>(ir, inst); + } + break; + default: + break; + } + } +} + +static void VerificationPass(const IR::Block& block) { + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) { + const IR::Type t1 = inst.GetArg(i).GetType(); + const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); + ASSERT_MSG(IR::AreTypesCompatible(t1, t2), "Block failed:\n{}", IR::DumpBlock(block)); + } + } + ankerl::unordered_dense::map actual_uses; + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) + if (IR::Value const arg = inst.GetArg(i); !arg.IsImmediate()) + actual_uses[arg.GetInst()]++; + } + for (auto const& pair : actual_uses) + ASSERT(pair.first->UseCount() == pair.second); +} + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) [[likely]] { + Optimization::A32GetSetElimination(block, {.convert_nzc_to_nz = true}); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::A32ConstantMemoryReads(block, conf.callbacks); + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + Optimization::IdentityRemovalPass(block); + Optimization::VerificationPass(block); +} + +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::A64CallbackConfigPass(block, conf); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) [[likely]] { + Optimization::A64GetSetElimination(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] { + Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks); + } + Optimization::VerificationPass(block); +} + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h new file mode 100644 index 0000000000..1963fad0a0 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -0,0 +1,34 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +namespace Dynarmic::A32 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::A64 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::IR { +class Block; +} + +namespace Dynarmic::Optimization { + +struct PolyfillOptions { + bool sha256 = false; + bool vector_multiply_widen = false; + + bool operator==(const PolyfillOptions&) const = default; +}; + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index 4d14141bbf..ad01e5718b 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -29,7 +29,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; @@ -179,13 +179,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 85d86c7966..29a8f23478 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -6,33 +6,24 @@ add_executable(dynarmic_tests fp/mantissa_util_tests.cpp fp/unpacked_tests.cpp rand_int.h + # A32 + A32/test_arm_disassembler.cpp + A32/test_arm_instructions.cpp + A32/test_coprocessor.cpp + A32/test_svc.cpp + A32/test_thumb_instructions.cpp + A32/testenv.h + decoder_tests.cpp + # A64 + A64/a64.cpp + A64/fibonacci.cpp + A64/fp_min_max.cpp + A64/misaligned_page_table.cpp + A64/test_invalidation.cpp + A64/real_world.cpp + A64/testenv.h ) - -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/test_arm_disassembler.cpp - A32/test_arm_instructions.cpp - A32/test_coprocessor.cpp - A32/test_svc.cpp - A32/test_thumb_instructions.cpp - A32/testenv.h - decoder_tests.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) - - target_sources(dynarmic_tests PRIVATE - A64/a64.cpp - A64/fibonacci.cpp - A64/fp_min_max.cpp - A64/misaligned_page_table.cpp - A64/test_invalidation.cpp - A64/real_world.cpp - A64/testenv.h - ) -endif() +target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) if (DYNARMIC_TESTS_USE_UNICORN) target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn) @@ -40,25 +31,17 @@ if (DYNARMIC_TESTS_USE_UNICORN) target_sources(dynarmic_tests PRIVATE fuzz_util.cpp fuzz_util.h + # A32 + A32/fuzz_arm.cpp + A32/fuzz_thumb.cpp + unicorn_emu/a32_unicorn.cpp + unicorn_emu/a32_unicorn.h + # A64 + A64/fuzz_with_unicorn.cpp + A64/verify_unicorn.cpp + unicorn_emu/a64_unicorn.cpp + unicorn_emu/a64_unicorn.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/fuzz_arm.cpp - A32/fuzz_thumb.cpp - unicorn_emu/a32_unicorn.cpp - unicorn_emu/a32_unicorn.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A64/fuzz_with_unicorn.cpp - A64/verify_unicorn.cpp - unicorn_emu/a64_unicorn.cpp - unicorn_emu/a64_unicorn.h - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -69,9 +52,6 @@ if ("x86_64" IN_LIST ARCHITECTURE) target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak) target_architecture_specific_sources(dynarmic_tests "x86_64" x64_cpu_info.cpp - ) - - target_architecture_specific_sources(dynarmic_tests "x86_64" native/preserve_xmm.cpp ) @@ -85,47 +65,47 @@ endif() include(CreateDirectoryGroups) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_print_info - print_info.cpp - ) +# +# dynarmic_print_info +# +add_executable(dynarmic_print_info + print_info.cpp +) +create_target_directory_groups(dynarmic_print_info) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_print_info PRIVATE . ../src) +target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_print_info) +# +# dynarmic_test_generator +# +add_executable(dynarmic_test_generator + fuzz_util.cpp + fuzz_util.h + test_generator.cpp +) - target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_print_info PRIVATE . ../src) - target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +create_target_directory_groups(dynarmic_test_generator) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_generator - fuzz_util.cpp - fuzz_util.h - test_generator.cpp - ) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_generator PRIVATE . ../src) +target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_test_generator) - - target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_generator PRIVATE . ../src) - target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() - -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_reader - test_reader.cpp - ) - - create_target_directory_groups(dynarmic_test_reader) - - target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_reader PRIVATE . ../src) - target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +# +# dynarmic_test_reader +# +add_executable(dynarmic_test_reader + test_reader.cpp +) +create_target_directory_groups(dynarmic_test_reader) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_reader PRIVATE . ../src) +target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) +# create_target_directory_groups(dynarmic_tests) target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index ef8b87bbd1..3263ca729a 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -34,7 +34,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; From 565f7f6532f8cf684e2c2d5f1af70e23f2131879 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:57:48 +0000 Subject: [PATCH 11/17] Fix license headers --- src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index 8e48fa3687..01af361b27 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index 2b50ad9ea3..c4c1c42792 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h index 1963fad0a0..88b8020031 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.h +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD From e31acb645e713f5f7517929bf4ee912a0d87e881 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 00:11:40 +0000 Subject: [PATCH 12/17] [dynarmic] use small vector experiment Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index f70329f471..df1619d552 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -12,6 +12,7 @@ #include #include +#include "boost/container/small_vector.hpp" #include "dynarmic/common/common_types.h" #include #include @@ -83,7 +84,7 @@ public: void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; private: //non trivial - std::vector values; //24 + boost::container::small_vector values; //24 // Block state uint16_t total_uses = 0; //8 //sometimes zeroed @@ -96,7 +97,7 @@ private: uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - alignas(16) uint8_t lru_counter = 0; //1 + uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From 8b1f2a151c0d2abd926e434bf439862c90f8f010 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:07 +0000 Subject: [PATCH 13/17] [dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index fa6006ed2a..014c121be5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -118,7 +118,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept { values.push_back(inst); ASSERT(size_t(total_uses) + inst->UseCount() < std::numeric_limits::max()); total_uses += inst->UseCount(); - max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType())); + max_bit_width = std::max(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType()))); } void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index df1619d552..160c2ef098 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -78,7 +78,7 @@ public: return std::find(values.begin(), values.end(), inst) != values.end(); } inline size_t GetMaxBitWidth() const noexcept { - return max_bit_width; + return 1 << max_bit_width; } void AddValue(IR::Inst* inst) noexcept; void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; @@ -94,10 +94,10 @@ private: uint16_t is_being_used_count = 0; //8 uint16_t current_references = 0; //8 // Value state - uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 + uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6) + uint8_t lru_counter : 2 = 0; //1 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From f1df72f196d755da394dae589745fd06f173761c Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:22 +0000 Subject: [PATCH 14/17] [dynarmic] fix tests_reader and tests_generator Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 2 +- src/dynarmic/tests/A32/testenv.h | 1 - src/dynarmic/tests/A64/testenv.h | 1 - src/dynarmic/tests/CMakeLists.txt | 28 ++++++++++++++++++++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index 6948bf39fb..dbd5926ee9 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -371,7 +371,7 @@ target_link_libraries(dynarmic ) if (BOOST_NO_HEADERS) -target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) + target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) else() target_link_libraries(dynarmic PRIVATE Boost::headers) endif() diff --git a/src/dynarmic/tests/A32/testenv.h b/src/dynarmic/tests/A32/testenv.h index a6df2017ce..72eaafce14 100644 --- a/src/dynarmic/tests/A32/testenv.h +++ b/src/dynarmic/tests/A32/testenv.h @@ -17,7 +17,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A32/a32.h" -#include "../native/testenv.h" template class A32TestEnv : public Dynarmic::A32::UserCallbacks { diff --git a/src/dynarmic/tests/A64/testenv.h b/src/dynarmic/tests/A64/testenv.h index 31e338b138..fcdadb23e6 100644 --- a/src/dynarmic/tests/A64/testenv.h +++ b/src/dynarmic/tests/A64/testenv.h @@ -12,7 +12,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A64/a64.h" -#include "../native/testenv.h" using Vector = Dynarmic::A64::Vector; diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 29a8f23478..f8f420e2d6 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -72,7 +72,12 @@ add_executable(dynarmic_print_info print_info.cpp ) create_target_directory_groups(dynarmic_print_info) -target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_print_info PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_print_info PRIVATE . ../src) target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -88,7 +93,12 @@ add_executable(dynarmic_test_generator create_target_directory_groups(dynarmic_test_generator) -target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_generator PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_generator PRIVATE . ../src) target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -100,7 +110,12 @@ add_executable(dynarmic_test_reader test_reader.cpp ) create_target_directory_groups(dynarmic_test_reader) -target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_reader PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_reader PRIVATE . ../src) target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -108,7 +123,12 @@ target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LIT # create_target_directory_groups(dynarmic_tests) -target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) +target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_tests PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_tests PRIVATE . ../src) target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) From 779ee80a95028bbacdc8d5ba2419281ce229bd36 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 09:10:18 +0000 Subject: [PATCH 15/17] [docs] fastmem draft Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 5 +++++ src/dynarmic/docs/Fastmem.svg | 4 ++++ src/dynarmic/docs/HostToGuest.svg | 4 ++++ 3 files changed, 13 insertions(+) create mode 100644 src/dynarmic/docs/FastMemory.md create mode 100644 src/dynarmic/docs/Fastmem.svg create mode 100644 src/dynarmic/docs/HostToGuest.svg diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md new file mode 100644 index 0000000000..2a7c1a2583 --- /dev/null +++ b/src/dynarmic/docs/FastMemory.md @@ -0,0 +1,5 @@ +# Fast memory (Fastmem) + +![Host to guest translation](./HostToGuest.svg) + +![Fastmem translation](./Fastmem.svg) diff --git a/src/dynarmic/docs/Fastmem.svg b/src/dynarmic/docs/Fastmem.svg new file mode 100644 index 0000000000..a3ed0bb68b --- /dev/null +++ b/src/dynarmic/docs/Fastmem.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
SIGSEGV Trap
Fastmem
Only needs to linearly offset from fastmem arena
Less codegen (SIGSEGV traps)
Is fast only if SIGSEGV handlers are sufficiently fast
\ No newline at end of file diff --git a/src/dynarmic/docs/HostToGuest.svg b/src/dynarmic/docs/HostToGuest.svg new file mode 100644 index 0000000000..6a15a44b46 --- /dev/null +++ b/src/dynarmic/docs/HostToGuest.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
Resolver
Host to Guest translation
Looks up correct PTE
Translates each address 
Is slow
\ No newline at end of file From d68e62c9194f0ecf0ef9ce164c48d3eeb24035a8 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 10:16:22 +0000 Subject: [PATCH 16/17] [dynarmic] (prolly makes MSVC crash) - use 128MiB code cache Signed-off-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 6 +- .../frontend/decoder/decoder_detail.h | 65 ++++++++----------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 2c2c54a1ad..3f7bf7f967 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -210,12 +210,8 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 438b7b691c..682b0cfbf6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -269,12 +269,8 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index bd76efda2a..4050033ef5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -70,11 +70,9 @@ struct detail { return std::make_tuple(mask, expect); } - /** - * Generates the masks and shifts for each argument. - * A '-' in a bitstring indicates that we don't care about that value. - * An argument is specified by a continuous string of the same character. - */ + /// @brief Generates the masks and shifts for each argument. + /// A '-' in a bitstring indicates that we don't care about that value. + /// An argument is specified by a continuous string of the same character. template static consteval auto GetArgInfo(std::array bitstring) { std::array masks = {}; @@ -98,7 +96,6 @@ struct detail { if constexpr (N > 0) { const size_t bit_position = opcode_bitsize - i - 1; - if (arg_index >= N) throw std::out_of_range("Unexpected field"); @@ -109,20 +106,16 @@ struct detail { } } } - #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) // Avoids a MSVC ICE, and avoids Android NDK issue. ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; })); #endif - return std::make_tuple(masks, shifts); } - /** - * This struct's Make member function generates a lambda which decodes an instruction based on - * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a - * template argument. - */ + /// @brief This struct's Make member function generates a lambda which decodes an instruction + /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is + /// provided as a template argument. template struct VisitorCaller; @@ -130,36 +123,36 @@ struct detail { # pragma warning(push) # pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning) #endif - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...), + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...), const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...) const, + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...) const, const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; @@ -167,22 +160,16 @@ struct detail { # pragma warning(pop) #endif - /** - * Creates a matcher that can match and parse instructions based on bitstring. - * See also: GetMaskAndExpect and GetArgInfo for format of bitstring. - */ - template - static auto GetMatcher(FnT fn, const char* const name) { - constexpr size_t args_count = mcl::parameter_count_v; - + /// @brief Creates a matcher that can match and parse instructions based on bitstring. + /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. + template + static constexpr auto GetMatcher(F fn, const char* const name) { + constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); - - using Iota = std::make_index_sequence; - - const auto proxy_fn = VisitorCaller::Make(Iota(), fn, arg_masks, arg_shifts); + const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); return MatcherT(name, mask, expect, proxy_fn); } }; From 889f7b750fc83129893bbf06d1b078422b025d7e Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 4 Sep 2025 07:24:49 +0000 Subject: [PATCH 17/17] [dynarmic] reduce matcher table noise and cache misses Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/a32_types.h | 21 +++++---------- .../src/dynarmic/frontend/A32/decoder/asimd.h | 27 +++++++++---------- .../src/dynarmic/frontend/A32/decoder/vfp.h | 12 +++------ .../src/dynarmic/frontend/A64/decoder/a64.h | 23 +++++++--------- .../frontend/decoder/decoder_detail.h | 6 ++--- .../src/dynarmic/frontend/decoder/matcher.h | 11 ++------ 6 files changed, 38 insertions(+), 62 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h index fdadef8257..2a0cc25751 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h @@ -9,12 +9,9 @@ #pragma once #include -#include - #include #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" - #include "dynarmic/interface/A32/coprocessor_util.h" #include "dynarmic/ir/cond.h" @@ -89,23 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) { inline size_t RegNumber(Reg reg) { ASSERT(reg != Reg::INVALID_REG); - return static_cast(reg); + return size_t(reg); } inline size_t RegNumber(ExtReg reg) { if (IsSingleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::S0); + return size_t(reg) - size_t(ExtReg::S0); + } else if (IsDoubleExtReg(reg)) { + return size_t(reg) - size_t(ExtReg::D0); } - - if (IsDoubleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::D0); - } - - if (IsQuadExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::Q0); - } - - ASSERT_MSG(false, "Invalid extended register"); + ASSERT(IsQuadExtReg(reg)); + return size_t(reg) - size_t(ExtReg::Q0); } inline Reg operator+(Reg reg, size_t number) { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f2e206695b..f63816d2b1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher; template std::vector> GetASIMDDecodeTable() { - std::vector> table = { - -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> table = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" #undef INST - }; - // Exceptions to the rule of thumb. const std::set comes_first{ "VBIC, VMOV, VMVN, VORR (immediate)", @@ -53,19 +50,21 @@ std::vector> GetASIMDDecodeTable() { "VQDMULH (scalar)", "VQRDMULH (scalar)", }; - const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_first.count(matcher.GetName()) > 0; + const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_first.count(e.first) > 0; }); - const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_last.count(matcher.GetName()) == 0; + const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_last.count(e.first) == 0; }); - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) { - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - - return table; + std::vector> final_table; + std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + return e.second; + }); + return final_table; } template diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index 5fcacd2bda..a969570547 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -30,22 +30,18 @@ std::optional>> DecodeVFP(u32 instruc static const struct Tables { Table unconditional; Table conditional; - } tables = [] { + } tables = []() { Table list = { - #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./vfp.inc" #undef INST - }; - - const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { return (matcher.GetMask() & 0xF0000000) == 0xF0000000; }); - return Tables{ - Table{list.begin(), division}, - Table{division, list.end()}, + Table{list.begin(), it}, + Table{it, list.end()}, }; }(); diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c651dd7cde..c7bc981ac6 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) { template constexpr DecodeTable GetDecodeTable() { - std::vector> list = { -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./a64.inc" #undef INST }; - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { + std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { // If a matcher has more bits in its mask it is more specific, so it should come first. - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - // Exceptions to the above rule of thumb. - std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + std::stable_partition(list.begin(), list.end(), [&](const auto& e) { return std::set{ "MOVI, MVNI, ORR, BIC (vector, immediate)", "FMOV (vector, immediate)", "Unallocated SIMD modified immediate", - }.count(matcher.GetName()) > 0; + }.count(e.first) > 0; }); - DecodeTable table{}; for (size_t i = 0; i < table.size(); ++i) { - for (auto matcher : list) { - const auto expect = detail::ToFastLookupIndex(matcher.GetExpected()); - const auto mask = detail::ToFastLookupIndex(matcher.GetMask()); + for (auto const& e : list) { + const auto expect = detail::ToFastLookupIndex(e.second.GetExpected()); + const auto mask = detail::ToFastLookupIndex(e.second.GetMask()); if ((i & mask) == expect) { - table[i].push_back(matcher); + table[i].push_back(e.second); } } } diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index 4050033ef5..0d9da6169c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -163,18 +163,18 @@ struct detail { /// @brief Creates a matcher that can match and parse instructions based on bitstring. /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. template - static constexpr auto GetMatcher(F fn, const char* const name) { + static constexpr auto GetMatcher(F fn) { constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); - return MatcherT(name, mask, expect, proxy_fn); + return MatcherT(mask, expect, proxy_fn); } }; -#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn, name) +#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn) } // namespace detail } // namespace Dynarmic::Decoder diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 7e5c9c5a8f..5a2afe57c5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -31,14 +31,8 @@ public: using visitor_type = Visitor; using handler_return_type = typename Visitor::instruction_return_type; using handler_function = std::function; - - Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func) - : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {} - - /// Gets the name of this type of instruction. - const char* GetName() const { - return name; - } + Matcher(opcode_type mask, opcode_type expected, handler_function func) + : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. opcode_type GetMask() const { @@ -70,7 +64,6 @@ public: } private: - const char* name; opcode_type mask; opcode_type expected; handler_function fn;