Compare commits


8 commits

SHA1 | Message | Sign-off | Date
8a79bba0d4 | [dynarmic] reduce matcher table noise and cache misses | Signed-off-by: lizzie <lizzie@eden-emu.dev> | 2025-09-04 18:26:53 +02:00
c21eea1304 | [dynarmic] (prolly makes MSVC crash) - use 128MiB code cache | Signed-off-by: lizzie <lizzie@eden-emu.dev> | 2025-09-04 18:26:53 +02:00
75c9ba2adb | [docs] fastmem draft | Signed-off-by: lizzie <lizzie@eden-emu.dev> | 2025-09-04 18:26:53 +02:00
7243704623 | [dynarmic] fix tests_reader and tests_generator | Signed-off-by: lizzie <lizzie@eden-emu.dev> | 2025-09-04 18:26:53 +02:00
da67f58a51 | [dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size | Signed-off-by: lizzie <lizzie@eden-emu.dev> | 2025-09-04 18:26:53 +02:00
f8040d3b52 | [dynarmic] use small vector experiment | Signed-off-by: lizzie <lizzie@eden-emu.dev> | 2025-09-04 18:26:53 +02:00
c30eadcb0d | Fix license headers | | 2025-09-04 18:26:53 +02:00
2d9d7fafcb | [dynarmic] reduce opt pass latency | Signed-off-by: lizzie <lizzie@eden-emu.dev> | 2025-09-04 18:26:53 +02:00
48 changed files with 1920 additions and 2418 deletions


@@ -210,12 +210,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ArmDynarmic32::MakeJit(Common::PageTable* pa
     config.wall_clock_cntpct = m_uses_wall_clock;
     config.enable_cycle_counting = !m_uses_wall_clock;

-    // Code cache size
-#ifdef ARCHITECTURE_arm64
+    // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
     config.code_cache_size = std::uint32_t(128_MiB);
-#else
-    config.code_cache_size = std::uint32_t(512_MiB);
-#endif

     // Allow memory fault handling to work
     if (m_system.DebuggerEnabled()) {
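For reference, a minimal sketch of the `_MiB` user-defined literal these lines rely on (the actual definition lives elsewhere in the tree and may differ; this is an assumption for illustration):

```c++
#include <cstdint>

// Assumed semantics: n mebibytes expressed in bytes.
constexpr unsigned long long operator""_MiB(unsigned long long n) {
    return n * 1024ull * 1024ull;
}

static_assert(128_MiB == 134'217'728ull);
```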


@@ -269,12 +269,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
     config.wall_clock_cntpct = m_uses_wall_clock;
     config.enable_cycle_counting = !m_uses_wall_clock;

-    // Code cache size
-#ifdef ARCHITECTURE_arm64
+    // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
     config.code_cache_size = std::uint32_t(128_MiB);
-#else
-    config.code_cache_size = std::uint32_t(512_MiB);
-#endif

     // Allow memory fault handling to work
     if (m_system.DebuggerEnabled()) {


@@ -23,9 +23,6 @@ option(DYNARMIC_USE_PRECOMPILED_HEADERS "Use precompiled headers" ON)
 option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF)
 option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. cross-compiling)" OFF)
 option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT})
-if (NOT DEFINED DYNARMIC_FRONTENDS)
-    set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable")
-endif()

 # Default to a Release build
 if (NOT CMAKE_BUILD_TYPE)


@@ -0,0 +1,5 @@
+# Fast memory (Fastmem)
+
+![Host to guest translation](./HostToGuest.svg)
+![Fastmem translation](./Fastmem.svg)
+
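The draft is diagram-only so far. As a hedged sketch of the scheme such diagrams typically depict (not necessarily Eden's exact implementation): fastmem reserves one large host mapping so a guest access becomes plain pointer arithmetic, and faults on unmapped pages divert to a slow path.

```c++
#include <cstdint>

// Illustrative only: fastmem_base points at a large reserved host region,
// arranged so guest virtual address 0 corresponds to fastmem_base[0].
extern std::uint8_t* fastmem_base;

std::uint8_t FastRead8(std::uint32_t vaddr) {
    // Hot path: a single host load, no page-table walk or lookup table.
    // If the guest page is unmapped, the host access faults and a signal
    // handler redirects to a slow-path callback (e.g. a MemoryRead8 hook).
    return fastmem_base[vaddr];
}
```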

[SVG image diff suppressed: new file, 128 KiB (one of the two diagrams referenced above)]

[SVG image diff suppressed: new file, 98 KiB (the other diagram referenced above)]


@@ -16,19 +16,31 @@ Note that `Use`ing a value decrements its `use_count` by one. When the `use_coun
 The member functions on `RegAlloc` are just a combination of the above concepts.

+The following registers are reserved for internal use and should NOT participate in register allocation:
+- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access.
+- `%rsp`: Stack pointer.
+
+The layout designates `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable to inlining a directly computed immediate.
+
+NEVER modify `%r15`; this register must be treated as immutable for the entire duration of a JIT block.
+
 ### `Scratch`

-    Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr)
-    Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm)
+```c++
+Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr);
+Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm);
+```

 At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope.

 ### Pure `Use`

-    Xbyak::Reg64 UseGpr(Argument& arg);
-    Xbyak::Xmm UseXmm(Argument& arg);
-    OpArg UseOpArg(Argument& arg);
-    void Use(Argument& arg, HostLoc host_loc);
+```c++
+Xbyak::Reg64 UseGpr(Argument& arg);
+Xbyak::Xmm UseXmm(Argument& arg);
+OpArg UseOpArg(Argument& arg);
+void Use(Argument& arg, HostLoc host_loc);
+```

 At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by
 which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it
@@ -39,9 +51,11 @@ This register **must not** have it's value changed.
 ### `UseScratch`

-    Xbyak::Reg64 UseScratchGpr(Argument& arg);
-    Xbyak::Xmm UseScratchXmm(Argument& arg);
-    void UseScratch(Argument& arg, HostLoc host_loc);
+```c++
+Xbyak::Reg64 UseScratchGpr(Argument& arg);
+Xbyak::Xmm UseScratchXmm(Argument& arg);
+void UseScratch(Argument& arg, HostLoc host_loc);
+```

 At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by
 which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it
@@ -55,7 +69,9 @@ You are free to modify the value in the register. The register is discarded at t
 A `Define` is the defintion of a value. This is the only time when a value may be set.

-    void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
+```c++
+void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
+```

 By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the
 value to the specified register `reg`.
@@ -64,7 +80,9 @@ value to the specified register `reg`.
 Adding a `Define` to an existing value.

-    void DefineValue(IR::Inst* inst, Argument& arg);
+```c++
+void DefineValue(IR::Inst* inst, Argument& arg);
+```

 You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are
 emitted.
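Putting the pieces together, a hypothetical emitter fragment using this API might look as follows (`EmitContext`, the opcode, and the surrounding plumbing are assumptions for illustration, not Eden's actual source):

```c++
void EmitX64::EmitAdd32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    // UseScratch: operand 0 is loaded into a register we are free to clobber.
    const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    // Pure Use: operand 1 must not be modified.
    const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(args[1]).cvt32();

    code.add(result, operand);

    // Define: the value of `inst` now lives in `result`.
    ctx.reg_alloc.DefineValue(inst, result);
}
```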


@@ -23,7 +23,8 @@ One complication dynarmic has is that a compiled block is not uniquely identifia
 the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by
 computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.

+```c++
 u64 LocationDescriptor::UniqueHash() const {
     // This value MUST BE UNIQUE.
     // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
     u64 pc_u64 = u64(arm_pc) << 32;
@@ -31,7 +32,8 @@ computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.
     u64 t_u64 = cpsr.T() ? 1 : 0;
     u64 e_u64 = cpsr.E() ? 2 : 0;
     return pc_u64 | fpscr_u64 | t_u64 | e_u64;
 }
+```

 ## Our implementation isn't actually a stack
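As a worked example (assuming the FPSCR contribution is zero): a Thumb block (`T = 1`, `E = 0`) at `arm_pc = 0x1000` hashes to `(0x1000ull << 32) | 1 = 0x0000'1000'0000'0001`.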
@@ -49,7 +51,8 @@ host addresses for the corresponding the compiled blocks.
 size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8
 showed degraded performance.

+```c++
 struct JitState {
     // ...

     static constexpr size_t RSBSize = 8; // MUST be a power of 2.
@@ -59,14 +62,16 @@ showed degraded performance.
     void ResetRSB();
     // ...
 };
+```

 ### RSB Push

 We insert our prediction at the insertion point iff the RSB doesn't already
 contain a prediction with the same `UniqueHash`.

+```c++
 void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
     using namespace Xbyak::util;

     ASSERT(inst->GetArg(0).IsImmediate());
@@ -99,10 +104,12 @@ contain a prediction with the same `UniqueHash`.
     code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
     code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
     code->L(label);
 }
+```

 In pseudocode:

+```c++
 for (i := 0 .. RSBSize-1)
     if (rsb_location_descriptors[i] == imm64)
         goto label;

@@ -110,13 +117,15 @@ In pseudocode:
 rsb_ptr %= RSBSize;
 rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash
 rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
 label:
+```

 ## RSB Pop

 To check if a predicition is in the RSB, we linearly scan the RSB.

+```c++
 void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
     using namespace Xbyak::util;

     // This calculation has to match up with IREmitter::PushRSB
@@ -133,13 +142,16 @@ To check if a predicition is in the RSB, we linearly scan the RSB.
     }
     code->jmp(rax);
 }
+```

 In pseudocode:

+```c++
 rbx := ComputeUniqueHash()
 rax := ReturnToDispatch
 for (i := 0 .. RSBSize-1)
     if (rbx == rsb_location_descriptors[i])
         rax = rsb_codeptrs[i]
 goto rax
+```


@@ -56,8 +56,6 @@ add_library(dynarmic
     common/lut_from_list.h
     common/math_util.cpp
     common/math_util.h
-    common/memory_pool.cpp
-    common/memory_pool.h
     common/safe_ops.h
     common/spin_lock.h
     common/string_util.h
@@ -78,7 +76,6 @@ add_library(dynarmic
     ir/basic_block.cpp
     ir/basic_block.h
     ir/cond.h
-    ir/ir_emitter.cpp
     ir/ir_emitter.h
     ir/location_descriptor.cpp
     ir/location_descriptor.h
@@ -87,23 +84,14 @@ add_library(dynarmic
     ir/opcodes.cpp
     ir/opcodes.h
     ir/opcodes.inc
-    ir/opt/constant_propagation_pass.cpp
-    ir/opt/dead_code_elimination_pass.cpp
-    ir/opt/identity_removal_pass.cpp
-    ir/opt/ir_matcher.h
-    ir/opt/naming_pass.cpp
-    ir/opt/passes.h
-    ir/opt/polyfill_pass.cpp
-    ir/opt/verification_pass.cpp
+    ir/opt_passes.cpp
+    ir/opt_passes.h
     ir/terminal.h
     ir/type.cpp
     ir/type.h
     ir/value.cpp
     ir/value.h
-)
-if ("A32" IN_LIST DYNARMIC_FRONTENDS)
-    target_sources(dynarmic PRIVATE
+    # A32
     frontend/A32/a32_ir_emitter.cpp
     frontend/A32/a32_ir_emitter.h
     frontend/A32/a32_location_descriptor.cpp
@@ -136,13 +124,7 @@ if ("A32" IN_LIST DYNARMIC_FRONTENDS)
     interface/A32/coprocessor.h
     interface/A32/coprocessor_util.h
     interface/A32/disassembler.h
-    ir/opt/a32_constant_memory_reads_pass.cpp
-    ir/opt/a32_get_set_elimination_pass.cpp
-    )
-endif()
-if ("A64" IN_LIST DYNARMIC_FRONTENDS)
-    target_sources(dynarmic PRIVATE
+    # A64
     frontend/A64/a64_ir_emitter.cpp
     frontend/A64/a64_ir_emitter.h
     frontend/A64/a64_location_descriptor.cpp
@@ -153,11 +135,7 @@ if ("A64" IN_LIST DYNARMIC_FRONTENDS)
     frontend/A64/translate/a64_translate.h
     interface/A64/a64.h
     interface/A64/config.h
-    ir/opt/a64_callback_config_pass.cpp
-    ir/opt/a64_get_set_elimination_pass.cpp
-    ir/opt/a64_merge_interpret_blocks.cpp
-    )
-endif()
+)

 if ("x86_64" IN_LIST ARCHITECTURE)
     target_compile_definitions(dynarmic PRIVATE XBYAK_OLD_DISP_CHECK=1)
@@ -211,21 +189,14 @@ if ("x86_64" IN_LIST ARCHITECTURE)
         common/spin_lock_x64.h
         common/x64_disassemble.cpp
         common/x64_disassemble.h
-    )
-    if ("A32" IN_LIST DYNARMIC_FRONTENDS)
-        target_architecture_specific_sources(dynarmic "x86_64"
+        # A32
         backend/x64/a32_emit_x64.cpp
         backend/x64/a32_emit_x64.h
         backend/x64/a32_emit_x64_memory.cpp
         backend/x64/a32_interface.cpp
         backend/x64/a32_jitstate.cpp
         backend/x64/a32_jitstate.h
-    )
-    endif()
-    if ("A64" IN_LIST DYNARMIC_FRONTENDS)
-        target_architecture_specific_sources(dynarmic "x86_64"
+        # A64
         backend/x64/a64_emit_x64.cpp
         backend/x64/a64_emit_x64.h
         backend/x64/a64_emit_x64_memory.cpp
@@ -233,7 +204,6 @@ if ("x86_64" IN_LIST ARCHITECTURE)
         backend/x64/a64_jitstate.cpp
         backend/x64/a64_jitstate.h
     )
-    endif()
 endif()

 if ("arm64" IN_LIST ARCHITECTURE)
@@ -277,25 +247,17 @@ if ("arm64" IN_LIST ARCHITECTURE)
         backend/arm64/verbose_debugging_output.h
         common/spin_lock_arm64.cpp
         common/spin_lock_arm64.h
-    )
-    if ("A32" IN_LIST DYNARMIC_FRONTENDS)
-        target_architecture_specific_sources(dynarmic "arm64"
+        # A32
         backend/arm64/a32_address_space.cpp
         backend/arm64/a32_address_space.h
         backend/arm64/a32_core.h
         backend/arm64/a32_interface.cpp
-    )
-    endif()
-    if ("A64" IN_LIST DYNARMIC_FRONTENDS)
-        target_architecture_specific_sources(dynarmic "arm64"
+        # A64
         backend/arm64/a64_address_space.cpp
         backend/arm64/a64_address_space.h
         backend/arm64/a64_core.h
         backend/arm64/a64_interface.cpp
     )
-    endif()
 endif()

 if ("riscv" IN_LIST ARCHITECTURE)
@@ -324,21 +286,14 @@ if ("riscv" IN_LIST ARCHITECTURE)
         backend/riscv64/reg_alloc.cpp
         backend/riscv64/reg_alloc.h
         backend/riscv64/stack_layout.h
-    )
-    if ("A32" IN_LIST DYNARMIC_FRONTENDS)
-        target_sources(dynarmic PRIVATE
+        # A32
         backend/riscv64/a32_address_space.cpp
         backend/riscv64/a32_address_space.h
         backend/riscv64/a32_core.h
         backend/riscv64/a32_interface.cpp
         backend/riscv64/code_block.h
     )
-    endif()
-    if ("A64" IN_LIST DYNARMIC_FRONTENDS)
     message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
-    endif()
 endif()

 if (WIN32)
@@ -416,7 +371,7 @@ target_link_libraries(dynarmic
 )

 if (BOOST_NO_HEADERS)
     target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool)
 else()
     target_link_libraries(dynarmic PRIVATE Boost::headers)
 endif()


@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 /* This file is part of the dynarmic project.
  * Copyright (c) 2022 MerryMage
  * SPDX-License-Identifier: 0BSD

@@ -16,7 +19,7 @@
 #include "dynarmic/frontend/A32/translate/a32_translate.h"
 #include "dynarmic/interface/A32/config.h"
 #include "dynarmic/interface/exclusive_monitor.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"

 namespace Dynarmic::Backend::Arm64 {

@@ -163,21 +166,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
 IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
     IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
-    Optimization::PolyfillPass(ir_block, {});
-    Optimization::NamingPass(ir_block);
-    if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
-        Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
-        Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
-        Optimization::ConstantPropagation(ir_block);
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    Optimization::IdentityRemovalPass(ir_block);
-    Optimization::VerificationPass(ir_block);
+    Optimization::Optimize(ir_block, conf, {});
     return ir_block;
 }
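The consolidated entry point itself is not part of this compare, but judging from the pass list it replaces, `Optimization::Optimize` for A32 presumably looks roughly like this (a sketch, not the actual `ir/opt_passes.cpp` source):

```c++
void Optimize(IR::Block& block, const A32::UserConfig& conf, const PolyfillOptions& polyfill_options) {
    Optimization::PolyfillPass(block, polyfill_options);
    Optimization::NamingPass(block);
    if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
        Optimization::A32GetSetElimination(block, {.convert_nzc_to_nz = true});
        Optimization::DeadCodeElimination(block);
    }
    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
        Optimization::A32ConstantMemoryReads(block, conf.callbacks);
        Optimization::ConstantPropagation(block);
        Optimization::DeadCodeElimination(block);
    }
    Optimization::IdentityRemovalPass(block);
    Optimization::VerificationPass(block);
}
```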


@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 /* This file is part of the dynarmic project.
  * Copyright (c) 2022 MerryMage
  * SPDX-License-Identifier: 0BSD

@@ -15,7 +18,7 @@
 #include "dynarmic/frontend/A64/translate/a64_translate.h"
 #include "dynarmic/interface/A64/config.h"
 #include "dynarmic/interface/exclusive_monitor.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"

 namespace Dynarmic::Backend::Arm64 {

@@ -331,22 +334,7 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
     const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
     IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code,
                                         {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
-    Optimization::A64CallbackConfigPass(ir_block, conf);
-    Optimization::NamingPass(ir_block);
-    if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
-        Optimization::A64GetSetElimination(ir_block);
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
-        Optimization::ConstantPropagation(ir_block);
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
-        Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
-    }
-    Optimization::VerificationPass(ir_block);
+    Optimization::Optimize(ir_block, conf, {});
     return ir_block;
 }


@@ -15,7 +15,7 @@
 #include "dynarmic/backend/riscv64/stack_layout.h"
 #include "dynarmic/frontend/A32/a32_location_descriptor.h"
 #include "dynarmic/frontend/A32/translate/a32_translate.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"

 namespace Dynarmic::Backend::RV64 {

@@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
 IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
     IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
-    Optimization::PolyfillPass(ir_block, {});
-    if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
-        Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
-        Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
-        Optimization::ConstantPropagation(ir_block);
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    Optimization::VerificationPass(ir_block);
+    Optimization::Optimize(ir_block, conf, {});
     return ir_block;
 }


@@ -29,7 +29,7 @@
 #include "dynarmic/interface/A32/a32.h"
 #include "dynarmic/ir/basic_block.h"
 #include "dynarmic/ir/location_descriptor.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"

 namespace Dynarmic::A32 {

@@ -217,19 +217,7 @@ private:
         block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE);
         IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
-        Optimization::PolyfillPass(ir_block, polyfill_options);
-        Optimization::NamingPass(ir_block);
-        if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
-            Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true});
-            Optimization::DeadCodeElimination(ir_block);
-        }
-        if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
-            Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
-            Optimization::ConstantPropagation(ir_block);
-            Optimization::DeadCodeElimination(ir_block);
-        }
-        Optimization::IdentityRemovalPass(ir_block);
-        Optimization::VerificationPass(ir_block);
+        Optimization::Optimize(ir_block, conf, polyfill_options);
         return emitter.Emit(ir_block);
     }


@@ -25,7 +25,7 @@
 #include "dynarmic/frontend/A64/translate/a64_translate.h"
 #include "dynarmic/interface/A64/a64.h"
 #include "dynarmic/ir/basic_block.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"

 namespace Dynarmic::A64 {

@@ -275,21 +275,7 @@ private:
         const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
         IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
                                             {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
-        Optimization::PolyfillPass(ir_block, polyfill_options);
-        Optimization::A64CallbackConfigPass(ir_block, conf);
-        Optimization::NamingPass(ir_block);
-        if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
-            Optimization::A64GetSetElimination(ir_block);
-            Optimization::DeadCodeElimination(ir_block);
-        }
-        if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
-            Optimization::ConstantPropagation(ir_block);
-            Optimization::DeadCodeElimination(ir_block);
-        }
-        if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
-            Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
-        }
-        Optimization::VerificationPass(ir_block);
+        Optimization::Optimize(ir_block, conf, polyfill_options);
         return emitter.Emit(ir_block).entrypoint;
     }


@@ -118,7 +118,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
     values.push_back(inst);
     ASSERT(size_t(total_uses) + inst->UseCount() < std::numeric_limits<uint16_t>::max());
     total_uses += inst->UseCount();
-    max_bit_width = std::max<uint8_t>(max_bit_width, GetBitWidth(inst->GetType()));
+    max_bit_width = std::max<uint8_t>(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType())));
 }

 void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {


@@ -12,6 +12,7 @@
 #include <functional>
 #include <optional>

+#include "boost/container/small_vector.hpp"
 #include "dynarmic/common/common_types.h"
 #include <xbyak/xbyak.h>
 #include <boost/container/static_vector.hpp>

@@ -77,13 +78,13 @@ public:
         return std::find(values.begin(), values.end(), inst) != values.end();
     }
     inline size_t GetMaxBitWidth() const noexcept {
-        return max_bit_width;
+        return 1 << max_bit_width;
     }
     void AddValue(IR::Inst* inst) noexcept;
     void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
 private:
     //non trivial
-    std::vector<IR::Inst*> values; //24
+    boost::container::small_vector<IR::Inst*, 3> values; //24
     // Block state
     uint16_t total_uses = 0; //8
     //sometimes zeroed

@@ -93,10 +94,10 @@ private:
     uint16_t is_being_used_count = 0; //8
     uint16_t current_references = 0; //8
     // Value state
-    uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128
+    uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6)
+    uint8_t lru_counter : 2 = 0; //1
     bool is_scratch : 1 = false; //1
     bool is_set_last_use : 1 = false; //1
-    alignas(16) uint8_t lru_counter = 0; //1
     friend class RegAlloc;
 };
 static_assert(sizeof(HostLocInfo) == 64);
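The packing trick behind the two reg_alloc hunks above: valid widths are powers of two, so only the exponent needs storing and 4 bits suffice (note that `log2(128)` is 7, so the new comment's `(0, 1, 2, 3, 4, 5, 6)` range looks off by one). A standalone sketch of the encode/decode pair:

```c++
#include <bit>
#include <cstdint>

// Encode: countr_zero of a power of two is exactly its base-2 exponent.
constexpr std::uint8_t EncodeWidth(std::uint16_t width) {
    return static_cast<std::uint8_t>(std::countr_zero(width));
}

// Decode: recover the width with a shift, as GetMaxBitWidth() now does.
constexpr std::uint16_t DecodeWidth(std::uint8_t log2_width) {
    return static_cast<std::uint16_t>(1u << log2_width);
}

static_assert(EncodeWidth(128) == 7 && DecodeWidth(7) == 128);
```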


@@ -1,13 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/common/memory_pool.h"
#include <cstdlib>
namespace Dynarmic::Common {
} // namespace Dynarmic::Common


@@ -1,61 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <cstddef>
#include <vector>
namespace Dynarmic::Common {
/// @tparam object_size Byte-size of objects to construct
/// @tparam slab_size Number of objects to have per slab
template<size_t object_size, size_t slab_size>
class Pool {
public:
inline Pool() noexcept {
AllocateNewSlab();
}
inline ~Pool() noexcept {
std::free(current_slab);
for (char* slab : slabs) {
std::free(slab);
}
}
Pool(const Pool&) = delete;
Pool(Pool&&) = delete;
Pool& operator=(const Pool&) = delete;
Pool& operator=(Pool&&) = delete;
/// @brief Returns a pointer to an `object_size`-bytes block of memory.
[[nodiscard]] void* Alloc() noexcept {
if (remaining == 0) {
slabs.push_back(current_slab);
AllocateNewSlab();
}
void* ret = static_cast<void*>(current_ptr);
current_ptr += object_size;
remaining--;
return ret;
}
private:
/// @brief Allocates a completely new memory slab.
/// Used when an entirely new slab is needed
/// due the current one running out of usable space.
void AllocateNewSlab() noexcept {
current_slab = static_cast<char*>(std::malloc(object_size * slab_size));
current_ptr = current_slab;
remaining = slab_size;
}
std::vector<char*> slabs;
char* current_slab = nullptr;
char* current_ptr = nullptr;
size_t remaining = 0;
};
} // namespace Dynarmic::Common
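For context, this is how `basic_block.cpp` used the pool before this patch (slab parameters as previously declared in `basic_block.h`); the pattern relies on placement-new and, presumably, on `IR::Inst` being trivially destructible, since `Pool` frees raw slabs without running destructors:

```c++
// One slab holds 2 MiB worth of IR::Inst objects.
Dynarmic::Common::Pool<sizeof(IR::Inst), 2097152UL / sizeof(IR::Inst)> pool;

void* storage = pool.Alloc();                     // O(1) bump allocation
IR::Inst* inst = new (storage) IR::Inst(opcode);  // placement-new into the slab
```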


@@ -9,12 +9,9 @@
 #pragma once

 #include <string>
-#include <utility>

 #include <fmt/format.h>

 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/common_types.h"
 #include "dynarmic/interface/A32/coprocessor_util.h"
 #include "dynarmic/ir/cond.h"

@@ -89,23 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) {
 inline size_t RegNumber(Reg reg) {
     ASSERT(reg != Reg::INVALID_REG);
-    return static_cast<size_t>(reg);
+    return size_t(reg);
 }

 inline size_t RegNumber(ExtReg reg) {
     if (IsSingleExtReg(reg)) {
-        return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::S0);
+        return size_t(reg) - size_t(ExtReg::S0);
+    } else if (IsDoubleExtReg(reg)) {
+        return size_t(reg) - size_t(ExtReg::D0);
     }
-    if (IsDoubleExtReg(reg)) {
-        return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::D0);
-    }
-    if (IsQuadExtReg(reg)) {
-        return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::Q0);
-    }
-    ASSERT_MSG(false, "Invalid extended register");
+    ASSERT(IsQuadExtReg(reg));
+    return size_t(reg) - size_t(ExtReg::Q0);
 }

 inline Reg operator+(Reg reg, size_t number) {


@@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher<Visitor, u32>;
 template<typename V>
 std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
-    std::vector<ASIMDMatcher<V>> table = {
-#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
+    std::vector<std::pair<const char*, ASIMDMatcher<V>>> table = {
+#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
 #include "./asimd.inc"
 #undef INST
     };

     // Exceptions to the rule of thumb.
     const std::set<std::string> comes_first{
         "VBIC, VMOV, VMVN, VORR (immediate)",

@@ -53,19 +50,21 @@ std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
         "VQDMULH (scalar)",
         "VQRDMULH (scalar)",
     };
-    const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
-        return comes_first.count(matcher.GetName()) > 0;
+    const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
+        return comes_first.count(e.first) > 0;
     });
-    const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
-        return comes_last.count(matcher.GetName()) == 0;
+    const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
+        return comes_last.count(e.first) == 0;
     });
     // If a matcher has more bits in its mask it is more specific, so it should come first.
-    std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) {
-        return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
+    std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) {
+        return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
     });
-    return table;
+    std::vector<ASIMDMatcher<V>> final_table;
+    std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) {
+        return e.second;
+    });
+    return final_table;
 }

 template<typename V>
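One caveat in the new tail of `GetASIMDDecodeTable`: `std::transform` writes through `final_table.begin()` while `final_table` is still empty, which is undefined behaviour. A corrected version would append through a back-inserter:

```c++
std::vector<ASIMDMatcher<V>> final_table;
final_table.reserve(table.size());
std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table),
               [](auto const& e) { return e.second; });
return final_table;
```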


@@ -30,22 +30,18 @@ std::optional<std::reference_wrapper<const VFPMatcher<V>>> DecodeVFP(u32 instruc
     static const struct Tables {
         Table unconditional;
         Table conditional;
-    } tables = [] {
+    } tables = []() {
         Table list = {
 #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
 #include "./vfp.inc"
 #undef INST
         };
-        const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
+        auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
             return (matcher.GetMask() & 0xF0000000) == 0xF0000000;
         });
         return Tables{
-            Table{list.begin(), division},
-            Table{division, list.end()},
+            Table{list.begin(), it},
+            Table{it, list.end()},
         };
     }();


@@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) {
 template<typename V>
 constexpr DecodeTable<V> GetDecodeTable() {
-    std::vector<Matcher<V>> list = {
-#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
+    std::vector<std::pair<const char*, Matcher<V>>> list = {
+#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
 #include "./a64.inc"
 #undef INST
     };

     // If a matcher has more bits in its mask it is more specific, so it should come first.
-    std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) {
+    std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) {
         // If a matcher has more bits in its mask it is more specific, so it should come first.
-        return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
+        return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
     });

     // Exceptions to the above rule of thumb.
-    std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
+    std::stable_partition(list.begin(), list.end(), [&](const auto& e) {
         return std::set<std::string>{
             "MOVI, MVNI, ORR, BIC (vector, immediate)",
             "FMOV (vector, immediate)",
             "Unallocated SIMD modified immediate",
-        }.count(matcher.GetName()) > 0;
+        }.count(e.first) > 0;
     });

     DecodeTable<V> table{};
     for (size_t i = 0; i < table.size(); ++i) {
-        for (auto matcher : list) {
-            const auto expect = detail::ToFastLookupIndex(matcher.GetExpected());
-            const auto mask = detail::ToFastLookupIndex(matcher.GetMask());
+        for (auto const& e : list) {
+            const auto expect = detail::ToFastLookupIndex(e.second.GetExpected());
+            const auto mask = detail::ToFastLookupIndex(e.second.GetMask());
             if ((i & mask) == expect) {
-                table[i].push_back(matcher);
+                table[i].push_back(e.second);
             }
         }
     }
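For reference, the lookup path this bucketed table serves presumably looks like the following sketch (the `Matches` predicate is assumed to be the usual `(instruction & mask) == expected` check):

```c++
template<typename V>
std::optional<std::reference_wrapper<const Matcher<V>>> Decode(u32 instruction) {
    static const auto table = GetDecodeTable<V>();
    // Hash into a bucket, then linearly scan only that bucket's
    // specificity-sorted matchers.
    const auto& bucket = table[detail::ToFastLookupIndex(instruction)];
    for (const auto& matcher : bucket) {
        if (matcher.Matches(instruction)) {
            return std::cref(matcher);
        }
    }
    return std::nullopt;
}
```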


@@ -70,11 +70,9 @@ struct detail {
         return std::make_tuple(mask, expect);
     }

-    /**
-     * Generates the masks and shifts for each argument.
-     * A '-' in a bitstring indicates that we don't care about that value.
-     * An argument is specified by a continuous string of the same character.
-     */
+    /// @brief Generates the masks and shifts for each argument.
+    /// A '-' in a bitstring indicates that we don't care about that value.
+    /// An argument is specified by a continuous string of the same character.
     template<size_t N>
     static consteval auto GetArgInfo(std::array<char, opcode_bitsize> bitstring) {
         std::array<opcode_type, N> masks = {};
@@ -98,7 +96,6 @@ struct detail {
             if constexpr (N > 0) {
                 const size_t bit_position = opcode_bitsize - i - 1;
-
                 if (arg_index >= N)
                     throw std::out_of_range("Unexpected field");
@@ -109,20 +106,16 @@ struct detail {
                 }
             }
         }
 #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__)
         // Avoids a MSVC ICE, and avoids Android NDK issue.
         ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; }));
 #endif
         return std::make_tuple(masks, shifts);
     }

-    /**
-     * This struct's Make member function generates a lambda which decodes an instruction based on
-     * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a
-     * template argument.
-     */
+    /// @brief This struct's Make member function generates a lambda which decodes an instruction
+    /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is
+    /// provided as a template argument.
     template<typename FnT>
     struct VisitorCaller;
@@ -130,36 +123,36 @@ struct detail {
 #    pragma warning(push)
 #    pragma warning(disable : 4800)  // forcing value to bool 'true' or 'false' (performance warning)
 #endif
-    template<typename Visitor, typename... Args, typename CallRetT>
-    struct VisitorCaller<CallRetT (Visitor::*)(Args...)> {
+    template<typename V, typename... Args, typename ReturnType>
+    struct VisitorCaller<ReturnType (V::*)(Args...)> {
         template<size_t... iota>
-        static auto Make(std::integer_sequence<size_t, iota...>,
-                         CallRetT (Visitor::*const fn)(Args...),
+        static constexpr auto Make(std::integer_sequence<size_t, iota...>,
+                                   ReturnType (V::*const fn)(Args...),
                          const std::array<opcode_type, sizeof...(iota)> arg_masks,
                          const std::array<size_t, sizeof...(iota)> arg_shifts) {
-            static_assert(std::is_same_v<visitor_type, Visitor>, "Member function is not from Matcher's Visitor");
-            return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) {
+            static_assert(std::is_same_v<visitor_type, V>, "Member function is not from Matcher's Visitor");
+            return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) {
                 (void)instruction;
                 (void)arg_masks;
                 (void)arg_shifts;
-                return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
+                return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
             };
         }
     };

-    template<typename Visitor, typename... Args, typename CallRetT>
-    struct VisitorCaller<CallRetT (Visitor::*)(Args...) const> {
+    template<typename V, typename... Args, typename ReturnType>
+    struct VisitorCaller<ReturnType (V::*)(Args...) const> {
         template<size_t... iota>
-        static auto Make(std::integer_sequence<size_t, iota...>,
-                         CallRetT (Visitor::*const fn)(Args...) const,
+        static constexpr auto Make(std::integer_sequence<size_t, iota...>,
+                                   ReturnType (V::*const fn)(Args...) const,
                          const std::array<opcode_type, sizeof...(iota)> arg_masks,
                          const std::array<size_t, sizeof...(iota)> arg_shifts) {
-            static_assert(std::is_same_v<visitor_type, const Visitor>, "Member function is not from Matcher's Visitor");
-            return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) {
+            static_assert(std::is_same_v<visitor_type, const V>, "Member function is not from Matcher's Visitor");
+            return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) {
                 (void)instruction;
                 (void)arg_masks;
                 (void)arg_shifts;
-                return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
+                return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
             };
         }
     };
@@ -167,27 +160,21 @@ struct detail {
 #    pragma warning(pop)
 #endif

-    /**
-     * Creates a matcher that can match and parse instructions based on bitstring.
-     * See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
-     */
-    template<auto bitstring, typename FnT>
-    static auto GetMatcher(FnT fn, const char* const name) {
-        constexpr size_t args_count = mcl::parameter_count_v<FnT>;
+    /// @brief Creates a matcher that can match and parse instructions based on bitstring.
+    /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
+    template<auto bitstring, typename F>
+    static constexpr auto GetMatcher(F fn) {
+        constexpr size_t args_count = mcl::parameter_count_v<F>;
         constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring));
         constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring));
         constexpr auto arg_masks = std::get<0>(GetArgInfo<args_count>(bitstring));
         constexpr auto arg_shifts = std::get<1>(GetArgInfo<args_count>(bitstring));
-
-        using Iota = std::make_index_sequence<args_count>;
-        const auto proxy_fn = VisitorCaller<FnT>::Make(Iota(), fn, arg_masks, arg_shifts);
-        return MatcherT(name, mask, expect, proxy_fn);
+        const auto proxy_fn = VisitorCaller<F>::Make(std::make_index_sequence<args_count>(), fn, arg_masks, arg_shifts);
+        return MatcherT(mask, expect, proxy_fn);
     }
 };

-#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn, name)
+#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn)

 } // namespace detail
 } // namespace Dynarmic::Decoder
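A small worked example of the bitstring convention described above, using an 8-bit analogue (the real tables use 32-bit opcodes):

```c++
#include <cstdint>

// For bitstring "1101----": '-' is a don't-care, so the mask has 1s where
// bits are specified and expect holds their required values.
constexpr std::uint8_t mask   = 0b1111'0000;
constexpr std::uint8_t expect = 0b1101'0000;

static_assert((0b1101'0110 & mask) == expect);  // matches
static_assert((0b1110'0110 & mask) != expect);  // does not match
```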


@@ -31,14 +31,8 @@ public:
     using visitor_type = Visitor;
     using handler_return_type = typename Visitor::instruction_return_type;
     using handler_function = std::function<handler_return_type(Visitor&, opcode_type)>;

-    Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func)
-        : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {}
-
-    /// Gets the name of this type of instruction.
-    const char* GetName() const {
-        return name;
-    }
+    Matcher(opcode_type mask, opcode_type expected, handler_function func)
+        : mask{mask}, expected{expected}, fn{std::move(func)} {}

     /// Gets the mask for this instruction.
     opcode_type GetMask() const {

@@ -70,7 +64,6 @@ public:
     }

 private:
-    const char* name;
     opcode_type mask;
     opcode_type expected;
     handler_function fn;


@@ -15,8 +15,6 @@
 #include <fmt/format.h>

 #include "dynarmic/common/assert.h"
-#include "dynarmic/common/memory_pool.h"
 #include "dynarmic/frontend/A32/a32_types.h"
 #include "dynarmic/frontend/A64/a64_types.h"
 #include "dynarmic/ir/cond.h"

@@ -27,8 +25,7 @@ namespace Dynarmic::IR {
 Block::Block(const LocationDescriptor& location)
         : location{location},
           end_location{location},
-          cond{Cond::AL},
-          instruction_alloc_pool{std::make_unique<std::remove_reference_t<decltype(*instruction_alloc_pool)>>()}
+          cond{Cond::AL}
 {
 }

@@ -40,7 +37,7 @@ Block::Block(const LocationDescriptor& location)
 /// @param args A sequence of Value instances used as arguments for the instruction.
 /// @returns Iterator to the newly created instruction.
 Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
-    IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);
+    IR::Inst* inst = new IR::Inst(opcode);
     DEBUG_ASSERT(args.size() == inst->NumArgs());
     std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
         inst->SetArg(index, arg);


@@ -21,7 +21,6 @@
 #include "dynarmic/ir/terminal.h"
 #include "dynarmic/ir/value.h"
 #include "dynarmic/ir/dense_list.h"
-#include "dynarmic/common/memory_pool.h"

 namespace Dynarmic::IR {

@@ -174,8 +173,6 @@ private:
     LocationDescriptor end_location;
     /// Conditional to pass in order to execute this block
     Cond cond;
-    /// Memory pool for instruction list
-    std::unique_ptr<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>> instruction_alloc_pool;
     /// Terminal instruction of this block.
     Terminal terminal = Term::Invalid{};
     /// Number of cycles this block takes to execute if the conditional fails.


@@ -1,21 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/ir/ir_emitter.h"
#include <vector>
#include "dynarmic/common/assert.h"
#include <mcl/bit_cast.hpp>
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::IR {
} // namespace Dynarmic::IR


@@ -1,70 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/interface/A32/config.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) {
for (auto& inst : block) {
switch (inst.GetOpcode()) {
case IR::Opcode::A32ReadMemory8: {
if (!inst.AreAllArgsImmediates()) {
break;
}
const u32 vaddr = inst.GetArg(1).GetU32();
if (cb->IsReadOnlyMemory(vaddr)) {
const u8 value_from_memory = cb->MemoryRead8(vaddr);
inst.ReplaceUsesWith(IR::Value{value_from_memory});
}
break;
}
case IR::Opcode::A32ReadMemory16: {
if (!inst.AreAllArgsImmediates()) {
break;
}
const u32 vaddr = inst.GetArg(1).GetU32();
if (cb->IsReadOnlyMemory(vaddr)) {
const u16 value_from_memory = cb->MemoryRead16(vaddr);
inst.ReplaceUsesWith(IR::Value{value_from_memory});
}
break;
}
case IR::Opcode::A32ReadMemory32: {
if (!inst.AreAllArgsImmediates()) {
break;
}
const u32 vaddr = inst.GetArg(1).GetU32();
if (cb->IsReadOnlyMemory(vaddr)) {
const u32 value_from_memory = cb->MemoryRead32(vaddr);
inst.ReplaceUsesWith(IR::Value{value_from_memory});
}
break;
}
case IR::Opcode::A32ReadMemory64: {
if (!inst.AreAllArgsImmediates()) {
break;
}
const u32 vaddr = inst.GetArg(1).GetU32();
if (cb->IsReadOnlyMemory(vaddr)) {
const u64 value_from_memory = cb->MemoryRead64(vaddr);
inst.ReplaceUsesWith(IR::Value{value_from_memory});
}
break;
}
default:
break;
}
}
}
} // namespace Dynarmic::Optimization


@@ -1,382 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <algorithm>
#include <array>
#include <functional>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"
#include "dynarmic/frontend/A32/a32_ir_emitter.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/value.h"
namespace Dynarmic::Optimization {
namespace {
void FlagsPass(IR::Block& block) {
using Iterator = std::reverse_iterator<IR::Block::iterator>;
struct FlagInfo {
bool set_not_required = false;
bool has_value_request = false;
Iterator value_request = {};
};
struct ValuelessFlagInfo {
bool set_not_required = false;
};
ValuelessFlagInfo nzcvq;
ValuelessFlagInfo nzcv;
ValuelessFlagInfo nz;
FlagInfo c_flag;
FlagInfo ge;
auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) {
if (info.has_value_request) {
info.value_request->ReplaceUsesWith(value);
}
info.has_value_request = false;
if (info.set_not_required) {
inst->Invalidate();
}
info.set_not_required = true;
};
auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) {
if (info.set_not_required) {
inst->Invalidate();
}
info.set_not_required = true;
};
auto do_get = [](FlagInfo& info, Iterator inst) {
if (info.has_value_request) {
info.value_request->ReplaceUsesWith(IR::Value{&*inst});
}
info.has_value_request = true;
info.value_request = inst;
};
A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}};
for (auto inst = block.rbegin(); inst != block.rend(); ++inst) {
auto const opcode = inst->GetOpcode();
switch (opcode) {
case IR::Opcode::A32GetCFlag: {
do_get(c_flag, inst);
break;
}
case IR::Opcode::A32SetCpsrNZCV: {
if (c_flag.has_value_request) {
ir.SetInsertionPointBefore(inst.base()); // base is one ahead
IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)});
c_flag.value_request->ReplaceUsesWith(c);
c_flag.has_value_request = false;
break; // This case will be executed again because of the above
}
do_set_valueless(nzcv, inst);
nz = {.set_not_required = true};
c_flag = {.set_not_required = true};
break;
}
case IR::Opcode::A32SetCpsrNZCVRaw: {
if (c_flag.has_value_request) {
nzcv.set_not_required = false;
}
do_set_valueless(nzcv, inst);
nzcvq = {};
nz = {.set_not_required = true};
c_flag = {.set_not_required = true};
break;
}
case IR::Opcode::A32SetCpsrNZCVQ: {
if (c_flag.has_value_request) {
nzcvq.set_not_required = false;
}
do_set_valueless(nzcvq, inst);
nzcv = {.set_not_required = true};
nz = {.set_not_required = true};
c_flag = {.set_not_required = true};
break;
}
case IR::Opcode::A32SetCpsrNZ: {
do_set_valueless(nz, inst);
nzcvq = {};
nzcv = {};
break;
}
case IR::Opcode::A32SetCpsrNZC: {
if (c_flag.has_value_request) {
c_flag.value_request->ReplaceUsesWith(inst->GetArg(1));
c_flag.has_value_request = false;
}
if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) {
const auto nz_value = inst->GetArg(0);
inst->Invalidate();
ir.SetInsertionPointBefore(inst.base());
ir.SetCpsrNZ(IR::NZCV{nz_value});
nzcvq = {};
nzcv = {};
nz = {.set_not_required = true};
break;
}
if (nz.set_not_required && c_flag.set_not_required) {
inst->Invalidate();
} else if (nz.set_not_required) {
inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker());
}
nz.set_not_required = true;
c_flag.set_not_required = true;
nzcv = {};
nzcvq = {};
break;
}
case IR::Opcode::A32SetGEFlags: {
do_set(ge, inst->GetArg(0), inst);
break;
}
case IR::Opcode::A32GetGEFlags: {
do_get(ge, inst);
break;
}
case IR::Opcode::A32SetGEFlagsCompressed: {
ge = {.set_not_required = true};
break;
}
case IR::Opcode::A32OrQFlag: {
break;
}
default: {
if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) {
nzcvq = {};
nzcv = {};
nz = {};
c_flag = {};
ge = {};
}
break;
}
}
}
}
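// Forward pass that caches the last known value of each core and extended
// register: later gets are replaced with the cached value, and sets that are
// overwritten before being observed are invalidated. Extended registers also
// track the width (S/D/Q) they were last written with, so a cached value is
// only forwarded to an access of the exact same width.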
void RegisterPass(IR::Block& block) {
using Iterator = IR::Block::iterator;
struct RegInfo {
IR::Value register_value;
std::optional<Iterator> last_set_instruction;
};
std::array<RegInfo, 15> reg_info;
const auto do_get = [](RegInfo& info, Iterator get_inst) {
if (info.register_value.IsEmpty()) {
info.register_value = IR::Value(&*get_inst);
return;
}
get_inst->ReplaceUsesWith(info.register_value);
};
const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) {
if (info.last_set_instruction) {
(*info.last_set_instruction)->Invalidate();
}
info = {
.register_value = value,
.last_set_instruction = set_inst,
};
};
enum class ExtValueType {
Empty,
Single,
Double,
VectorDouble,
VectorQuad,
};
struct ExtRegInfo {
ExtValueType value_type = {};
IR::Value register_value;
std::optional<Iterator> last_set_instruction;
};
std::array<ExtRegInfo, 64> ext_reg_info;
const auto do_ext_get = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, Iterator get_inst) {
if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
for (auto& info : infos) {
info.get() = {
.value_type = type,
.register_value = IR::Value(&*get_inst),
.last_set_instruction = std::nullopt,
};
}
return;
}
get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value);
};
const auto do_ext_set = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, IR::Value value, Iterator set_inst) {
if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
if (std::data(infos)[0].get().last_set_instruction) {
(*std::data(infos)[0].get().last_set_instruction)->Invalidate();
}
}
for (auto& info : infos) {
info.get() = {
.value_type = type,
.register_value = value,
.last_set_instruction = set_inst,
};
}
};
// Location and version don't matter here.
A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}};
for (auto inst = block.begin(); inst != block.end(); ++inst) {
auto const opcode = inst->GetOpcode();
switch (opcode) {
case IR::Opcode::A32GetRegister: {
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
ASSERT(reg != A32::Reg::PC);
const size_t reg_index = static_cast<size_t>(reg);
do_get(reg_info[reg_index], inst);
break;
}
case IR::Opcode::A32SetRegister: {
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
if (reg == A32::Reg::PC) {
break;
}
const auto reg_index = static_cast<size_t>(reg);
do_set(reg_info[reg_index], inst->GetArg(1), inst);
break;
}
case IR::Opcode::A32GetExtendedRegister32: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst);
break;
}
case IR::Opcode::A32SetExtendedRegister32: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst);
break;
}
case IR::Opcode::A32GetExtendedRegister64: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_ext_get(ExtValueType::Double,
{
ext_reg_info[reg_index * 2 + 0],
ext_reg_info[reg_index * 2 + 1],
},
inst);
break;
}
case IR::Opcode::A32SetExtendedRegister64: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_ext_set(ExtValueType::Double,
{
ext_reg_info[reg_index * 2 + 0],
ext_reg_info[reg_index * 2 + 1],
},
inst->GetArg(1),
inst);
break;
}
case IR::Opcode::A32GetVector: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
if (A32::IsDoubleExtReg(reg)) {
do_ext_get(ExtValueType::VectorDouble,
{
ext_reg_info[reg_index * 2 + 0],
ext_reg_info[reg_index * 2 + 1],
},
inst);
} else {
DEBUG_ASSERT(A32::IsQuadExtReg(reg));
do_ext_get(ExtValueType::VectorQuad,
{
ext_reg_info[reg_index * 4 + 0],
ext_reg_info[reg_index * 4 + 1],
ext_reg_info[reg_index * 4 + 2],
ext_reg_info[reg_index * 4 + 3],
},
inst);
}
break;
}
case IR::Opcode::A32SetVector: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
if (A32::IsDoubleExtReg(reg)) {
ir.SetInsertionPointAfter(inst);
const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)});
do_ext_set(ExtValueType::VectorDouble,
{
ext_reg_info[reg_index * 2 + 0],
ext_reg_info[reg_index * 2 + 1],
},
stored_value,
inst);
} else {
DEBUG_ASSERT(A32::IsQuadExtReg(reg));
do_ext_set(ExtValueType::VectorQuad,
{
ext_reg_info[reg_index * 4 + 0],
ext_reg_info[reg_index * 4 + 1],
ext_reg_info[reg_index * 4 + 2],
ext_reg_info[reg_index * 4 + 3],
},
inst->GetArg(1),
inst);
}
break;
}
default: {
if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) {
reg_info = {};
ext_reg_info = {};
}
break;
}
}
}
}
} // namespace
void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) {
FlagsPass(block);
RegisterPass(block);
}
} // namespace Dynarmic::Optimization


@ -1,57 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/frontend/A64/a64_ir_emitter.h"
#include "dynarmic/interface/A64/config.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
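// When the user does not hook data cache operations, DC ZVA is expanded inline
// into a run of 128-/64-/32-bit zero stores sized by dczid_el0, and every
// A64DataCacheOperationRaised instruction is then invalidated.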
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
if (conf.hook_data_cache_operations) {
return;
}
for (auto& inst : block) {
if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) {
continue;
}
const auto op = static_cast<A64::DataCacheOperation>(inst.GetArg(1).GetU64());
if (op == A64::DataCacheOperation::ZeroByVA) {
A64::IREmitter ir{block};
ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}};
ir.SetInsertionPointBefore(&inst);
size_t bytes = 4 << static_cast<size_t>(conf.dczid_el0 & 0b1111);
IR::U64 addr{inst.GetArg(2)};
const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0));
while (bytes >= 16) {
ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA);
addr = ir.Add(addr, ir.Imm64(16));
bytes -= 16;
}
while (bytes >= 8) {
ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA);
addr = ir.Add(addr, ir.Imm64(8));
bytes -= 8;
}
while (bytes >= 4) {
ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA);
addr = ir.Add(addr, ir.Imm64(4));
bytes -= 4;
}
}
inst.Invalidate();
}
}
} // namespace Dynarmic::Optimization


@ -1,165 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <array>
#include "dynarmic/common/common_types.h"
#include "dynarmic/frontend/A64/a64_types.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/value.h"
namespace Dynarmic::Optimization {
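// A64 counterpart of the A32 get/set elimination: forward-propagates the last
// value written to each X register, vector register, SP and NZCV, invalidating
// overwritten sets. A cached value is reused only when the tracked width
// (W/X, S/D/Q, raw vs. packed NZCV) matches the access exactly.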
void A64GetSetElimination(IR::Block& block) {
using Iterator = IR::Block::iterator;
enum class TrackingType {
W,
X,
S,
D,
Q,
SP,
NZCV,
NZCVRaw,
};
struct RegisterInfo {
IR::Value register_value;
TrackingType tracking_type;
bool set_instruction_present = false;
Iterator last_set_instruction;
};
std::array<RegisterInfo, 31> reg_info;
std::array<RegisterInfo, 32> vec_info;
RegisterInfo sp_info;
RegisterInfo nzcv_info;
const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) {
if (info.set_instruction_present) {
info.last_set_instruction->Invalidate();
block.Instructions().erase(info.last_set_instruction);
}
info.register_value = value;
info.tracking_type = tracking_type;
info.set_instruction_present = true;
info.last_set_instruction = set_inst;
};
const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) {
const auto do_nothing = [&] {
info = {};
info.register_value = IR::Value(&*get_inst);
info.tracking_type = tracking_type;
};
if (info.register_value.IsEmpty()) {
do_nothing();
return;
}
if (info.tracking_type == tracking_type) {
get_inst->ReplaceUsesWith(info.register_value);
return;
}
do_nothing();
};
for (auto inst = block.begin(); inst != block.end(); ++inst) {
auto const opcode = inst->GetOpcode();
switch (opcode) {
case IR::Opcode::A64GetW: {
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
do_get(reg_info.at(index), inst, TrackingType::W);
break;
}
case IR::Opcode::A64GetX: {
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
do_get(reg_info.at(index), inst, TrackingType::X);
break;
}
case IR::Opcode::A64GetS: {
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
do_get(vec_info.at(index), inst, TrackingType::S);
break;
}
case IR::Opcode::A64GetD: {
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
do_get(vec_info.at(index), inst, TrackingType::D);
break;
}
case IR::Opcode::A64GetQ: {
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
do_get(vec_info.at(index), inst, TrackingType::Q);
break;
}
case IR::Opcode::A64GetSP: {
do_get(sp_info, inst, TrackingType::SP);
break;
}
case IR::Opcode::A64GetNZCVRaw: {
do_get(nzcv_info, inst, TrackingType::NZCVRaw);
break;
}
case IR::Opcode::A64SetW: {
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W);
break;
}
case IR::Opcode::A64SetX: {
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X);
break;
}
case IR::Opcode::A64SetS: {
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S);
break;
}
case IR::Opcode::A64SetD: {
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D);
break;
}
case IR::Opcode::A64SetQ: {
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q);
break;
}
case IR::Opcode::A64SetSP: {
do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP);
break;
}
case IR::Opcode::A64SetNZCV: {
do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV);
break;
}
case IR::Opcode::A64SetNZCVRaw: {
do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw);
break;
}
default: {
if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) {
nzcv_info = {};
}
if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) {
reg_info = {};
vec_info = {};
sp_info = {};
}
break;
}
}
}
}
} // namespace Dynarmic::Optimization


@ -1,57 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <boost/variant/get.hpp>
#include "dynarmic/common/common_types.h"
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
#include "dynarmic/frontend/A64/translate/a64_translate.h"
#include "dynarmic/interface/A64/config.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
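// If the block ends in an Interpret terminal, count how many directly following
// instructions would also translate to single-instruction interpret blocks and
// fold them into one interpreter call, bumping the block's cycle count to match.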
void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) {
const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) {
const auto instruction = cb->MemoryReadCode(location.PC());
if (!instruction)
return false;
IR::Block new_block{location};
A64::TranslateSingleInstruction(new_block, location, *instruction);
if (!new_block.Instructions().empty())
return false;
const IR::Terminal terminal = new_block.GetTerminal();
if (auto term = boost::get<IR::Term::Interpret>(&terminal)) {
return term->next == location;
}
return false;
};
IR::Terminal terminal = block.GetTerminal();
auto term = boost::get<IR::Term::Interpret>(&terminal);
if (!term)
return;
A64::LocationDescriptor location{term->next};
size_t num_instructions = 1;
while (is_interpret_instruction(location.AdvancePC(static_cast<int>(num_instructions * 4)))) {
num_instructions++;
}
term->num_instructions = num_instructions;
block.ReplaceTerminal(terminal);
block.CycleCount() += num_instructions - 1;
}
} // namespace Dynarmic::Optimization


@ -1,559 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <optional>
#include "dynarmic/common/assert.h"
#include <mcl/bit/rotate.hpp>
#include <mcl/bit/swap.hpp>
#include "dynarmic/common/common_types.h"
#include "dynarmic/common/safe_ops.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/ir_emitter.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
using Op = Dynarmic::IR::Opcode;
namespace {
// Tiny helper so the folding functions below don't have to select the
// store width from the opcode's bit size at every call site.
void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
if (is_32_bit) {
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
} else {
inst.ReplaceUsesWith(IR::Value{value});
}
}
IR::Value Value(bool is_32_bit, u64 value) {
return is_32_bit ? IR::Value{static_cast<u32>(value)} : IR::Value{value};
}
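// Generic folder for commutative operations: an all-immediate operation is
// evaluated outright (returning false, since the instruction has been fully
// replaced); otherwise the immediate is normalized into the second operand and
// chains like (x op imm1) op imm2 are merged, returning true so the caller can
// apply further identities.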
template<typename ImmFn>
bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
const auto lhs = inst.GetArg(0);
const auto rhs = inst.GetArg(1);
const bool is_lhs_immediate = lhs.IsImmediate();
const bool is_rhs_immediate = rhs.IsImmediate();
if (is_lhs_immediate && is_rhs_immediate) {
const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64());
ReplaceUsesWith(inst, is_32_bit, result);
return false;
}
if (is_lhs_immediate && !is_rhs_immediate) {
const IR::Inst* rhs_inst = rhs.GetInstRecursive();
if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) {
const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64());
inst.SetArg(0, rhs_inst->GetArg(0));
inst.SetArg(1, Value(is_32_bit, combined));
} else {
// Normalize
inst.SetArg(0, rhs);
inst.SetArg(1, lhs);
}
}
if (!is_lhs_immediate && is_rhs_immediate) {
const IR::Inst* lhs_inst = lhs.GetInstRecursive();
if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) {
const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64());
inst.SetArg(0, lhs_inst->GetArg(0));
inst.SetArg(1, Value(is_32_bit, combined));
}
}
return true;
}
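// Folds additions: merges immediate chains such as (x + imm1) + imm2, drops an
// addition of zero with a zero carry, and evaluates fully-immediate sums.
// Instructions with associated pseudo-operations (carry/overflow consumers)
// are left untouched.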
void FoldAdd(IR::Inst& inst, bool is_32_bit) {
const auto lhs = inst.GetArg(0);
const auto rhs = inst.GetArg(1);
const auto carry = inst.GetArg(2);
if (lhs.IsImmediate() && !rhs.IsImmediate()) {
// Normalize
inst.SetArg(0, rhs);
inst.SetArg(1, lhs);
FoldAdd(inst, is_32_bit);
return;
}
if (inst.HasAssociatedPseudoOperation()) {
return;
}
if (!lhs.IsImmediate() && rhs.IsImmediate()) {
const IR::Inst* lhs_inst = lhs.GetInstRecursive();
if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) {
const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1();
if (combined == 0) {
inst.ReplaceUsesWith(lhs_inst->GetArg(0));
return;
}
inst.SetArg(0, lhs_inst->GetArg(0));
inst.SetArg(1, Value(is_32_bit, combined));
return;
}
if (rhs.IsZero() && carry.IsZero()) {
inst.ReplaceUsesWith(lhs);
return;
}
}
if (inst.AreAllArgsImmediates()) {
const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1();
ReplaceUsesWith(inst, is_32_bit, result);
return;
}
}
/// Folds AND operations based on the following:
///
/// 1. imm_x & imm_y -> result
/// 2. x & 0 -> 0
/// 3. 0 & y -> 0
/// 4. x & y -> y (where x has all bits set to 1)
/// 5. x & y -> x (where y has all bits set to 1)
///
void FoldAND(IR::Inst& inst, bool is_32_bit) {
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
ReplaceUsesWith(inst, is_32_bit, 0);
} else if (rhs.HasAllBitsSet()) {
inst.ReplaceUsesWith(inst.GetArg(0));
}
}
}
/// Folds byte reversal opcodes based on the following:
///
/// 1. imm -> swap(imm)
///
void FoldByteReverse(IR::Inst& inst, Op op) {
const auto operand = inst.GetArg(0);
if (!operand.IsImmediate()) {
return;
}
if (op == Op::ByteReverseWord) {
const u32 result = mcl::bit::swap_bytes_32(static_cast<u32>(operand.GetImmediateAsU64()));
inst.ReplaceUsesWith(IR::Value{result});
} else if (op == Op::ByteReverseHalf) {
const u16 result = mcl::bit::swap_bytes_16(static_cast<u16>(operand.GetImmediateAsU64()));
inst.ReplaceUsesWith(IR::Value{result});
} else {
const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64());
inst.ReplaceUsesWith(IR::Value{result});
}
}
/// Folds division operations based on the following:
///
/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual)
/// 2. imm_x / imm_y -> result
/// 3. x / 1 -> x
///
void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
ReplaceUsesWith(inst, is_32_bit, 0);
return;
}
const auto lhs = inst.GetArg(0);
if (lhs.IsImmediate() && rhs.IsImmediate()) {
if (is_signed) {
const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64();
ReplaceUsesWith(inst, is_32_bit, static_cast<u64>(result));
} else {
const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64();
ReplaceUsesWith(inst, is_32_bit, result);
}
} else if (rhs.IsUnsignedImmediate(1)) {
inst.ReplaceUsesWith(IR::Value{lhs});
}
}
// Folds EOR operations based on the following:
//
// 1. imm_x ^ imm_y -> result
// 2. x ^ 0 -> x
// 3. 0 ^ y -> y
//
void FoldEOR(IR::Inst& inst, bool is_32_bit) {
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
inst.ReplaceUsesWith(inst.GetArg(0));
}
}
}
void FoldLeastSignificantByte(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const auto operand = inst.GetArg(0);
inst.ReplaceUsesWith(IR::Value{static_cast<u8>(operand.GetImmediateAsU64())});
}
void FoldLeastSignificantHalf(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const auto operand = inst.GetArg(0);
inst.ReplaceUsesWith(IR::Value{static_cast<u16>(operand.GetImmediateAsU64())});
}
void FoldLeastSignificantWord(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const auto operand = inst.GetArg(0);
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64())});
}
void FoldMostSignificantBit(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const auto operand = inst.GetArg(0);
inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0});
}
void FoldMostSignificantWord(IR::Inst& inst) {
IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
if (!inst.AreAllArgsImmediates()) {
return;
}
const auto operand = inst.GetArg(0);
if (carry_inst) {
carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())});
}
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64() >> 32)});
}
// Folds multiplication operations based on the following:
//
// 1. imm_x * imm_y -> result
// 2. x * 0 -> 0
// 3. 0 * y -> 0
// 4. x * 1 -> x
// 5. 1 * y -> y
//
void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
ReplaceUsesWith(inst, is_32_bit, 0);
} else if (rhs.IsUnsignedImmediate(1)) {
inst.ReplaceUsesWith(inst.GetArg(0));
}
}
}
// Folds NOT operations if the contained value is an immediate.
void FoldNOT(IR::Inst& inst, bool is_32_bit) {
const auto operand = inst.GetArg(0);
if (!operand.IsImmediate()) {
return;
}
const u64 result = ~operand.GetImmediateAsU64();
ReplaceUsesWith(inst, is_32_bit, result);
}
// Folds OR operations based on the following:
//
// 1. imm_x | imm_y -> result
// 2. x | 0 -> x
// 3. 0 | y -> y
//
void FoldOR(IR::Inst& inst, bool is_32_bit) {
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
inst.ReplaceUsesWith(inst.GetArg(0));
}
}
}
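// Common pre-fold for shift/rotate opcodes: a shift by zero forwards the
// operand (and the carry-in, if a GetCarryFromOp pseudo-op is attached).
// Returns true only when all arguments are immediate and no carry consumer
// exists, i.e. when the caller may evaluate the operation directly.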
bool FoldShifts(IR::Inst& inst) {
IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
// The 32-bit variants can contain 3 arguments, while the
// 64-bit variants only contain 2.
if (inst.NumArgs() == 3 && !carry_inst) {
inst.SetArg(2, IR::Value(false));
}
const auto shift_amount = inst.GetArg(1);
if (shift_amount.IsZero()) {
if (carry_inst) {
carry_inst->ReplaceUsesWith(inst.GetArg(2));
}
inst.ReplaceUsesWith(inst.GetArg(0));
return false;
}
if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) {
inst.SetArg(2, IR::Value(false));
}
if (!inst.AreAllArgsImmediates() || carry_inst) {
return false;
}
return true;
}
void FoldSignExtendXToWord(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const s64 value = inst.GetArg(0).GetImmediateAsS64();
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
}
void FoldSignExtendXToLong(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const s64 value = inst.GetArg(0).GetImmediateAsS64();
inst.ReplaceUsesWith(IR::Value{static_cast<u64>(value)});
}
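// Subtraction is folded as lhs + ~rhs + carry, matching the ARM definition of
// SUB/SBC in terms of an adder with an inverted second operand.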
void FoldSub(IR::Inst& inst, bool is_32_bit) {
if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
return;
}
const auto lhs = inst.GetArg(0);
const auto rhs = inst.GetArg(1);
const auto carry = inst.GetArg(2);
const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1();
ReplaceUsesWith(inst, is_32_bit, result);
}
void FoldZeroExtendXToWord(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const u64 value = inst.GetArg(0).GetImmediateAsU64();
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
}
void FoldZeroExtendXToLong(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const u64 value = inst.GetArg(0).GetImmediateAsU64();
inst.ReplaceUsesWith(IR::Value{value});
}
} // Anonymous namespace
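// Single forward sweep that dispatches each opcode to the matching folding
// helper above. The masked shift variants use only the bottom 5 (32-bit) or
// 6 (64-bit) bits of the shift amount, mirroring hardware behaviour.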
void ConstantPropagation(IR::Block& block) {
for (auto& inst : block) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
case Op::LeastSignificantWord:
FoldLeastSignificantWord(inst);
break;
case Op::MostSignificantWord:
FoldMostSignificantWord(inst);
break;
case Op::LeastSignificantHalf:
FoldLeastSignificantHalf(inst);
break;
case Op::LeastSignificantByte:
FoldLeastSignificantByte(inst);
break;
case Op::MostSignificantBit:
FoldMostSignificantBit(inst);
break;
case Op::IsZero32:
if (inst.AreAllArgsImmediates()) {
inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0});
}
break;
case Op::IsZero64:
if (inst.AreAllArgsImmediates()) {
inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0});
}
break;
case Op::LogicalShiftLeft32:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
}
break;
case Op::LogicalShiftLeft64:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
}
break;
case Op::LogicalShiftRight32:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, true, Safe::LogicalShiftRight<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
}
break;
case Op::LogicalShiftRight64:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, false, Safe::LogicalShiftRight<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
}
break;
case Op::ArithmeticShiftRight32:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
}
break;
case Op::ArithmeticShiftRight64:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
}
break;
case Op::RotateRight32:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, true, mcl::bit::rotate_right<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
}
break;
case Op::RotateRight64:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, false, mcl::bit::rotate_right<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
}
break;
case Op::LogicalShiftLeftMasked32:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f));
}
break;
case Op::LogicalShiftLeftMasked64:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f));
}
break;
case Op::LogicalShiftRightMasked32:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f));
}
break;
case Op::LogicalShiftRightMasked64:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f));
}
break;
case Op::ArithmeticShiftRightMasked32:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, true, static_cast<s32>(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f));
}
break;
case Op::ArithmeticShiftRightMasked64:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, false, static_cast<s64>(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f));
}
break;
case Op::RotateRightMasked32:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, true, mcl::bit::rotate_right<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32()));
}
break;
case Op::RotateRightMasked64:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, false, mcl::bit::rotate_right<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64()));
}
break;
case Op::Add32:
case Op::Add64:
FoldAdd(inst, opcode == Op::Add32);
break;
case Op::Sub32:
case Op::Sub64:
FoldSub(inst, opcode == Op::Sub32);
break;
case Op::Mul32:
case Op::Mul64:
FoldMultiply(inst, opcode == Op::Mul32);
break;
case Op::SignedDiv32:
case Op::SignedDiv64:
FoldDivide(inst, opcode == Op::SignedDiv32, true);
break;
case Op::UnsignedDiv32:
case Op::UnsignedDiv64:
FoldDivide(inst, opcode == Op::UnsignedDiv32, false);
break;
case Op::And32:
case Op::And64:
FoldAND(inst, opcode == Op::And32);
break;
case Op::Eor32:
case Op::Eor64:
FoldEOR(inst, opcode == Op::Eor32);
break;
case Op::Or32:
case Op::Or64:
FoldOR(inst, opcode == Op::Or32);
break;
case Op::Not32:
case Op::Not64:
FoldNOT(inst, opcode == Op::Not32);
break;
case Op::SignExtendByteToWord:
case Op::SignExtendHalfToWord:
FoldSignExtendXToWord(inst);
break;
case Op::SignExtendByteToLong:
case Op::SignExtendHalfToLong:
case Op::SignExtendWordToLong:
FoldSignExtendXToLong(inst);
break;
case Op::ZeroExtendByteToWord:
case Op::ZeroExtendHalfToWord:
FoldZeroExtendXToWord(inst);
break;
case Op::ZeroExtendByteToLong:
case Op::ZeroExtendHalfToLong:
case Op::ZeroExtendWordToLong:
FoldZeroExtendXToLong(inst);
break;
case Op::ByteReverseWord:
case Op::ByteReverseHalf:
case Op::ByteReverseDual:
FoldByteReverse(inst, opcode);
break;
default:
break;
}
}
}
} // namespace Dynarmic::Optimization


@ -1,23 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <mcl/iterator/reverse.hpp>
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
void DeadCodeElimination(IR::Block& block) {
// We iterate over the instructions in reverse order.
// This is because removing an instruction reduces the number of uses for earlier instructions.
for (auto& inst : mcl::iterator::reverse(block)) {
if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) {
inst.Invalidate();
}
}
}
} // namespace Dynarmic::Optimization


@ -1,44 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2020 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <vector>
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
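// Strips Identity/Void instructions: each argument is first chased through
// chains of identities to the underlying value, then the now-unused identity
// instructions are erased from the block and invalidated.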
void IdentityRemovalPass(IR::Block& block) {
std::vector<IR::Inst*> to_invalidate;
auto iter = block.begin();
while (iter != block.end()) {
IR::Inst& inst = *iter;
const size_t num_args = inst.NumArgs();
for (size_t i = 0; i < num_args; i++) {
while (true) {
IR::Value arg = inst.GetArg(i);
if (!arg.IsIdentity())
break;
inst.SetArg(i, arg.GetInst()->GetArg(0));
}
}
if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) {
iter = block.Instructions().erase(inst);
to_invalidate.push_back(&inst);
} else {
++iter;
}
}
for (IR::Inst* inst : to_invalidate) {
inst->Invalidate();
}
}
} // namespace Dynarmic::Optimization


@ -1,127 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2020 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <optional>
#include <tuple>
#include <mp/metafunction/apply.h>
#include <mp/typelist/concat.h>
#include <mp/typelist/drop.h>
#include <mp/typelist/get.h>
#include <mp/typelist/head.h>
#include <mp/typelist/list.h>
#include <mp/typelist/prepend.h>
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/value.h"
namespace Dynarmic::Optimization::IRMatcher {
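// Small compile-time pattern-matching DSL over the IR: each matcher either
// captures a value/instruction/immediate or constrains it, and
// Inst<Opcode, Args...> matches an instruction tree, concatenating the
// captures of its arguments into a single tuple.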
struct CaptureValue {
using ReturnType = std::tuple<IR::Value>;
static std::optional<ReturnType> Match(IR::Value value) {
return std::tuple(value);
}
};
struct CaptureInst {
using ReturnType = std::tuple<IR::Inst*>;
static std::optional<ReturnType> Match(IR::Value value) {
if (value.IsImmediate())
return std::nullopt;
return std::tuple(value.GetInstRecursive());
}
};
struct CaptureUImm {
using ReturnType = std::tuple<u64>;
static std::optional<ReturnType> Match(IR::Value value) {
return std::tuple(value.GetImmediateAsU64());
}
};
struct CaptureSImm {
using ReturnType = std::tuple<s64>;
static std::optional<ReturnType> Match(IR::Value value) {
return std::tuple(value.GetImmediateAsS64());
}
};
template<u64 Value>
struct UImm {
using ReturnType = std::tuple<>;
static std::optional<std::tuple<>> Match(IR::Value value) {
if (value.GetImmediateAsU64() == Value)
return std::tuple();
return std::nullopt;
}
};
template<s64 Value>
struct SImm {
using ReturnType = std::tuple<>;
static std::optional<std::tuple<>> Match(IR::Value value) {
if (value.GetImmediateAsS64() == Value)
return std::tuple();
return std::nullopt;
}
};
template<IR::Opcode Opcode, typename... Args>
struct Inst {
public:
using ReturnType = mp::concat<std::tuple<>, typename Args::ReturnType...>;
static std::optional<ReturnType> Match(const IR::Inst& inst) {
if (inst.GetOpcode() != Opcode)
return std::nullopt;
if (inst.HasAssociatedPseudoOperation())
return std::nullopt;
return MatchArgs<0>(inst);
}
static std::optional<ReturnType> Match(IR::Value value) {
if (value.IsImmediate())
return std::nullopt;
return Match(*value.GetInstRecursive());
}
private:
template<size_t I>
static auto MatchArgs(const IR::Inst& inst) -> std::optional<mp::apply<mp::concat, mp::prepend<mp::drop<I, mp::list<typename Args::ReturnType...>>, std::tuple<>>>> {
if constexpr (I >= sizeof...(Args)) {
return std::tuple();
} else {
using Arg = mp::get<I, mp::list<Args...>>;
if (const auto arg = Arg::Match(inst.GetArg(I))) {
if (const auto rest = MatchArgs<I + 1>(inst)) {
return std::tuple_cat(*arg, *rest);
}
}
return std::nullopt;
}
}
};
inline bool IsSameInst(std::tuple<IR::Inst*, IR::Inst*> t) {
return std::get<0>(t) == std::get<1>(t);
}
inline bool IsSameInst(std::tuple<IR::Inst*, IR::Inst*, IR::Inst*> t) {
return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t);
}
} // namespace Dynarmic::Optimization::IRMatcher


@ -1,18 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2023 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
namespace Dynarmic::Optimization {
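// Assigns sequential names (starting at 1) to every instruction so that IR
// dumps and diagnostics can refer to them consistently.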
void NamingPass(IR::Block& block) {
unsigned name = 1;
for (auto& inst : block) {
inst.SetName(name++);
}
}
} // namespace Dynarmic::Optimization


@ -1,47 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
namespace Dynarmic::A32 {
struct UserCallbacks;
}
namespace Dynarmic::A64 {
struct UserCallbacks;
struct UserConfig;
} // namespace Dynarmic::A64
namespace Dynarmic::IR {
class Block;
}
namespace Dynarmic::Optimization {
struct PolyfillOptions {
bool sha256 = false;
bool vector_multiply_widen = false;
bool operator==(const PolyfillOptions&) const = default;
};
struct A32GetSetEliminationOptions {
bool convert_nzc_to_nz = false;
bool convert_nz_to_nzc = false;
};
void PolyfillPass(IR::Block& block, const PolyfillOptions& opt);
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb);
void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt);
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf);
void A64GetSetElimination(IR::Block& block);
void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb);
void ConstantPropagation(IR::Block& block);
void DeadCodeElimination(IR::Block& block);
void IdentityRemovalPass(IR::Block& block);
void VerificationPass(const IR::Block& block);
void NamingPass(IR::Block& block);
} // namespace Dynarmic::Optimization


@ -1,218 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/ir_emitter.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
namespace {
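// Software fallbacks for IR opcodes the host backend may lack: the SHA256
// helpers below implement the message-schedule and hash-update steps with
// plain vector operations, and PolyfillVectorMultiplyWiden expands widening
// multiplies via sign/zero extension followed by a double-width multiply.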
void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
const IR::U128 x = (IR::U128)inst.GetArg(0);
const IR::U128 y = (IR::U128)inst.GetArg(1);
const IR::U128 t = ir.VectorExtract(x, y, 32);
IR::U128 result = ir.ZeroVector();
for (size_t i = 0; i < 4; i++) {
const IR::U32 modified_element = [&] {
const IR::U32 element = ir.VectorGetElement(32, t, i);
const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7));
const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18));
const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3));
return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
}();
result = ir.VectorSetElement(32, result, i, modified_element);
}
result = ir.VectorAdd(32, result, x);
inst.ReplaceUsesWith(result);
}
void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
const IR::U128 x = (IR::U128)inst.GetArg(0);
const IR::U128 y = (IR::U128)inst.GetArg(1);
const IR::U128 z = (IR::U128)inst.GetArg(2);
const IR::U128 T0 = ir.VectorExtract(y, z, 32);
const IR::U128 lower_half = [&] {
const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64);
const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);
const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19);
const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10);
const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));
const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0));
return ir.VectorZeroUpper(tmp5);
}();
const IR::U64 upper_half = [&] {
const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17);
const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19);
const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10);
const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));
// Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2]
const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64);
const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64);
const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0));
return ir.VectorGetElement(64, tmp5, 0);
}();
const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half);
inst.ReplaceUsesWith(result);
}
IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
return ir.Eor(ir.And(ir.Eor(y, z), x), z);
}
IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z));
}
IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2));
const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13));
const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22));
return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
}
IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6));
const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11));
const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25));
return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
}
void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
IR::U128 x = (IR::U128)inst.GetArg(0);
IR::U128 y = (IR::U128)inst.GetArg(1);
const IR::U128 w = (IR::U128)inst.GetArg(2);
const bool part1 = inst.GetArg(3).GetU1();
for (size_t i = 0; i < 4; i++) {
const IR::U32 low_x = ir.VectorGetElement(32, x, 0);
const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1);
const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2);
const IR::U32 high_x = ir.VectorGetElement(32, x, 3);
const IR::U32 low_y = ir.VectorGetElement(32, y, 0);
const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1);
const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2);
const IR::U32 high_y = ir.VectorGetElement(32, y, 3);
const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y);
const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x);
const IR::U32 t = [&] {
const IR::U32 w_element = ir.VectorGetElement(32, w, i);
const IR::U32 sig = SHAhashSIGMA1(ir, low_y);
return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element)));
}();
const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority));
const IR::U32 new_low_y = ir.Add(t, high_x);
// Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3]
const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96);
const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96);
x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x);
y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y);
}
inst.ReplaceUsesWith(part1 ? x : y);
}
template<size_t esize, bool is_signed>
void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
IR::U128 n = (IR::U128)inst.GetArg(0);
IR::U128 m = (IR::U128)inst.GetArg(1);
const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n);
const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m);
const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m);
inst.ReplaceUsesWith(result);
}
} // namespace
void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
if (polyfill == PolyfillOptions{}) {
return;
}
IR::IREmitter ir{block};
for (auto& inst : block) {
ir.SetInsertionPointBefore(&inst);
switch (inst.GetOpcode()) {
case IR::Opcode::SHA256MessageSchedule0:
if (polyfill.sha256) {
PolyfillSHA256MessageSchedule0(ir, inst);
}
break;
case IR::Opcode::SHA256MessageSchedule1:
if (polyfill.sha256) {
PolyfillSHA256MessageSchedule1(ir, inst);
}
break;
case IR::Opcode::SHA256Hash:
if (polyfill.sha256) {
PolyfillSHA256Hash(ir, inst);
}
break;
case IR::Opcode::VectorMultiplySignedWiden8:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<8, true>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplySignedWiden16:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<16, true>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplySignedWiden32:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<32, true>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplyUnsignedWiden8:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<8, false>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplyUnsignedWiden16:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<16, false>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplyUnsignedWiden32:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<32, false>(ir, inst);
}
break;
default:
break;
}
}
}
} // namespace Dynarmic::Optimization

View file

@ -1,51 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <cstdio>
#include <map>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"
#include <ankerl/unordered_dense.h>
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/type.h"
namespace Dynarmic::Optimization {
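// Debug-time consistency check: every argument type must be compatible with
// its opcode's signature, and each instruction's recorded use count must equal
// the number of uses actually found in the block.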
void VerificationPass(const IR::Block& block) {
for (const auto& inst : block) {
for (size_t i = 0; i < inst.NumArgs(); i++) {
const IR::Type t1 = inst.GetArg(i).GetType();
const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i);
if (!IR::AreTypesCompatible(t1, t2)) {
std::puts(IR::DumpBlock(block).c_str());
ASSERT_FALSE("above block failed validation");
}
}
}
ankerl::unordered_dense::map<IR::Inst*, size_t> actual_uses;
for (const auto& inst : block) {
for (size_t i = 0; i < inst.NumArgs(); i++) {
const auto arg = inst.GetArg(i);
if (!arg.IsImmediate()) {
actual_uses[arg.GetInst()]++;
}
}
}
for (const auto& pair : actual_uses) {
ASSERT(pair.first->UseCount() == pair.second);
}
}
} // namespace Dynarmic::Optimization

File diff suppressed because one or more lines are too large


@ -0,0 +1,37 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
namespace Dynarmic::A32 {
struct UserCallbacks;
struct UserConfig;
}
namespace Dynarmic::A64 {
struct UserCallbacks;
struct UserConfig;
}
namespace Dynarmic::IR {
class Block;
}
namespace Dynarmic::Optimization {
struct PolyfillOptions {
bool sha256 = false;
bool vector_multiply_widen = false;
bool operator==(const PolyfillOptions&) const = default;
};
void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options);
void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options);
} // namespace Dynarmic::Optimization


@ -29,7 +29,7 @@
 #include "dynarmic/frontend/A32/translate/a32_translate.h"
 #include "dynarmic/interface/A32/a32.h"
 #include "dynarmic/ir/basic_block.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"
 using namespace Dynarmic;
@ -179,13 +179,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn<Th
 while (num_insts < instructions_to_execute_count) {
     A32::LocationDescriptor descriptor = {u32(num_insts * 4), cpsr, A32::FPSCR{}};
     IR::Block ir_block = A32::Translate(descriptor, &test_env, {});
-    Optimization::NamingPass(ir_block);
-    Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true});
-    Optimization::DeadCodeElimination(ir_block);
-    Optimization::A32ConstantMemoryReads(ir_block, &test_env);
-    Optimization::ConstantPropagation(ir_block);
-    Optimization::DeadCodeElimination(ir_block);
-    Optimization::VerificationPass(ir_block);
+    Optimization::Optimize(ir_block, &test_env, {});
 printf("\n\nIR:\n%s", IR::DumpBlock(ir_block).c_str());
 printf("\n\nx86_64:\n");
 jit.DumpDisassembly();


@ -17,7 +17,6 @@
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/common_types.h"
 #include "dynarmic/interface/A32/a32.h"
-#include "../native/testenv.h"
 template<typename InstructionType_, u32 infinite_loop_u32>
 class A32TestEnv : public Dynarmic::A32::UserCallbacks {


@ -28,7 +28,7 @@
 #include "dynarmic/frontend/A64/translate/a64_translate.h"
 #include "dynarmic/ir/basic_block.h"
 #include "dynarmic/ir/opcodes.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"
 // Must be declared last for all necessary operator<< to be declared prior to this.
 #include <fmt/format.h>


@ -12,7 +12,6 @@
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/common_types.h"
 #include "dynarmic/interface/A64/a64.h"
-#include "../native/testenv.h"
 using Vector = Dynarmic::A64::Vector;


@ -6,10 +6,7 @@ add_executable(dynarmic_tests
 fp/mantissa_util_tests.cpp
 fp/unpacked_tests.cpp
 rand_int.h
-)
-if ("A32" IN_LIST DYNARMIC_FRONTENDS)
-target_sources(dynarmic_tests PRIVATE
+# A32
 A32/test_arm_disassembler.cpp
 A32/test_arm_instructions.cpp
 A32/test_coprocessor.cpp
@ -17,13 +14,7 @@ if ("A32" IN_LIST DYNARMIC_FRONTENDS)
 A32/test_thumb_instructions.cpp
 A32/testenv.h
 decoder_tests.cpp
-)
-endif()
-if ("A64" IN_LIST DYNARMIC_FRONTENDS)
-target_link_libraries(dynarmic_tests PRIVATE merry::oaknut)
-target_sources(dynarmic_tests PRIVATE
+# A64
 A64/a64.cpp
 A64/fibonacci.cpp
 A64/fp_min_max.cpp
@ -31,8 +22,8 @@ if ("A64" IN_LIST DYNARMIC_FRONTENDS)
 A64/test_invalidation.cpp
 A64/real_world.cpp
 A64/testenv.h
 )
-endif()
+target_link_libraries(dynarmic_tests PRIVATE merry::oaknut)
 if (DYNARMIC_TESTS_USE_UNICORN)
 target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn)
@ -40,25 +31,17 @@ if (DYNARMIC_TESTS_USE_UNICORN)
 target_sources(dynarmic_tests PRIVATE
 fuzz_util.cpp
 fuzz_util.h
-)
-if ("A32" IN_LIST DYNARMIC_FRONTENDS)
-target_sources(dynarmic_tests PRIVATE
+# A32
 A32/fuzz_arm.cpp
 A32/fuzz_thumb.cpp
 unicorn_emu/a32_unicorn.cpp
 unicorn_emu/a32_unicorn.h
-)
-endif()
-if ("A64" IN_LIST DYNARMIC_FRONTENDS)
-target_sources(dynarmic_tests PRIVATE
+# A64
 A64/fuzz_with_unicorn.cpp
 A64/verify_unicorn.cpp
 unicorn_emu/a64_unicorn.cpp
 unicorn_emu/a64_unicorn.h
 )
-endif()
 endif()
 if ("riscv" IN_LIST ARCHITECTURE)
@ -69,9 +52,6 @@ if ("x86_64" IN_LIST ARCHITECTURE)
 target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak)
 target_architecture_specific_sources(dynarmic_tests "x86_64"
 x64_cpu_info.cpp
-)
-target_architecture_specific_sources(dynarmic_tests "x86_64"
 native/preserve_xmm.cpp
 )
@ -85,50 +65,70 @@ endif()
 include(CreateDirectoryGroups)
-if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
-add_executable(dynarmic_print_info
+#
+# dynarmic_print_info
+#
+add_executable(dynarmic_print_info
 print_info.cpp
 )
-create_target_directory_groups(dynarmic_print_info)
-target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl)
-target_include_directories(dynarmic_print_info PRIVATE . ../src)
-target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS})
-target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
-endif()
+create_target_directory_groups(dynarmic_print_info)
+target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl)
+if (BOOST_NO_HEADERS)
+target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool)
+else()
+target_link_libraries(dynarmic_print_info PRIVATE Boost::headers)
+endif()
+target_include_directories(dynarmic_print_info PRIVATE . ../src)
+target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS})
+target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
-if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
-add_executable(dynarmic_test_generator
+#
+# dynarmic_test_generator
+#
+add_executable(dynarmic_test_generator
 fuzz_util.cpp
 fuzz_util.h
 test_generator.cpp
 )
 create_target_directory_groups(dynarmic_test_generator)
-target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl)
-target_include_directories(dynarmic_test_generator PRIVATE . ../src)
-target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS})
-target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
-endif()
+target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl)
+if (BOOST_NO_HEADERS)
+target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool)
+else()
+target_link_libraries(dynarmic_test_generator PRIVATE Boost::headers)
+endif()
+target_include_directories(dynarmic_test_generator PRIVATE . ../src)
+target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS})
+target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
-if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
-add_executable(dynarmic_test_reader
+#
+# dynarmic_test_reader
+#
+add_executable(dynarmic_test_reader
 test_reader.cpp
 )
-create_target_directory_groups(dynarmic_test_reader)
-target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl)
-target_include_directories(dynarmic_test_reader PRIVATE . ../src)
-target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS})
-target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
-endif()
+create_target_directory_groups(dynarmic_test_reader)
+target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl)
+if (BOOST_NO_HEADERS)
+target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool)
+else()
+target_link_libraries(dynarmic_test_reader PRIVATE Boost::headers)
+endif()
+target_include_directories(dynarmic_test_reader PRIVATE . ../src)
+target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS})
+target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
+#
 create_target_directory_groups(dynarmic_tests)
-target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl)
+target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl)
+if (BOOST_NO_HEADERS)
+target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool)
+else()
+target_link_libraries(dynarmic_tests PRIVATE Boost::headers)
+endif()
 target_include_directories(dynarmic_tests PRIVATE . ../src)
 target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
 target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)


@ -34,7 +34,7 @@
 #include "dynarmic/interface/A32/a32.h"
 #include "dynarmic/interface/A32/disassembler.h"
 #include "dynarmic/ir/basic_block.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"
 using namespace Dynarmic;