Compare commits

28 commits: 12e1396997 ... 84ca484c65

Commits (SHA1):
84ca484c65, 2383195ca6, 4591a428e4, 114a598ea3, 015dd8c1f2, 15acd76bb6, a7adb3d92d, 160ba69020, 5be411b1eb, 77904828ae, 90184d4d6f, 80a54f8541, db1c2280cd, 6166a18093, 09daf3496e, 111b017260, 024b2c0fa7, 3d7ecc0798, bb174a149c, 6895400e03, 85c899a787, ff44a6567a, 978a6e4eab, a3276fb200, 7606e6a2c7, 9636a916d3, 4db2d390c8, d623e04606

83 changed files with 2378 additions and 2742 deletions
@@ -107,7 +107,6 @@ add_library(core STATIC
     file_sys/fssystem/fssystem_nca_header.cpp
     file_sys/fssystem/fssystem_nca_header.h
     file_sys/fssystem/fssystem_nca_reader.cpp
     file_sys/fssystem/fssystem_passthrough_storage.h
     file_sys/fssystem/fssystem_sparse_storage.cpp
     file_sys/fssystem/fssystem_sparse_storage.h
     file_sys/fssystem/fssystem_switch_storage.h
@@ -210,7 +210,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ArmDynarmic32::MakeJit(Common::PageTable* pa
     config.wall_clock_cntpct = m_uses_wall_clock;
     config.enable_cycle_counting = !m_uses_wall_clock;

-    // Code cache size
+    // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
 #ifdef ARCHITECTURE_arm64
     config.code_cache_size = std::uint32_t(128_MiB);
 #else
@@ -269,7 +269,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
     config.wall_clock_cntpct = m_uses_wall_clock;
     config.enable_cycle_counting = !m_uses_wall_clock;

-    // Code cache size
+    // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
 #ifdef ARCHITECTURE_arm64
     config.code_cache_size = std::uint32_t(128_MiB);
 #else
@@ -39,9 +39,6 @@ option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF)
 option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. cross-compiling)" OFF)
 option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT})
 option(DYNARMIC_ENABLE_LTO "Enable LTO" OFF)
-if (NOT DEFINED DYNARMIC_FRONTENDS)
-    set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable")
-endif()

 # Default to a Release build
 if (NOT CMAKE_BUILD_TYPE)
@@ -273,52 +273,73 @@ Exclusive OR (i.e.: XOR)

 ### Callback: {Read,Write}Memory{8,16,32,64}

-    <u8> ReadMemory8(<u32> vaddr)
-    <u8> ReadMemory16(<u32> vaddr)
-    <u8> ReadMemory32(<u32> vaddr)
-    <u8> ReadMemory64(<u32> vaddr)
-    <void> WriteMemory8(<u32> vaddr, <u8> value_to_store)
-    <void> WriteMemory16(<u32> vaddr, <u16> value_to_store)
-    <void> WriteMemory32(<u32> vaddr, <u32> value_to_store)
-    <void> WriteMemory64(<u32> vaddr, <u64> value_to_store)
+```c++
+<u8> ReadMemory8(<u32> vaddr)
+<u16> ReadMemory16(<u32> vaddr)
+<u32> ReadMemory32(<u32> vaddr)
+<u64> ReadMemory64(<u32> vaddr)
+<void> WriteMemory8(<u32> vaddr, <u8> value_to_store)
+<void> WriteMemory16(<u32> vaddr, <u16> value_to_store)
+<void> WriteMemory32(<u32> vaddr, <u32> value_to_store)
+<void> WriteMemory64(<u32> vaddr, <u64> value_to_store)
+```

 Memory access.
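
As a concrete illustration, a user-side implementation of these callbacks might look like the sketch below. The `MyMemory` type, the flat-vector RAM, and the 64 MiB size are illustrative assumptions; only the callback names and shapes come from the documentation above.

```c++
#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical callback provider: guest memory is a flat 64 MiB buffer.
struct MyMemory {
    std::vector<std::uint8_t> ram = std::vector<std::uint8_t>(64 * 1024 * 1024);

    std::uint8_t ReadMemory8(std::uint32_t vaddr) { return ram.at(vaddr); }
    std::uint32_t ReadMemory32(std::uint32_t vaddr) {
        std::uint32_t value;
        std::memcpy(&value, &ram.at(vaddr), sizeof(value)); // memcpy avoids unaligned-access UB
        return value;
    }
    void WriteMemory8(std::uint32_t vaddr, std::uint8_t value) { ram.at(vaddr) = value; }
    void WriteMemory32(std::uint32_t vaddr, std::uint32_t value) {
        std::memcpy(&ram.at(vaddr), &value, sizeof(value));
    }
    // The 16- and 64-bit variants follow the same memcpy pattern.
};
```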
 ### Terminal: Interpret

-    SetTerm(IR::Term::Interpret{next})
+```c++
+SetTerm(IR::Term::Interpret{next})
+```

 This terminal instruction calls the interpreter, starting at `next`.
 The interpreter must interpret exactly one instruction.

 ### Terminal: ReturnToDispatch

-    SetTerm(IR::Term::ReturnToDispatch{})
+```c++
+SetTerm(IR::Term::ReturnToDispatch{})
+```

 This terminal instruction returns control to the dispatcher.
 The dispatcher will use the value in R15 to determine what comes next.

 ### Terminal: LinkBlock

-    SetTerm(IR::Term::LinkBlock{next})
+```c++
+SetTerm(IR::Term::LinkBlock{next})
+```

 This terminal instruction jumps to the basic block described by `next` if we have enough
 cycles remaining. If we do not have enough cycles remaining, we return to the
 dispatcher, which will return control to the host.

 ### Terminal: LinkBlockFast

+```c++
+SetTerm(IR::Term::LinkBlockFast{next})
+```

 This terminal instruction jumps to the basic block described by `next` unconditionally.
 This promises guarantees that must be upheld at runtime - i.e. that the program won't hang,

 ### Terminal: PopRSBHint

-    SetTerm(IR::Term::PopRSBHint{})
+```c++
+SetTerm(IR::Term::PopRSBHint{})
+```

 This terminal instruction checks the top of the Return Stack Buffer against R15.
 If RSB lookup fails, control is returned to the dispatcher.
 This is an optimization for faster function calls. A backend that doesn't support
 this optimization or doesn't have an RSB may choose to implement this exactly as
-ReturnToDispatch.
+`ReturnToDispatch`.

 ### Terminal: If

-    SetTerm(IR::Term::If{cond, term_then, term_else})
+```c++
+SetTerm(IR::Term::If{cond, term_then, term_else})
+```

 This terminal instruction conditionally executes one terminal or another depending
 on the run-time state of the ARM flags.
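
Since the terminals above drive what the dispatcher does next, a standalone behavioural model can make the contract easier to see. The sketch below re-expresses three of the terminals with `std::variant`; it is a model for exposition, not dynarmic's `IR::Term` implementation.

```c++
#include <cstdint>
#include <iostream>
#include <type_traits>
#include <variant>

struct Interpret { std::uint32_t next; };     // interpret exactly one instruction
struct ReturnToDispatch {};                   // dispatcher reads R15
struct LinkBlock { std::uint32_t next; };     // direct jump, cycle-checked
using Terminal = std::variant<Interpret, ReturnToDispatch, LinkBlock>;

// The dispatcher reacts to whichever terminal ended the block.
void Dispatch(const Terminal& term) {
    std::visit([](auto t) {
        using T = std::decay_t<decltype(t)>;
        if constexpr (std::is_same_v<T, Interpret>)
            std::cout << "interpret one instruction at " << t.next << '\n';
        else if constexpr (std::is_same_v<T, LinkBlock>)
            std::cout << "jump to block " << t.next << " if cycles remain\n";
        else
            std::cout << "return to dispatcher; R15 decides\n";
    }, term);
}

int main() {
    Dispatch(LinkBlock{0x1000});
    Dispatch(ReturnToDispatch{});
}
```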
src/dynarmic/docs/FastMemory.md (new file, 19 lines)

@@ -0,0 +1,19 @@
+# Fast memory (Fastmem)
+
+The main way of accessing memory in JITed programs is via invoked functions, say `Read()` and `Write()`. In our translator, such functions usually take a sizable amount of code space (push + call + pop), trash the i-cache (due to an indirect call), and overall make code emission more bloated.
+
+The solution? Delegate invalid accesses to a dedicated arena, similar to a swap. The main idea behind such a mechanism is to allow the OS to transmit page faults from invalid accesses into the JIT translator directly, bypassing address space calls. While this sacrifices i-cache coherency, it allows for smaller code size and faster throughput.
+
+Many kernels, however, do not support fast signal dispatching (Solaris, OpenBSD, FreeBSD). Only Linux and Windows support relatively fast signal dispatching, so this feature is best suited for them only.
+
+
+
+
+
+In x86_64, for example, when a page fault occurs, the CPU transmits via control registers and the stack (see `IRETQ`) the appropriate arguments for a page fault handler; the OS then transforms that into something that can be sent to userspace.
+
+Most modern OSes implement kernel page-table isolation, which means a set of system calls will invoke a context switch (rarely used syscalls), whereas others are handled by the same process address space (the smaller kernel portion, frequently used syscalls) without needing a context switch. This effect can be negated on systems with PCID (up to 4096 unique IDs).
+
+Signal dispatching takes a performance hit from reloading `%cr3` - but Linux does something more clever to avoid reloads: the vDSO takes care of the entire thing in the same address space, making dispatching as costly as an indirect call - without the hazards of increased code size.
+
+The main downside is the constant i-cache trashing and pipeline hazards introduced by the vDSO signal handlers. However, on most benchmarks fastmem does perform faster than without it (Linux only). This also exploits the contiguity of the emulated address space by using an arena - which can then potentially be transparently mapped into a hugepage, reducing TLB walk times.
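
To make the mechanism concrete, here is a minimal, Linux-flavoured sketch of the arena-plus-signal-handler idea described above. The arena size, handler logic, and on-demand `mprotect` recovery are illustrative assumptions (a real JIT would instead patch the faulting code or divert to a slow-memory callback), and error handling is elided.

```c++
#include <csignal>
#include <cstddef>
#include <cstdint>
#include <signal.h>
#include <sys/mman.h>

static std::uint8_t* g_arena = nullptr;
constexpr std::size_t kArenaSize = std::size_t(4) << 30; // one 32-bit guest address space

static void FastmemHandler(int sig, siginfo_t* info, void* /*ucontext*/) {
    const auto addr = reinterpret_cast<std::uintptr_t>(info->si_addr);
    const auto base = reinterpret_cast<std::uintptr_t>(g_arena);
    if (addr >= base && addr < base + kArenaSize) {
        // A guest access faulted inside the arena: back the page on demand.
        void* page = reinterpret_cast<void*>(addr & ~std::uintptr_t(4095));
        mprotect(page, 4096, PROT_READ | PROT_WRITE);
        return; // the faulting instruction is restarted and now succeeds
    }
    std::signal(sig, SIG_DFL); // not ours: re-raise with the default handler
    std::raise(sig);
}

int main() {
    // Reserve the arena without committing memory; every touch faults first.
    g_arena = static_cast<std::uint8_t*>(
        mmap(nullptr, kArenaSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
    struct sigaction sa {};
    sa.sa_sigaction = FastmemHandler;
    sa.sa_flags = SA_SIGINFO;
    sigaction(SIGSEGV, &sa, nullptr);
    g_arena[0x1000] = 42; // faults once, then succeeds after mprotect
    return g_arena[0x1000] == 42 ? 0 : 1;
}
```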
src/dynarmic/docs/Fastmem.svg (new file)
File diff suppressed because one or more lines are too long. (Image; size: 128 KiB)

src/dynarmic/docs/HostToGuest.svg (new file)
File diff suppressed because one or more lines are too long. (Image; size: 98 KiB)
@@ -16,19 +16,34 @@ Note that `Use`ing a value decrements its `use_count` by one. When the `use_coun

 The member functions on `RegAlloc` are just a combination of the above concepts.

+The following registers are reserved for internal use and should NOT participate in register allocation:
+- `%xmm0`, `%xmm1`, `%xmm2`: used as scratch in exclusive memory access.
+- `%rsp`: stack pointer.
+- `%r15`: JIT state pointer.
+- `%r14`: page table pointer.
+- `%r13`: fastmem pointer.
+
+The layout designates `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, dedicating an entire register (out of 12 available) is preferable to inlining a directly computed immediate.
+
+Never modify `%r15`; it must be treated as immutable for the entire duration of a JIT block.

 ### `Scratch`

-    Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr)
-    Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm)
+```c++
+Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr);
+Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm);
+```

 At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope.

 ### Pure `Use`

-    Xbyak::Reg64 UseGpr(Argument& arg);
-    Xbyak::Xmm UseXmm(Argument& arg);
-    OpArg UseOpArg(Argument& arg);
-    void Use(Argument& arg, HostLoc host_loc);
+```c++
+Xbyak::Reg64 UseGpr(Argument& arg);
+Xbyak::Xmm UseXmm(Argument& arg);
+OpArg UseOpArg(Argument& arg);
+void Use(Argument& arg, HostLoc host_loc);
+```

 At runtime, the value corresponding to `arg` will be placed in a register. The actual register is determined by
 which of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it

@@ -39,9 +54,11 @@ This register **must not** have its value changed.

 ### `UseScratch`

-    Xbyak::Reg64 UseScratchGpr(Argument& arg);
-    Xbyak::Xmm UseScratchXmm(Argument& arg);
-    void UseScratch(Argument& arg, HostLoc host_loc);
+```c++
+Xbyak::Reg64 UseScratchGpr(Argument& arg);
+Xbyak::Xmm UseScratchXmm(Argument& arg);
+void UseScratch(Argument& arg, HostLoc host_loc);
+```

 At runtime, the value corresponding to `arg` will be placed in a register. The actual register is determined by
 which of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it

@@ -55,7 +72,9 @@ You are free to modify the value in the register. The register is discarded at t

 A `Define` is the definition of a value. This is the only time when a value may be set.

-    void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
+```c++
+void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
+```

 By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the
 value to the specified register `reg`.

@@ -64,7 +83,9 @@ value to the specified register `reg`.

 Adding a `Define` to an existing value.

-    void DefineValue(IR::Inst* inst, Argument& arg);
+```c++
+void DefineValue(IR::Inst* inst, Argument& arg);
+```

 You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are
 emitted.
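
As a worked example of how `Use*`, `Scratch`, and `Define` combine, an emitter for a simple two-operand op could look like the sketch below. The `EmitAdd32` shape and the `GetArgumentInfo` call follow the conventions of dynarmic's x64 backend but are assumptions here, not part of this document.

```c++
// Sketch: args[0] is claimed as use-scratch (we may clobber it), args[1] is a
// pure use (must not be modified), and the clobbered register is then
// declared to hold the instruction's result via DefineValue.
void EmitX64::EmitAdd32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    const Xbyak::Reg32 addend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
    code.add(result, addend); // result += addend
    ctx.reg_alloc.DefineValue(inst, result);
}
```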
@@ -23,15 +23,17 @@ One complication dynarmic has is that a compiled block is not uniquely identifia
 the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by
 computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.

-    u64 LocationDescriptor::UniqueHash() const {
-        // This value MUST BE UNIQUE.
-        // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
-        u64 pc_u64 = u64(arm_pc) << 32;
-        u64 fpscr_u64 = u64(fpscr.Value());
-        u64 t_u64 = cpsr.T() ? 1 : 0;
-        u64 e_u64 = cpsr.E() ? 2 : 0;
-        return pc_u64 | fpscr_u64 | t_u64 | e_u64;
-    }
+```c++
+u64 LocationDescriptor::UniqueHash() const {
+    // This value MUST BE UNIQUE.
+    // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
+    u64 pc_u64 = u64(arm_pc) << 32;
+    u64 fpscr_u64 = u64(fpscr.Value());
+    u64 t_u64 = cpsr.T() ? 1 : 0;
+    u64 e_u64 = cpsr.E() ? 2 : 0;
+    return pc_u64 | fpscr_u64 | t_u64 | e_u64;
+}
+```
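
One property worth spelling out (an observation, not part of the diff): the PC occupies the upper 32 bits, the T and E flags occupy bits 0 and 1, and uniqueness relies on the FPSCR bits used here not overlapping those positions. A standalone recomputation:

```c++
#include <cstdint>

// Standalone recomputation of the packing above. Two descriptors with the
// same PC but different Thumb (T) state must hash differently.
std::uint64_t UniqueHash(std::uint32_t pc, std::uint32_t fpscr, bool t, bool e) {
    return (std::uint64_t(pc) << 32) | std::uint64_t(fpscr)
         | (t ? 1u : 0u) | (e ? 2u : 0u);
}

int main() {
    return UniqueHash(0x1000, 0, true, false)
        != UniqueHash(0x1000, 0, false, false) ? 0 : 1;
}
```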

 ## Our implementation isn't actually a stack
@@ -49,97 +51,107 @@ host addresses for the corresponding compiled blocks.
 size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8
 showed degraded performance.

-    struct JitState {
-        // ...
-
-        static constexpr size_t RSBSize = 8; // MUST be a power of 2.
-        u32 rsb_ptr = 0;
-        std::array<u64, RSBSize> rsb_location_descriptors;
-        std::array<u64, RSBSize> rsb_codeptrs;
-        void ResetRSB();
-
-        // ...
-    };
+```c++
+struct JitState {
+    // ...
+
+    static constexpr size_t RSBSize = 8; // MUST be a power of 2.
+    u32 rsb_ptr = 0;
+    std::array<u64, RSBSize> rsb_location_descriptors;
+    std::array<u64, RSBSize> rsb_codeptrs;
+    void ResetRSB();
+
+    // ...
+};
+```

 ### RSB Push

 We insert our prediction at the insertion point iff the RSB doesn't already
 contain a prediction with the same `UniqueHash`.

-    void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
-        using namespace Xbyak::util;
-
-        ASSERT(inst->GetArg(0).IsImmediate());
-        u64 imm64 = inst->GetArg(0).GetU64();
-
-        Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
-        Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
-        Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
-        u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
-                           ? u64(unique_hash_to_code_ptr[imm64])
-                           : u64(code->GetReturnFromRunCodeAddress());
-
-        code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
-        code->add(index_reg, 1);
-        code->and_(index_reg, u32(JitState::RSBSize - 1));
-
-        code->mov(loc_desc_reg, u64(imm64));
-        CodePtr patch_location = code->getCurr<CodePtr>();
-        patch_unique_hash_locations[imm64].emplace_back(patch_location);
-        code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
-        code->EnsurePatchLocationSize(patch_location, 10);
-
-        Xbyak::Label label;
-        for (size_t i = 0; i < JitState::RSBSize; ++i) {
-            code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
-            code->je(label, code->T_SHORT);
-        }
-
-        code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
-        code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
-        code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
-        code->L(label);
-    }
+```c++
+void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
+    using namespace Xbyak::util;
+
+    ASSERT(inst->GetArg(0).IsImmediate());
+    u64 imm64 = inst->GetArg(0).GetU64();
+
+    Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
+    Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
+    Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
+    u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
+                       ? u64(unique_hash_to_code_ptr[imm64])
+                       : u64(code->GetReturnFromRunCodeAddress());
+
+    code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
+    code->add(index_reg, 1);
+    code->and_(index_reg, u32(JitState::RSBSize - 1));
+
+    code->mov(loc_desc_reg, u64(imm64));
+    CodePtr patch_location = code->getCurr<CodePtr>();
+    patch_unique_hash_locations[imm64].emplace_back(patch_location);
+    code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
+    code->EnsurePatchLocationSize(patch_location, 10);
+
+    Xbyak::Label label;
+    for (size_t i = 0; i < JitState::RSBSize; ++i) {
+        code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+        code->je(label, code->T_SHORT);
+    }
+
+    code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
+    code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
+    code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
+    code->L(label);
+}
+```

 In pseudocode:

-    for (i := 0 .. RSBSize-1)
-        if (rsb_location_descriptors[i] == imm64)
-            goto label;
-    rsb_ptr++;
-    rsb_ptr %= RSBSize;
-    rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash
-    rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
-    label:
+```c++
+for (i := 0 .. RSBSize-1)
+    if (rsb_location_descriptors[i] == imm64)
+        goto label;
+rsb_ptr++;
+rsb_ptr %= RSBSize;
+rsb_location_descriptors[rsb_ptr] = imm64; //< The UniqueHash
+rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
+label:
+```

 ## RSB Pop

 To check if a prediction is in the RSB, we linearly scan the RSB.

-    void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
-        using namespace Xbyak::util;
-
-        // This calculation has to match up with IREmitter::PushRSB
-        code->mov(ecx, MJitStateReg(Arm::Reg::PC));
-        code->shl(rcx, 32);
-        code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
-        code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
-        code->or_(rbx, rcx);
-
-        code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
-        for (size_t i = 0; i < JitState::RSBSize; ++i) {
-            code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
-            code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
-        }
-
-        code->jmp(rax);
-    }
+```c++
+void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
+    using namespace Xbyak::util;

+    // This calculation has to match up with IREmitter::PushRSB
+    code->mov(ecx, MJitStateReg(Arm::Reg::PC));
+    code->shl(rcx, 32);
+    code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
+    code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
+    code->or_(rbx, rcx);
+
+    code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
+    for (size_t i = 0; i < JitState::RSBSize; ++i) {
+        code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+        code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
+    }
+
+    code->jmp(rax);
+}
+```

 In pseudocode:

-    rbx := ComputeUniqueHash()
-    rax := ReturnToDispatch
-    for (i := 0 .. RSBSize-1)
-        if (rbx == rsb_location_descriptors[i])
-            rax = rsb_codeptrs[i]
-    goto rax
+```c++
+rbx := ComputeUniqueHash()
+rax := ReturnToDispatch
+for (i := 0 .. RSBSize-1)
+    if (rbx == rsb_location_descriptors[i])
+        rax = rsb_codeptrs[i]
+goto rax
+```
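
Taken together, the push and pop fragments implement a small direct-mapped prediction ring. The following plain C++ is a behavioural model of the two pseudocode listings above, for clarity only; it is not the emitted code:

```c++
#include <array>
#include <cstddef>
#include <cstdint>

struct RSB {
    static constexpr std::size_t Size = 8; // MUST be a power of 2
    std::uint32_t ptr = 0;
    std::array<std::uint64_t, Size> location_descriptors{};
    std::array<std::uint64_t, Size> codeptrs{};

    // Push: insert unless the hash is already predicted somewhere in the ring.
    void Push(std::uint64_t hash, std::uint64_t codeptr) {
        for (auto d : location_descriptors)
            if (d == hash) return;
        ptr = (ptr + 1) & (Size - 1);
        location_descriptors[ptr] = hash;
        codeptrs[ptr] = codeptr;
    }

    // Pop: linear scan; fall back to the dispatcher on a miss.
    std::uint64_t Pop(std::uint64_t hash, std::uint64_t return_to_dispatch) const {
        std::uint64_t target = return_to_dispatch;
        for (std::size_t i = 0; i < Size; ++i)
            if (location_descriptors[i] == hash)
                target = codeptrs[i];
        return target;
    }
};
```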
@@ -1,3 +1,5 @@
+# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+# SPDX-License-Identifier: GPL-3.0-or-later
 include(TargetArchitectureSpecificSources)

 add_library(dynarmic

@@ -56,14 +58,11 @@ add_library(dynarmic
     common/lut_from_list.h
     common/math_util.cpp
    common/math_util.h
-    common/memory_pool.cpp
-    common/memory_pool.h
     common/safe_ops.h
     common/spin_lock.h
     common/string_util.h
     common/u128.cpp
     common/u128.h
-    common/variant_util.h
     frontend/A32/a32_types.cpp
     frontend/A32/a32_types.h
     frontend/A64/a64_types.cpp

@@ -78,7 +77,6 @@ add_library(dynarmic
    ir/basic_block.cpp
    ir/basic_block.h
    ir/cond.h
    ir/ir_emitter.cpp
    ir/ir_emitter.h
    ir/location_descriptor.cpp
    ir/location_descriptor.h
@@ -87,78 +85,59 @@ add_library(dynarmic
     ir/opcodes.cpp
     ir/opcodes.h
     ir/opcodes.inc
-    ir/opt/constant_propagation_pass.cpp
-    ir/opt/dead_code_elimination_pass.cpp
-    ir/opt/identity_removal_pass.cpp
-    ir/opt/ir_matcher.h
-    ir/opt/naming_pass.cpp
-    ir/opt/passes.h
-    ir/opt/polyfill_pass.cpp
-    ir/opt/verification_pass.cpp
+    ir/opt_passes.cpp
+    ir/opt_passes.h
     ir/terminal.h
     ir/type.cpp
     ir/type.h
     ir/value.cpp
     ir/value.h
+    # A32
+    frontend/A32/a32_ir_emitter.cpp
+    frontend/A32/a32_ir_emitter.h
+    frontend/A32/a32_location_descriptor.cpp
+    frontend/A32/a32_location_descriptor.h
+    frontend/A32/decoder/arm.h
+    frontend/A32/decoder/arm.inc
+    frontend/A32/decoder/asimd.h
+    frontend/A32/decoder/asimd.inc
+    frontend/A32/decoder/thumb16.h
+    frontend/A32/decoder/thumb16.inc
+    frontend/A32/decoder/thumb32.h
+    frontend/A32/decoder/thumb32.inc
+    frontend/A32/decoder/vfp.h
+    frontend/A32/decoder/vfp.inc
+    frontend/A32/disassembler/disassembler.h
+    frontend/A32/disassembler/disassembler_arm.cpp
+    frontend/A32/disassembler/disassembler_thumb.cpp
+    frontend/A32/FPSCR.h
+    frontend/A32/ITState.h
+    frontend/A32/PSR.h
+    frontend/A32/translate/a32_translate.cpp
+    frontend/A32/translate/a32_translate.h
+    frontend/A32/translate/conditional_state.cpp
+    frontend/A32/translate/conditional_state.h
+    frontend/A32/translate/translate_arm.cpp
+    frontend/A32/translate/translate_thumb.cpp
+    interface/A32/a32.h
+    interface/A32/arch_version.h
+    interface/A32/config.h
+    interface/A32/coprocessor.h
+    interface/A32/coprocessor_util.h
+    interface/A32/disassembler.h
+    # A64
+    frontend/A64/a64_ir_emitter.cpp
+    frontend/A64/a64_ir_emitter.h
+    frontend/A64/a64_location_descriptor.cpp
+    frontend/A64/a64_location_descriptor.h
+    frontend/A64/decoder/a64.h
+    frontend/A64/decoder/a64.inc
+    frontend/A64/translate/a64_translate.cpp
+    frontend/A64/translate/a64_translate.h
+    interface/A64/a64.h
+    interface/A64/config.h
 )

-if ("A32" IN_LIST DYNARMIC_FRONTENDS)
-    target_sources(dynarmic PRIVATE
-        frontend/A32/a32_ir_emitter.cpp
-        frontend/A32/a32_ir_emitter.h
-        frontend/A32/a32_location_descriptor.cpp
-        frontend/A32/a32_location_descriptor.h
-        frontend/A32/decoder/arm.h
-        frontend/A32/decoder/arm.inc
-        frontend/A32/decoder/asimd.h
-        frontend/A32/decoder/asimd.inc
-        frontend/A32/decoder/thumb16.h
-        frontend/A32/decoder/thumb16.inc
-        frontend/A32/decoder/thumb32.h
-        frontend/A32/decoder/thumb32.inc
-        frontend/A32/decoder/vfp.h
-        frontend/A32/decoder/vfp.inc
-        frontend/A32/disassembler/disassembler.h
-        frontend/A32/disassembler/disassembler_arm.cpp
-        frontend/A32/disassembler/disassembler_thumb.cpp
-        frontend/A32/FPSCR.h
-        frontend/A32/ITState.h
-        frontend/A32/PSR.h
-        frontend/A32/translate/a32_translate.cpp
-        frontend/A32/translate/a32_translate.h
-        frontend/A32/translate/conditional_state.cpp
-        frontend/A32/translate/conditional_state.h
-        frontend/A32/translate/translate_arm.cpp
-        frontend/A32/translate/translate_thumb.cpp
-        interface/A32/a32.h
-        interface/A32/arch_version.h
-        interface/A32/config.h
-        interface/A32/coprocessor.h
-        interface/A32/coprocessor_util.h
-        interface/A32/disassembler.h
-        ir/opt/a32_constant_memory_reads_pass.cpp
-        ir/opt/a32_get_set_elimination_pass.cpp
-    )
-endif()
-
-if ("A64" IN_LIST DYNARMIC_FRONTENDS)
-    target_sources(dynarmic PRIVATE
-        frontend/A64/a64_ir_emitter.cpp
-        frontend/A64/a64_ir_emitter.h
-        frontend/A64/a64_location_descriptor.cpp
-        frontend/A64/a64_location_descriptor.h
-        frontend/A64/decoder/a64.h
-        frontend/A64/decoder/a64.inc
-        frontend/A64/translate/a64_translate.cpp
-        frontend/A64/translate/a64_translate.h
-        interface/A64/a64.h
-        interface/A64/config.h
-        ir/opt/a64_callback_config_pass.cpp
-        ir/opt/a64_get_set_elimination_pass.cpp
-        ir/opt/a64_merge_interpret_blocks.cpp
-    )
-endif()
-
 if ("x86_64" IN_LIST ARCHITECTURE)
     # Newer versions of xbyak (>= 7.25.0) have stricter checks that currently
     # fail in dynarmic
@@ -215,29 +194,21 @@ if ("x86_64" IN_LIST ARCHITECTURE)
     common/spin_lock_x64.h
     common/x64_disassemble.cpp
     common/x64_disassemble.h
+    # A32
+    backend/x64/a32_emit_x64.cpp
+    backend/x64/a32_emit_x64.h
+    backend/x64/a32_emit_x64_memory.cpp
+    backend/x64/a32_interface.cpp
+    backend/x64/a32_jitstate.cpp
+    backend/x64/a32_jitstate.h
+    # A64
+    backend/x64/a64_emit_x64.cpp
+    backend/x64/a64_emit_x64.h
+    backend/x64/a64_emit_x64_memory.cpp
+    backend/x64/a64_interface.cpp
+    backend/x64/a64_jitstate.cpp
+    backend/x64/a64_jitstate.h
     )

-    if ("A32" IN_LIST DYNARMIC_FRONTENDS)
-        target_architecture_specific_sources(dynarmic "x86_64"
-            backend/x64/a32_emit_x64.cpp
-            backend/x64/a32_emit_x64.h
-            backend/x64/a32_emit_x64_memory.cpp
-            backend/x64/a32_interface.cpp
-            backend/x64/a32_jitstate.cpp
-            backend/x64/a32_jitstate.h
-        )
-    endif()
-
-    if ("A64" IN_LIST DYNARMIC_FRONTENDS)
-        target_architecture_specific_sources(dynarmic "x86_64"
-            backend/x64/a64_emit_x64.cpp
-            backend/x64/a64_emit_x64.h
-            backend/x64/a64_emit_x64_memory.cpp
-            backend/x64/a64_interface.cpp
-            backend/x64/a64_jitstate.cpp
-            backend/x64/a64_jitstate.h
-        )
-    endif()
 endif()

 if ("arm64" IN_LIST ARCHITECTURE)

@@ -281,25 +252,17 @@ if ("arm64" IN_LIST ARCHITECTURE)
     backend/arm64/verbose_debugging_output.h
     common/spin_lock_arm64.cpp
     common/spin_lock_arm64.h
+    # A32
+    backend/arm64/a32_address_space.cpp
+    backend/arm64/a32_address_space.h
+    backend/arm64/a32_core.h
+    backend/arm64/a32_interface.cpp
+    # A64
+    backend/arm64/a64_address_space.cpp
+    backend/arm64/a64_address_space.h
+    backend/arm64/a64_core.h
+    backend/arm64/a64_interface.cpp
     )

-    if ("A32" IN_LIST DYNARMIC_FRONTENDS)
-        target_architecture_specific_sources(dynarmic "arm64"
-            backend/arm64/a32_address_space.cpp
-            backend/arm64/a32_address_space.h
-            backend/arm64/a32_core.h
-            backend/arm64/a32_interface.cpp
-        )
-    endif()
-
-    if ("A64" IN_LIST DYNARMIC_FRONTENDS)
-        target_architecture_specific_sources(dynarmic "arm64"
-            backend/arm64/a64_address_space.cpp
-            backend/arm64/a64_address_space.h
-            backend/arm64/a64_core.h
-            backend/arm64/a64_interface.cpp
-        )
-    endif()
 endif()

 if ("riscv" IN_LIST ARCHITECTURE)

@@ -328,21 +291,14 @@ if ("riscv" IN_LIST ARCHITECTURE)
     backend/riscv64/reg_alloc.cpp
     backend/riscv64/reg_alloc.h
     backend/riscv64/stack_layout.h
+    # A32
+    backend/riscv64/a32_address_space.cpp
+    backend/riscv64/a32_address_space.h
+    backend/riscv64/a32_core.h
+    backend/riscv64/a32_interface.cpp
+    backend/riscv64/code_block.h
     )

-    if ("A32" IN_LIST DYNARMIC_FRONTENDS)
-        target_sources(dynarmic PRIVATE
-            backend/riscv64/a32_address_space.cpp
-            backend/riscv64/a32_address_space.h
-            backend/riscv64/a32_core.h
-            backend/riscv64/a32_interface.cpp
-            backend/riscv64/code_block.h
-        )
-    endif()
-
-    if ("A64" IN_LIST DYNARMIC_FRONTENDS)
-        message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
-    endif()
+    message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
 endif()

 if (WIN32)

@@ -420,7 +376,7 @@ target_link_libraries(dynarmic
 )

 if (BOOST_NO_HEADERS)
     target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool)
 else()
     target_link_libraries(dynarmic PRIVATE Boost::headers)
 endif()
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 /* This file is part of the dynarmic project.
  * Copyright (c) 2022 MerryMage
  * SPDX-License-Identifier: 0BSD

@@ -16,7 +19,7 @@
 #include "dynarmic/frontend/A32/translate/a32_translate.h"
 #include "dynarmic/interface/A32/config.h"
 #include "dynarmic/interface/exclusive_monitor.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"

 namespace Dynarmic::Backend::Arm64 {

@@ -163,21 +166,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)

 IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
     IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});

-    Optimization::PolyfillPass(ir_block, {});
-    Optimization::NamingPass(ir_block);
-    if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
-        Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
-        Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
-        Optimization::ConstantPropagation(ir_block);
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    Optimization::IdentityRemovalPass(ir_block);
-    Optimization::VerificationPass(ir_block);
-
+    Optimization::Optimize(ir_block, conf, {});
     return ir_block;
 }
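
For readers tracking the refactor: the call sites above now delegate to a single `Optimization::Optimize` entry point. A plausible shape for such a wrapper on the A32 path, assembled only from the pass names visible elsewhere in this diff, is sketched below; the real signature, pass selection, and ordering may differ.

```c++
// Sketch: one consolidated entry point sequencing the passes that the call
// sites previously invoked by hand. Types (IR::Block, A32::UserConfig,
// PolyfillOptions) are dynarmic-internal; this is illustrative only.
namespace Optimization {
void Optimize(IR::Block& block, const A32::UserConfig& conf,
              const PolyfillOptions& polyfill) {
    PolyfillPass(block, polyfill);
    NamingPass(block);
    if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
        A32GetSetElimination(block, {.convert_nzc_to_nz = true});
        DeadCodeElimination(block);
    }
    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
        A32ConstantMemoryReads(block, conf.callbacks);
        ConstantPropagation(block);
        DeadCodeElimination(block);
    }
    IdentityRemovalPass(block);
    VerificationPass(block);
}
}  // namespace Optimization
```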

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 /* This file is part of the dynarmic project.
  * Copyright (c) 2022 MerryMage
  * SPDX-License-Identifier: 0BSD

@@ -15,7 +18,7 @@
 #include "dynarmic/frontend/A64/translate/a64_translate.h"
 #include "dynarmic/interface/A64/config.h"
 #include "dynarmic/interface/exclusive_monitor.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"

 namespace Dynarmic::Backend::Arm64 {

@@ -331,22 +334,7 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
     const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
     IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code,
                                         {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});

-    Optimization::A64CallbackConfigPass(ir_block, conf);
-    Optimization::NamingPass(ir_block);
-    if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
-        Optimization::A64GetSetElimination(ir_block);
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
-        Optimization::ConstantPropagation(ir_block);
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
-        Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
-    }
-    Optimization::VerificationPass(ir_block);
-
+    Optimization::Optimize(ir_block, conf, {});
     return ir_block;
 }
@@ -15,15 +15,15 @@
 #include <mcl/macro/architecture.hpp>
 #include "dynarmic/common/common_types.h"

-#if defined(MCL_ARCHITECTURE_X86_64)
+#if defined(ARCHITECTURE_x86_64)
 namespace Dynarmic::Backend::X64 {
 class BlockOfCode;
 }  // namespace Dynarmic::Backend::X64
-#elif defined(MCL_ARCHITECTURE_ARM64)
+#elif defined(ARCHITECTURE_arm64)
 namespace oaknut {
 class CodeBlock;
 }  // namespace oaknut
-#elif defined(MCL_ARCHITECTURE_RISCV)
+#elif defined(ARCHITECTURE_riscv64)
 namespace Dynarmic::Backend::RV64 {
 class CodeBlock;
 }  // namespace Dynarmic::Backend::RV64

@@ -33,16 +33,16 @@ class CodeBlock;

 namespace Dynarmic::Backend {

-#if defined(MCL_ARCHITECTURE_X86_64)
+#if defined(ARCHITECTURE_x86_64)
 struct FakeCall {
     u64 call_rip;
     u64 ret_rip;
 };
-#elif defined(MCL_ARCHITECTURE_ARM64)
+#elif defined(ARCHITECTURE_arm64)
 struct FakeCall {
     u64 call_pc;
 };
-#elif defined(MCL_ARCHITECTURE_RISCV)
+#elif defined(ARCHITECTURE_riscv64)
 struct FakeCall {
 };
 #else

@@ -54,11 +54,11 @@ public:
     ExceptionHandler();
     ~ExceptionHandler();

-#if defined(MCL_ARCHITECTURE_X86_64)
+#if defined(ARCHITECTURE_x86_64)
     void Register(X64::BlockOfCode& code);
-#elif defined(MCL_ARCHITECTURE_ARM64)
+#elif defined(ARCHITECTURE_arm64)
     void Register(oaknut::CodeBlock& mem, std::size_t mem_size);
-#elif defined(MCL_ARCHITECTURE_RISCV)
+#elif defined(ARCHITECTURE_riscv64)
     void Register(RV64::CodeBlock& mem, std::size_t mem_size);
 #else
 #    error "Invalid architecture"

@@ -13,15 +13,15 @@ struct ExceptionHandler::Impl final {
 ExceptionHandler::ExceptionHandler() = default;
 ExceptionHandler::~ExceptionHandler() = default;

-#if defined(MCL_ARCHITECTURE_X86_64)
+#if defined(ARCHITECTURE_x86_64)
 void ExceptionHandler::Register(X64::BlockOfCode&) {
     // Do nothing
 }
-#elif defined(MCL_ARCHITECTURE_ARM64)
+#elif defined(ARCHITECTURE_arm64)
 void ExceptionHandler::Register(oaknut::CodeBlock&, std::size_t) {
     // Do nothing
 }
-#elif defined(MCL_ARCHITECTURE_RISCV)
+#elif defined(ARCHITECTURE_riscv64)
 void ExceptionHandler::Register(RV64::CodeBlock&, std::size_t) {
     // Do nothing
 }

@@ -25,7 +25,7 @@

 #include "dynarmic/backend/exception_handler.h"

-#if defined(MCL_ARCHITECTURE_X86_64)
+#if defined(ARCHITECTURE_x86_64)

 #    include "dynarmic/backend/x64/block_of_code.h"
 #    define mig_external extern "C"

@@ -36,7 +36,7 @@

 using dynarmic_thread_state_t = x86_thread_state64_t;

-#elif defined(MCL_ARCHITECTURE_ARM64)
+#elif defined(ARCHITECTURE_arm64)

 #    include <oaknut/code_block.hpp>
 #    define mig_external extern "C"

@@ -133,7 +133,7 @@ void MachHandler::MessagePump() {
     }
 }

-#if defined(MCL_ARCHITECTURE_X86_64)
+#if defined(ARCHITECTURE_x86_64)
 kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) {
     std::lock_guard<std::mutex> guard(code_block_infos_mutex);

@@ -151,7 +151,7 @@ kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) {

     return KERN_SUCCESS;
 }
-#elif defined(MCL_ARCHITECTURE_ARM64)
+#elif defined(ARCHITECTURE_arm64)
 kern_return_t MachHandler::HandleRequest(arm_thread_state64_t* ts) {
     std::lock_guard<std::mutex> guard(code_block_infos_mutex);

@@ -269,13 +269,13 @@ private:
 ExceptionHandler::ExceptionHandler() = default;
 ExceptionHandler::~ExceptionHandler() = default;

-#if defined(MCL_ARCHITECTURE_X86_64)
+#if defined(ARCHITECTURE_x86_64)
 void ExceptionHandler::Register(X64::BlockOfCode& code) {
     const u64 code_begin = mcl::bit_cast<u64>(code.getCode());
     const u64 code_end = code_begin + code.GetTotalCodeSize();
     impl = std::make_unique<Impl>(code_begin, code_end);
 }
-#elif defined(MCL_ARCHITECTURE_ARM64)
+#elif defined(ARCHITECTURE_arm64)
 void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) {
     const u64 code_begin = mcl::bit_cast<u64>(mem.ptr());
     const u64 code_end = code_begin + size;

@@ -5,9 +5,9 @@

 #include <mcl/macro/architecture.hpp>

-#if defined(MCL_ARCHITECTURE_X86_64)
+#if defined(ARCHITECTURE_x86_64)
 #    include "dynarmic/backend/x64/mig/mach_exc_server.c"
-#elif defined(MCL_ARCHITECTURE_ARM64)
+#elif defined(ARCHITECTURE_arm64)
 #    include "dynarmic/backend/arm64/mig/mach_exc_server.c"
 #else
 #    error "Invalid architecture"
@@ -12,31 +12,17 @@
 #include <mutex>
 #include <shared_mutex>
+#include <optional>
 #include <sys/mman.h>
-#ifdef __APPLE__
-#    include <signal.h>
-#    include <sys/ucontext.h>
-#else
-#    include <signal.h>
-#    ifndef __OpenBSD__
-#        include <ucontext.h>
-#    endif
-#    ifdef __sun__
-#        include <sys/regset.h>
-#    endif
-#endif

 #include <ankerl/unordered_dense.h>

 #include "dynarmic/backend/exception_handler.h"
 #include "dynarmic/common/assert.h"
+#include "dynarmic/common/context.h"
 #include "dynarmic/common/common_types.h"
-#if defined(MCL_ARCHITECTURE_X86_64)
+#if defined(ARCHITECTURE_x86_64)
 #    include "dynarmic/backend/x64/block_of_code.h"
-#elif defined(MCL_ARCHITECTURE_ARM64)
+#elif defined(ARCHITECTURE_arm64)
 #    include <oaknut/code_block.hpp>

 #    include "dynarmic/backend/arm64/abi.h"
-#elif defined(MCL_ARCHITECTURE_RISCV)
+#elif defined(ARCHITECTURE_riscv64)
 #    include "dynarmic/backend/riscv64/code_block.h"
 #else
 #    error "Invalid architecture"

@@ -129,35 +115,8 @@ void RegisterHandler() {

 void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
     DEBUG_ASSERT(sig == SIGSEGV || sig == SIGBUS);
-#ifndef MCL_ARCHITECTURE_RISCV
-    ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(raw_context);
-#ifndef __OpenBSD__
-    auto& mctx = ucontext->uc_mcontext;
-#endif
-#endif
-
-#if defined(MCL_ARCHITECTURE_X86_64)
-#    if defined(__APPLE__)
-#        define CTX_RIP (mctx->__ss.__rip)
-#        define CTX_RSP (mctx->__ss.__rsp)
-#    elif defined(__linux__)
-#        define CTX_RIP (mctx.gregs[REG_RIP])
-#        define CTX_RSP (mctx.gregs[REG_RSP])
-#    elif defined(__FreeBSD__)
-#        define CTX_RIP (mctx.mc_rip)
-#        define CTX_RSP (mctx.mc_rsp)
-#    elif defined(__NetBSD__)
-#        define CTX_RIP (mctx.__gregs[_REG_RIP])
-#        define CTX_RSP (mctx.__gregs[_REG_RSP])
-#    elif defined(__OpenBSD__)
-#        define CTX_RIP (ucontext->sc_rip)
-#        define CTX_RSP (ucontext->sc_rsp)
-#    elif defined(__sun__)
-#        define CTX_RIP (mctx.gregs[REG_RIP])
-#        define CTX_RSP (mctx.gregs[REG_RSP])
-#    else
-#        error "Unknown platform"
-#    endif
+    CTX_DECLARE(raw_context);
+#if defined(ARCHITECTURE_x86_64)
     {
         std::shared_lock guard(sig_handler->code_block_infos_mutex);
         if (auto const iter = sig_handler->FindCodeBlockInfo(CTX_RIP); iter != sig_handler->code_block_infos.end()) {

@@ -169,48 +128,7 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
         }
     }
     fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP);
-#elif defined(MCL_ARCHITECTURE_ARM64)
-#    if defined(__APPLE__)
-#        define CTX_PC (mctx->__ss.__pc)
-#        define CTX_SP (mctx->__ss.__sp)
-#        define CTX_LR (mctx->__ss.__lr)
-#        define CTX_X(i) (mctx->__ss.__x[i])
-#        define CTX_Q(i) (mctx->__ns.__v[i])
-#    elif defined(__linux__)
-#        define CTX_PC (mctx.pc)
-#        define CTX_SP (mctx.sp)
-#        define CTX_LR (mctx.regs[30])
-#        define CTX_X(i) (mctx.regs[i])
-#        define CTX_Q(i) (fpctx->vregs[i])
-    [[maybe_unused]] const auto fpctx = [&mctx] {
-        _aarch64_ctx* header = (_aarch64_ctx*)&mctx.__reserved;
-        while (header->magic != FPSIMD_MAGIC) {
-            ASSERT(header->magic && header->size);
-            header = (_aarch64_ctx*)((char*)header + header->size);
-        }
-        return (fpsimd_context*)header;
-    }();
-#    elif defined(__FreeBSD__)
-#        define CTX_PC (mctx.mc_gpregs.gp_elr)
-#        define CTX_SP (mctx.mc_gpregs.gp_sp)
-#        define CTX_LR (mctx.mc_gpregs.gp_lr)
-#        define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
-#        define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
-#    elif defined(__NetBSD__)
-#        define CTX_PC (mctx.mc_gpregs.gp_elr)
-#        define CTX_SP (mctx.mc_gpregs.gp_sp)
-#        define CTX_LR (mctx.mc_gpregs.gp_lr)
-#        define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
-#        define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
-#    elif defined(__OpenBSD__)
-#        define CTX_PC (ucontext->sc_elr)
-#        define CTX_SP (ucontext->sc_sp)
-#        define CTX_LR (ucontext->sc_lr)
-#        define CTX_X(i) (ucontext->sc_x[i])
-#        define CTX_Q(i) (ucontext->sc_q[i])
-#    else
-#        error "Unknown platform"
-#    endif
+#elif defined(ARCHITECTURE_arm64)
     {
         std::shared_lock guard(sig_handler->code_block_infos_mutex);
         if (const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); iter != sig_handler->code_block_infos.end()) {

@@ -220,7 +138,7 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
         }
     }
     fmt::print(stderr, "Unhandled {} at pc {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_PC);
-#elif defined(MCL_ARCHITECTURE_RISCV)
+#elif defined(ARCHITECTURE_riscv64)
     ASSERT_FALSE("Unimplemented");
 #else
 #    error "Invalid architecture"
@@ -5,9 +5,9 @@

 #include <mcl/macro/architecture.hpp>

-#if defined(MCL_ARCHITECTURE_X86_64)
+#if defined(ARCHITECTURE_x86_64)
 #    include "dynarmic/backend/x64/exception_handler_windows.cpp"
-#elif defined(MCL_ARCHITECTURE_ARM64)
+#elif defined(ARCHITECTURE_arm64)
 #    include "dynarmic/backend/exception_handler_generic.cpp"
 #else
 #    error "Invalid architecture"

@@ -15,7 +15,7 @@
 #include "dynarmic/backend/riscv64/stack_layout.h"
 #include "dynarmic/frontend/A32/a32_location_descriptor.h"
 #include "dynarmic/frontend/A32/translate/a32_translate.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"

 namespace Dynarmic::Backend::RV64 {

@@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)

 IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
     IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});

-    Optimization::PolyfillPass(ir_block, {});
-    if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
-        Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
-        Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
-        Optimization::ConstantPropagation(ir_block);
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    Optimization::VerificationPass(ir_block);
-
+    Optimization::Optimize(ir_block, conf, {});
     return ir_block;
 }

@@ -28,7 +28,6 @@
 #include "dynarmic/backend/x64/nzcv_util.h"
 #include "dynarmic/backend/x64/perf_map.h"
 #include "dynarmic/backend/x64/stack_layout.h"
-#include "dynarmic/common/variant_util.h"
 #include "dynarmic/frontend/A32/a32_location_descriptor.h"
 #include "dynarmic/frontend/A32/a32_types.h"
 #include "dynarmic/interface/A32/coprocessor.h"

@@ -29,7 +29,7 @@
 #include "dynarmic/interface/A32/a32.h"
 #include "dynarmic/ir/basic_block.h"
 #include "dynarmic/ir/location_descriptor.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"

 namespace Dynarmic::A32 {

@@ -217,19 +217,7 @@ private:
     block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE);

     IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
-    Optimization::PolyfillPass(ir_block, polyfill_options);
-    Optimization::NamingPass(ir_block);
-    if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
-        Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true});
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
-        Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
-        Optimization::ConstantPropagation(ir_block);
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    Optimization::IdentityRemovalPass(ir_block);
-    Optimization::VerificationPass(ir_block);
+    Optimization::Optimize(ir_block, conf, polyfill_options);
     return emitter.Emit(ir_block);
 }
@@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
     auto const opcode = inst.GetOpcode();
     // Call the relevant Emit* member function.
     switch (opcode) {
-#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch;
+#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch;
 #define A32OPC(name, type, ...)
-#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch;
+#define A64OPC(name, type, ...) case IR::Opcode::A64##name: goto a64_branch;
 #include "dynarmic/ir/opcodes.inc"
 #undef OPCODE
 #undef A32OPC

@@ -764,7 +764,7 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
         target_code_ptr = code.GetReturnFromRunCodeAddress();
     }
     const CodePtr patch_location = code.getCurr();
-    code.mov(code.rcx, reinterpret_cast<u64>(target_code_ptr));
+    code.mov(code.rcx, u64(target_code_ptr));
     code.EnsurePatchLocationSize(patch_location, 10);
 }

@@ -25,7 +25,7 @@
 #include "dynarmic/frontend/A64/translate/a64_translate.h"
 #include "dynarmic/interface/A64/a64.h"
 #include "dynarmic/ir/basic_block.h"
-#include "dynarmic/ir/opt/passes.h"
+#include "dynarmic/ir/opt_passes.h"

 namespace Dynarmic::A64 {

@@ -80,16 +80,16 @@ public:
     };

-    // TODO: Check code alignment
-
-    const CodePtr current_code_ptr = [this] {
+    const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15));
+    const CodePtr current_code_ptr = [this, aligned_code_ptr] {
         // RSB optimization
         const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask;
         if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
             jit_state.rsb_ptr = new_rsb_ptr;
-            return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]);
+            return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]);
         }

-        return GetCurrentBlock();
+        return aligned_code_ptr;
+        //return GetCurrentBlock();
     }();

     const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr);

@@ -275,21 +275,7 @@ private:
     const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
     IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
                                         {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
-    Optimization::PolyfillPass(ir_block, polyfill_options);
-    Optimization::A64CallbackConfigPass(ir_block, conf);
-    Optimization::NamingPass(ir_block);
-    if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
-        Optimization::A64GetSetElimination(ir_block);
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
-        Optimization::ConstantPropagation(ir_block);
-        Optimization::DeadCodeElimination(ir_block);
-    }
-    if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
-        Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
-    }
-    Optimization::VerificationPass(ir_block);
+    Optimization::Optimize(ir_block, conf, polyfill_options);
     return emitter.Emit(ir_block).entrypoint;
 }
@@ -10,7 +10,6 @@

 #include <algorithm>

-#include <mcl/iterator/reverse.hpp>
 #include "dynarmic/common/common_types.h"
 #include <xbyak/xbyak.h>

@@ -76,7 +75,8 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
     const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);

     size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE);
-    for (auto const xmm : mcl::iterator::reverse(regs)) {
+    for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
+        auto const xmm = *it;
         if (HostLocIsXMM(xmm)) {
             xmm_offset -= XMM_SIZE;
             if (code.HasHostFeature(HostFeature::AVX)) {

@@ -88,9 +88,11 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
     }
     if (frame_info.stack_subtraction != 0)
         code.add(rsp, u32(frame_info.stack_subtraction));
-    for (auto const gpr : mcl::iterator::reverse(regs))
+    for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
+        auto const gpr = *it;
         if (HostLocIsGPR(gpr))
             code.pop(HostLocToReg64(gpr));
+    }
 }

 void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) {

@@ -364,8 +364,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {

     cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
     jne(return_to_caller_mxcsr_already_exited, T_NEAR);
-    lock();
-    or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
+    lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));

     SwitchMxcsrOnEntry();
     jmp(ABI_PARAM2);

@@ -415,7 +414,6 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
     }

     xor_(eax, eax);
     lock();
     xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax);

     ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
@@ -11,6 +11,7 @@
 #include <iterator>

 #include "dynarmic/common/assert.h"
+#include <boost/variant/detail/apply_visitor_binary.hpp>
 #include <mcl/bit/bit_field.hpp>
 #include <mcl/scope_exit.hpp>
 #include "dynarmic/common/common_types.h"

@@ -21,7 +22,6 @@
 #include "dynarmic/backend/x64/perf_map.h"
 #include "dynarmic/backend/x64/stack_layout.h"
 #include "dynarmic/backend/x64/verbose_debugging_output.h"
-#include "dynarmic/common/variant_util.h"
 #include "dynarmic/ir/basic_block.h"
 #include "dynarmic/ir/microinstruction.h"
 #include "dynarmic/ir/opcodes.h"

@@ -347,14 +347,14 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de
 }

 void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
-    Common::VisitVariant<void>(terminal, [this, initial_location, is_single_step](auto x) {
+    boost::apply_visitor([this, initial_location, is_single_step](auto x) {
         using T = std::decay_t<decltype(x)>;
         if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
             this->EmitTerminalImpl(x, initial_location, is_single_step);
         } else {
             ASSERT_MSG(false, "Invalid terminal");
         }
-    });
+    }, terminal);
 }

 void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
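
The replacement relies on Boost's generic visitation: `boost::apply_visitor` dispatches on the active alternative of a `boost::variant`, taking over from the project's own `Common::VisitVariant` helper. A self-contained illustration of the pattern (using a toy variant, not dynarmic's `IR::Terminal`; requires C++14 and a reasonably recent Boost):

```c++
#include <boost/variant.hpp>
#include <iostream>
#include <type_traits>

struct Invalid {};
struct LinkBlock { int next; };
using Terminal = boost::variant<Invalid, LinkBlock>;

int main() {
    Terminal term = LinkBlock{42};
    // Generic lambda visitor; the result type is deduced automatically.
    boost::apply_visitor([](auto x) {
        using T = std::decay_t<decltype(x)>;
        if constexpr (!std::is_same_v<T, Invalid>)
            std::cout << "emit terminal for block " << x.next << '\n';
        else
            std::cout << "invalid terminal\n";
    }, term);
}
```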

@@ -92,13 +92,10 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list<Xbyak::Xmm> t
                           FpFixup::Norm_Src,
                           FpFixup::Norm_Src,
                           FpFixup::Norm_Src);

-    const Xbyak::Xmm tmp = xmm16;
+    const Xbyak::Xmm tmp = xmm0;
     FCODE(vmovap)(tmp, code.BConst<fsize>(xword, denormal_to_zero));

-    for (const Xbyak::Xmm& xmm : to_daz) {
+    for (const Xbyak::Xmm& xmm : to_daz)
         FCODE(vfixupimms)(xmm, xmm, tmp, u8(0));
-    }
     return;
 }

@@ -609,8 +609,8 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) {

     const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
     const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
-    const Xbyak::Xmm right_shift = xmm16;
-    const Xbyak::Xmm tmp = xmm17;
+    const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

     code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
     code.vpxord(right_shift, right_shift, right_shift);

@@ -674,8 +674,8 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) {

     const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
     const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
-    const Xbyak::Xmm right_shift = xmm16;
-    const Xbyak::Xmm tmp = xmm17;
+    const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

     code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF));
     code.vpxorq(right_shift, right_shift, right_shift);

@@ -1955,8 +1955,8 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) {

     const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
     const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
-    const Xbyak::Xmm right_shift = xmm16;
-    const Xbyak::Xmm tmp = xmm17;
+    const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

     code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
     code.vpxord(right_shift, right_shift, right_shift);

@@ -2737,7 +2737,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst)
     const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

     if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
-        const Xbyak::Xmm c = xmm16;
+        const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
         code.vpsraq(c, a, 32);
         code.vpsllq(a, a, 32);
         code.vpsraq(a, a, 32);

@@ -5461,7 +5461,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
     if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) {
         const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]);
         const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm masked = xmm16;
+        const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm();

         code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
@@ -9,6 +9,7 @@
#include "dynarmic/backend/x64/reg_alloc.h"

#include <algorithm>
#include <limits>
#include <numeric>
#include <utility>

@@ -118,7 +119,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
    values.push_back(inst);
    ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits<uint16_t>::max)());
    total_uses += inst->UseCount();
    max_bit_width = std::max<uint8_t>(max_bit_width, GetBitWidth(inst->GetType()));
    max_bit_width = std::max<uint8_t>(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType())));
}

void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {

@@ -152,19 +153,19 @@ bool Argument::GetImmediateU1() const noexcept {

u8 Argument::GetImmediateU8() const noexcept {
    const u64 imm = value.GetImmediateAsU64();
    ASSERT(imm < 0x100);
    ASSERT(imm <= u64(std::numeric_limits<u8>::max()));
    return u8(imm);
}

u16 Argument::GetImmediateU16() const noexcept {
    const u64 imm = value.GetImmediateAsU64();
    ASSERT(imm < 0x10000);
    ASSERT(imm <= u64(std::numeric_limits<u16>::max()));
    return u16(imm);
}

u32 Argument::GetImmediateU32() const noexcept {
    const u64 imm = value.GetImmediateAsU64();
    ASSERT(imm < 0x100000000);
    ASSERT(imm <= u64(std::numeric_limits<u32>::max()));
    return u32(imm);
}

@@ -366,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def,
    if (result_def) {
        DefineValueImpl(result_def, ABI_RETURN);
    }

    for (size_t i = 0; i < args.size(); i++) {
        if (args[i]) {
            UseScratch(*args[i], args_hostloc[i]);
        } else {
            ScratchGpr(args_hostloc[i]); // TODO: Force spill
        }
    }
    // Must match with ScratchImpl
    for (auto const gpr : other_caller_save) {
        MoveOutOfTheWay(gpr);
        LocInfo(gpr).WriteLock();
    }
    for (size_t i = 0; i < args.size(); i++) {
        if (args[i] && !args[i]->get().IsVoid()) {
            UseScratch(*args[i], args_hostloc[i]);
            // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
            const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
            switch (args[i]->get().GetType()) {

@@ -389,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def,
            }
        }
    }

    for (size_t i = 0; i < args.size(); i++)
        if (!args[i]) {
            // TODO: Force spill
            ScratchGpr(args_hostloc[i]);
        }
    for (auto const caller_saved : other_caller_save)
        ScratchImpl({caller_saved});
}

void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {

@@ -559,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept {
}

HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
#if 0
    // TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for Windows,
    // but it's fine anyway. We can find other ways to cheat it later - but which?
    // We should NOT save XMM on each block entry... MAYBE xbyak has a bug on start/end?
    // TODO(lizzie): This needs to be investigated further later.
    // Do not spill XMM into other XMM
    if (!is_xmm) {
    /*if (!is_xmm) {
        // TODO(lizzie): Using lower registers (xmm0 and such) results in issues/crashes - INVESTIGATE WHY
        // Intel recommends spilling GPRs onto XMM registers IF POSSIBLE
        // TODO(lizzie): Issues on DBZ; theory: scratch XMM not properly restored after a function call?

@@ -573,8 +575,9 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
        for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i)
            if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
                return loc;
    }
#endif
    }*/
    // TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits
    // of spilling on XMM versus the potential cost of using XMM registers.
    // Otherwise go to stack spilling
    for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i)
        if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
@@ -12,6 +12,7 @@
#include <functional>
#include <optional>

#include "boost/container/small_vector.hpp"
#include "dynarmic/common/common_types.h"
#include <xbyak/xbyak.h>
#include <boost/container/static_vector.hpp>

@@ -77,13 +78,13 @@ public:
        return std::find(values.begin(), values.end(), inst) != values.end();
    }
    inline size_t GetMaxBitWidth() const noexcept {
        return max_bit_width;
        return 1 << max_bit_width;
    }
    void AddValue(IR::Inst* inst) noexcept;
    void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
private:
    //non trivial
    std::vector<IR::Inst*> values; //24
    boost::container::small_vector<IR::Inst*, 3> values; //24
    // Block state
    uint16_t total_uses = 0; //8
    //sometimes zeroed

@@ -93,10 +94,10 @@ private:
    uint16_t is_being_used_count = 0; //8
    uint16_t current_references = 0; //8
    // Value state
    uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,64,128
    uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,64,128) = (0,1,2,3,4,5,6,7)
    uint8_t lru_counter : 2 = 0; //1
    bool is_scratch : 1 = false; //1
    bool is_set_last_use : 1 = false; //1
    alignas(16) uint8_t lru_counter = 0; //1
    friend class RegAlloc;
};
static_assert(sizeof(HostLocInfo) == 64);
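Storing `max_bit_width` as a log2 value is what lets the field shrink into a 4-bit bit-field while still covering widths up to 128; the getter rebuilds the real width with a shift. A small self-contained sketch of the same encoding (illustrative names, not the real class):

```c++
#include <bit>
#include <cstddef>
#include <cstdint>

struct WidthTracker {
    uint8_t max_bit_width : 4 = 0;  // stores log2(width): 1..128 -> 0..7

    // `width` must be a power of two, so countr_zero(width) == log2(width).
    void Add(uint8_t width) {
        const uint8_t log2_width = uint8_t(std::countr_zero(width));
        if (log2_width > max_bit_width)
            max_bit_width = log2_width;
    }
    size_t Get() const { return size_t(1) << max_bit_width; }
};
```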
120 src/dynarmic/src/dynarmic/common/context.h Normal file
@@ -0,0 +1,120 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#ifdef __APPLE__
#    include <signal.h>
#    include <sys/ucontext.h>
#else
#    include <signal.h>
#    ifndef __OpenBSD__
#        include <ucontext.h>
#    endif
#    ifdef __sun__
#        include <sys/regset.h>
#    endif
#    ifdef __linux__
#        include <sys/syscall.h>
#    endif
#endif

#ifdef ARCHITECTURE_x86_64
#    ifdef __OpenBSD__
#        define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(raw_context);
#    else
#        define CTX_DECLARE(raw_context) \
             ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(raw_context); \
             [[maybe_unused]] auto& mctx = ucontext->uc_mcontext;
#    endif
#elif defined(ARCHITECTURE_arm64)
#    ifdef __OpenBSD__
#        define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(raw_context);
#    else
#        define CTX_DECLARE(raw_context) \
             ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(raw_context); \
             [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; \
             [[maybe_unused]] const auto fpctx = GetFloatingPointState(mctx);
#    endif
#endif

#if defined(ARCHITECTURE_x86_64)
#    if defined(__APPLE__)
#        define CTX_RIP (mctx->__ss.__rip)
#        define CTX_RSP (mctx->__ss.__rsp)
#    elif defined(__linux__)
#        define CTX_RIP (mctx.gregs[REG_RIP])
#        define CTX_RSP (mctx.gregs[REG_RSP])
#    elif defined(__FreeBSD__)
#        define CTX_RIP (mctx.mc_rip)
#        define CTX_RSP (mctx.mc_rsp)
#    elif defined(__NetBSD__)
#        define CTX_RIP (mctx.__gregs[_REG_RIP])
#        define CTX_RSP (mctx.__gregs[_REG_RSP])
#    elif defined(__OpenBSD__)
#        define CTX_RIP (ucontext->sc_rip)
#        define CTX_RSP (ucontext->sc_rsp)
#    elif defined(__sun__)
#        define CTX_RIP (mctx.gregs[REG_RIP])
#        define CTX_RSP (mctx.gregs[REG_RSP])
#    else
#        error "Unknown platform"
#    endif
#elif defined(ARCHITECTURE_arm64)
#    if defined(__APPLE__)
#        define CTX_PC (mctx->__ss.__pc)
#        define CTX_SP (mctx->__ss.__sp)
#        define CTX_LR (mctx->__ss.__lr)
#        define CTX_PSTATE (mctx->__ss.__cpsr)
#        define CTX_X(i) (mctx->__ss.__x[i])
#        define CTX_Q(i) (mctx->__ns.__v[i])
#        define CTX_FPSR (mctx->__ns.__fpsr)
#        define CTX_FPCR (mctx->__ns.__fpcr)
#    elif defined(__linux__)
#        define CTX_PC (mctx.pc)
#        define CTX_SP (mctx.sp)
#        define CTX_LR (mctx.regs[30])
#        define CTX_PSTATE (mctx.pstate)
#        define CTX_X(i) (mctx.regs[i])
#        define CTX_Q(i) (fpctx->vregs[i])
#        define CTX_FPSR (fpctx->fpsr)
#        define CTX_FPCR (fpctx->fpcr)
#    elif defined(__FreeBSD__)
#        define CTX_PC (mctx.mc_gpregs.gp_elr)
#        define CTX_SP (mctx.mc_gpregs.gp_sp)
#        define CTX_LR (mctx.mc_gpregs.gp_lr)
#        define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
#        define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
#    elif defined(__NetBSD__)
#        define CTX_PC (mctx.mc_gpregs.gp_elr)
#        define CTX_SP (mctx.mc_gpregs.gp_sp)
#        define CTX_LR (mctx.mc_gpregs.gp_lr)
#        define CTX_X(i) (mctx.mc_gpregs.gp_x[i])
#        define CTX_Q(i) (mctx.mc_fpregs.fp_q[i])
#    elif defined(__OpenBSD__)
#        define CTX_PC (ucontext->sc_elr)
#        define CTX_SP (ucontext->sc_sp)
#        define CTX_LR (ucontext->sc_lr)
#        define CTX_X(i) (ucontext->sc_x[i])
#        define CTX_Q(i) (ucontext->sc_q[i])
#    else
#        error "Unknown platform"
#    endif
#else
#    error "unimplemented"
#endif

#ifdef ARCHITECTURE_arm64
#ifdef __APPLE__
inline _STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) {
    return &(host_ctx->__ns);
}
#elif defined(__linux__)
inline fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
    _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved);
    while (header->magic != FPSIMD_MAGIC)
        header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast<char*>(header) + header->size);
    return reinterpret_cast<fpsimd_context*>(header);
}
#endif
#endif
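These macros exist so platform-specific signal handlers can read and patch the faulting thread context with one spelling per register. A sketch of how a handler might use them on an x86_64 build, assuming `CTX_DECLARE` and `CTX_RIP` as defined above (the handler body is illustrative only; real handlers decode the faulting instruction before skipping it):

```c++
#include <csignal>

void FaultHandler([[maybe_unused]] int sig,
                  [[maybe_unused]] siginfo_t* info,
                  void* raw_context) {
    CTX_DECLARE(raw_context);  // declares `ucontext` and (non-OpenBSD) `mctx`
#ifdef ARCHITECTURE_x86_64
    // Redirect execution past the faulting instruction. A production
    // handler would compute the real instruction length instead of +1.
    CTX_RIP += 1;
#endif
}
```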
@@ -1,13 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "dynarmic/common/memory_pool.h"

#include <cstdlib>

namespace Dynarmic::Common {


} // namespace Dynarmic::Common
@@ -1,61 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <cstddef>
#include <vector>

namespace Dynarmic::Common {

/// @tparam object_size Byte-size of objects to construct
/// @tparam slab_size Number of objects to have per slab
template<size_t object_size, size_t slab_size>
class Pool {
public:
    inline Pool() noexcept {
        AllocateNewSlab();
    }
    inline ~Pool() noexcept {
        std::free(current_slab);
        for (char* slab : slabs) {
            std::free(slab);
        }
    }

    Pool(const Pool&) = delete;
    Pool(Pool&&) = delete;

    Pool& operator=(const Pool&) = delete;
    Pool& operator=(Pool&&) = delete;

    /// @brief Returns a pointer to an `object_size`-bytes block of memory.
    [[nodiscard]] void* Alloc() noexcept {
        if (remaining == 0) {
            slabs.push_back(current_slab);
            AllocateNewSlab();
        }
        void* ret = static_cast<void*>(current_ptr);
        current_ptr += object_size;
        remaining--;
        return ret;
    }
private:
    /// @brief Allocates a completely new memory slab.
    /// Used when an entirely new slab is needed
    /// due to the current one running out of usable space.
    void AllocateNewSlab() noexcept {
        current_slab = static_cast<char*>(std::malloc(object_size * slab_size));
        current_ptr = current_slab;
        remaining = slab_size;
    }

    std::vector<char*> slabs;
    char* current_slab = nullptr;
    char* current_ptr = nullptr;
    size_t remaining = 0;
};

} // namespace Dynarmic::Common
@@ -1,29 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <boost/variant.hpp>

namespace Dynarmic::Common {
namespace detail {

template<typename ReturnT, typename Lambda>
struct VariantVisitor : boost::static_visitor<ReturnT>
                      , Lambda {
    VariantVisitor(Lambda&& lambda)
            : Lambda(std::move(lambda)) {}

    using Lambda::operator();
};

} // namespace detail

template<typename ReturnT, typename Variant, typename Lambda>
inline ReturnT VisitVariant(Variant&& variant, Lambda&& lambda) {
    return boost::apply_visitor(detail::VariantVisitor<ReturnT, Lambda>(std::move(lambda)), variant);
}

} // namespace Dynarmic::Common
@@ -9,12 +9,9 @@
#pragma once

#include <string>
#include <utility>

#include <fmt/format.h>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"

#include "dynarmic/interface/A32/coprocessor_util.h"
#include "dynarmic/ir/cond.h"

@@ -89,24 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) {

inline size_t RegNumber(Reg reg) {
    ASSERT(reg != Reg::INVALID_REG);
    return static_cast<size_t>(reg);
    return size_t(reg);
}

inline size_t RegNumber(ExtReg reg) {
    if (IsSingleExtReg(reg)) {
        return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::S0);
        return size_t(reg) - size_t(ExtReg::S0);
    } else if (IsDoubleExtReg(reg)) {
        return size_t(reg) - size_t(ExtReg::D0);
    }

    if (IsDoubleExtReg(reg)) {
        return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::D0);
    }

    if (IsQuadExtReg(reg)) {
        return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::Q0);
    }

    ASSERT_MSG(false, "Invalid extended register");
    return 0;
    ASSERT(IsQuadExtReg(reg));
    return size_t(reg) - size_t(ExtReg::Q0);
}

inline Reg operator+(Reg reg, size_t number) {
@@ -30,13 +30,13 @@ template<typename Visitor>
using ArmDecodeTable = std::array<std::vector<ArmMatcher<Visitor>>, 0x1000>;

namespace detail {
inline size_t ToFastLookupIndexArm(u32 instruction) {
inline size_t ToFastLookupIndexArm(u32 instruction) noexcept {
    return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0);
}
} // namespace detail

template<typename V>
constexpr ArmDecodeTable<V> GetArmDecodeTable() {
constexpr ArmDecodeTable<V> GetArmDecodeTable() noexcept {
    std::vector<ArmMatcher<V>> list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./arm.inc"

@@ -62,15 +62,27 @@ constexpr ArmDecodeTable<V> GetArmDecodeTable() {
}

template<typename V>
std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) {
std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) noexcept {
    alignas(64) static const auto table = GetArmDecodeTable<V>();
    const auto matches_instruction = [instruction](const auto& matcher) {
        return matcher.Matches(instruction);
    };

    const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)];
    auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
    return iter != subtable.end() ? std::optional<std::reference_wrapper<const ArmMatcher<V>>>(*iter) : std::nullopt;
}

template<typename V>
std::optional<std::string_view> GetNameARM(u32 inst) noexcept {
    std::vector<std::pair<std::string_view, ArmMatcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./arm.inc"
#undef INST
    };
    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
        return m.second.Matches(inst);
    });
    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}

} // namespace Dynarmic::A32
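The new `GetNameARM` helper recovers the name that used to be stored on every matcher, but it builds its own name/matcher table on each call, keeping the hot decode table lean. Usage might look like this, where `V`, `visitor`, `Fetch32`, and `Log` are placeholders standing in for the caller's own types and helpers:

```c++
const u32 instruction = Fetch32();  // hypothetical fetch helper
if (auto matcher = Dynarmic::A32::DecodeArm<V>(instruction)) {
    // Optional: recover the mnemonic for tracing. GetNameARM rebuilds a
    // throwaway table each call, so keep it out of hot paths.
    if (auto name = Dynarmic::A32::GetNameARM<V>(instruction)) {
        Log("{}", *name);  // hypothetical logger
    }
    matcher->get().call(visitor, instruction);  // dispatch to V's handler
}
```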
@@ -26,15 +26,12 @@ template<typename Visitor>
using ASIMDMatcher = Decoder::Matcher<Visitor, u32>;

template<typename V>
std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
    std::vector<ASIMDMatcher<V>> table = {

#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() noexcept {
    std::vector<std::pair<const char*, ASIMDMatcher<V>>> table = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./asimd.inc"
#undef INST
    };

    // Exceptions to the rule of thumb.
    const std::set<std::string> comes_first{
        "VBIC, VMOV, VMVN, VORR (immediate)",

@@ -53,29 +50,43 @@ std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
        "VQDMULH (scalar)",
        "VQRDMULH (scalar)",
    };
    const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
        return comes_first.count(matcher.GetName()) > 0;
    const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
        return comes_first.count(e.first) > 0;
    });
    const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
        return comes_last.count(matcher.GetName()) == 0;
    const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
        return comes_last.count(e.first) == 0;
    });

    // If a matcher has more bits in its mask it is more specific, so it should come first.
    std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) {
        return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
    std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) {
        return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
    });

    return table;
    std::vector<ASIMDMatcher<V>> final_table;
    std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) {
        return e.second;
    });
    return final_table;
}

template<typename V>
std::optional<std::reference_wrapper<const ASIMDMatcher<V>>> DecodeASIMD(u32 instruction) {
    static const auto table = GetASIMDDecodeTable<V>();

    const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };

    auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
std::optional<std::reference_wrapper<const ASIMDMatcher<V>>> DecodeASIMD(u32 instruction) noexcept {
    alignas(64) static const auto table = GetASIMDDecodeTable<V>();
    auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
        return matcher.Matches(instruction);
    });
    return iter != table.end() ? std::optional<std::reference_wrapper<const ASIMDMatcher<V>>>(*iter) : std::nullopt;
}

template<typename V>
std::optional<std::string_view> GetNameASIMD(u32 inst) noexcept {
    std::vector<std::pair<std::string_view, ASIMDMatcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./asimd.inc"
#undef INST
    };
    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
        return m.second.Matches(inst);
    });
    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}

} // namespace Dynarmic::A32
@@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher<Visitor, u16>;

template<typename V>
std::optional<std::reference_wrapper<const Thumb16Matcher<V>>> DecodeThumb16(u16 instruction) {
    static const std::vector<Thumb16Matcher<V>> table = {

    alignas(64) static const std::vector<Thumb16Matcher<V>> table = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)),
#include "./thumb16.inc"
#undef INST

    };

    const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };

    auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
    auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
        return matcher.Matches(instruction);
    });
    return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb16Matcher<V>>>(*iter) : std::nullopt;
}

template<typename V>
std::optional<std::string_view> GetNameThumb16(u32 inst) noexcept {
    std::vector<std::pair<std::string_view, Thumb16Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) },
#include "./thumb16.inc"
#undef INST
    };
    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
        return m.second.Matches(inst);
    });
    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}

} // namespace Dynarmic::A32
@@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher<Visitor, u32>;

template<typename V>
std::optional<std::reference_wrapper<const Thumb32Matcher<V>>> DecodeThumb32(u32 instruction) {
    static const std::vector<Thumb32Matcher<V>> table = {

    alignas(64) static const std::vector<Thumb32Matcher<V>> table = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./thumb32.inc"
#undef INST

    };

    const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };

    auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
    auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
        return matcher.Matches(instruction);
    });
    return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb32Matcher<V>>>(*iter) : std::nullopt;
}

template<typename V>
std::optional<std::string_view> GetNameThumb32(u32 inst) noexcept {
    std::vector<std::pair<std::string_view, Thumb32Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./thumb32.inc"
#undef INST
    };
    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
        return m.second.Matches(inst);
    });
    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}

} // namespace Dynarmic::A32
@@ -26,36 +26,42 @@ using VFPMatcher = Decoder::Matcher<Visitor, u32>;
template<typename V>
std::optional<std::reference_wrapper<const VFPMatcher<V>>> DecodeVFP(u32 instruction) {
    using Table = std::vector<VFPMatcher<V>>;

    static const struct Tables {
    alignas(64) static const struct Tables {
        Table unconditional;
        Table conditional;
    } tables = [] {
    } tables = []() {
        Table list = {

#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./vfp.inc"
#undef INST

        };

        const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
        auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
            return (matcher.GetMask() & 0xF0000000) == 0xF0000000;
        });

        return Tables{
            Table{list.begin(), division},
            Table{division, list.end()},
            Table{list.begin(), it},
            Table{it, list.end()},
        };
    }();

    const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000;
    const Table& table = is_unconditional ? tables.unconditional : tables.conditional;

    const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };

    auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
    auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
        return matcher.Matches(instruction);
    });
    return iter != table.end() ? std::optional<std::reference_wrapper<const VFPMatcher<V>>>(*iter) : std::nullopt;
}

template<typename V>
std::optional<std::string_view> GetNameVFP(u32 inst) noexcept {
    std::vector<std::pair<std::string_view, VFPMatcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./vfp.inc"
#undef INST
    };
    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
        return m.second.Matches(inst);
    });
    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}

} // namespace Dynarmic::A32
@@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) {
    return 0xF7F0A000; // UDF
}

bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) {
inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept {
    return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000;
}

@@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) {

template<typename V>
constexpr DecodeTable<V> GetDecodeTable() {
    std::vector<Matcher<V>> list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
    std::vector<std::pair<const char*, Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./a64.inc"
#undef INST
    };

    // If a matcher has more bits in its mask it is more specific, so it should come first.
    std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) {
    std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) {
        // If a matcher has more bits in its mask it is more specific, so it should come first.
        return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
        return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
    });

    // Exceptions to the above rule of thumb.
    std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
    std::stable_partition(list.begin(), list.end(), [&](const auto& e) {
        return std::set<std::string>{
            "MOVI, MVNI, ORR, BIC (vector, immediate)",
            "FMOV (vector, immediate)",
            "Unallocated SIMD modified immediate",
        }.count(matcher.GetName()) > 0;
        }.count(e.first) > 0;
    });

    DecodeTable<V> table{};
    for (size_t i = 0; i < table.size(); ++i) {
        for (auto matcher : list) {
            const auto expect = detail::ToFastLookupIndex(matcher.GetExpected());
            const auto mask = detail::ToFastLookupIndex(matcher.GetMask());
        for (auto const& e : list) {
            const auto expect = detail::ToFastLookupIndex(e.second.GetExpected());
            const auto mask = detail::ToFastLookupIndex(e.second.GetMask());
            if ((i & mask) == expect) {
                table[i].push_back(matcher);
                table[i].push_back(e.second);
            }
        }
    }

@@ -74,12 +71,24 @@ constexpr DecodeTable<V> GetDecodeTable() {
template<typename V>
std::optional<std::reference_wrapper<const Matcher<V>>> Decode(u32 instruction) {
    alignas(64) static const auto table = GetDecodeTable<V>();
    const auto matches_instruction = [instruction](const auto& matcher) {
        return matcher.Matches(instruction);
    };
    const auto& subtable = table[detail::ToFastLookupIndex(instruction)];
    auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
    auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) {
        return matcher.Matches(instruction);
    });
    return iter != subtable.end() ? std::optional<std::reference_wrapper<const Matcher<V>>>(*iter) : std::nullopt;
}

template<typename V>
std::optional<std::string_view> GetName(u32 inst) noexcept {
    std::vector<std::pair<std::string_view, Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./a64.inc"
#undef INST
    };
    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
        return m.second.Matches(inst);
    });
    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}

} // namespace Dynarmic::A64
@@ -20,9 +20,12 @@ bool TranslatorVisitor::B_cond(Imm<19> imm19, Cond cond) {
bool TranslatorVisitor::B_uncond(Imm<26> imm26) {
    const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend<s64>();
    const u64 target = ir.PC() + offset;

    //ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
    ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
    // Pattern to halt execution (B .)
    if (target == ir.PC()) {
        ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
        return false;
    }
    ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
    return false;
}

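The rewritten `B_uncond` special-cases a branch whose target is its own address, the idle pattern `B .`, taking the slower `LinkBlock` terminal so the dispatcher can regain control, while every other unconditional branch now gets the faster `LinkBlockFast`. The detection reduces to comparing the computed target with the current PC; a sketch of the arithmetic, reusing the names from the diff:

```c++
// imm26 is the signed 26-bit branch field; the offset is imm26:00 sign-extended.
const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend<s64>();
const u64 target = ir.PC() + offset;
// offset == 0 encodes "B ." — fast-linking this block to itself would spin
// forever without ever returning to the dispatcher, so it must stay LinkBlock.
const bool is_idle_loop = (target == ir.PC());
```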
@@ -70,11 +70,9 @@ struct detail {
        return std::make_tuple(mask, expect);
    }

    /**
     * Generates the masks and shifts for each argument.
     * A '-' in a bitstring indicates that we don't care about that value.
     * An argument is specified by a continuous string of the same character.
     */
    /// @brief Generates the masks and shifts for each argument.
    /// A '-' in a bitstring indicates that we don't care about that value.
    /// An argument is specified by a continuous string of the same character.
    template<size_t N>
    static consteval auto GetArgInfo(std::array<char, opcode_bitsize> bitstring) {
        std::array<opcode_type, N> masks = {};

@@ -98,7 +96,6 @@ struct detail {

            if constexpr (N > 0) {
                const size_t bit_position = opcode_bitsize - i - 1;

                if (arg_index >= N)
                    throw std::out_of_range("Unexpected field");

@@ -109,20 +106,16 @@ struct detail {
            }
        }
    }

#if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__)
        // Avoids a MSVC ICE, and avoids Android NDK issue.
        ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; }));
#endif

        return std::make_tuple(masks, shifts);
    }

    /**
     * This struct's Make member function generates a lambda which decodes an instruction based on
     * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a
     * template argument.
     */
    /// @brief This struct's Make member function generates a lambda which decodes an instruction
    /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is
    /// provided as a template argument.
    template<typename FnT>
    struct VisitorCaller;

@@ -130,36 +123,36 @@ struct detail {
#    pragma warning(push)
#    pragma warning(disable : 4800)  // forcing value to bool 'true' or 'false' (performance warning)
#endif
    template<typename Visitor, typename... Args, typename CallRetT>
    struct VisitorCaller<CallRetT (Visitor::*)(Args...)> {
    template<typename V, typename... Args, typename ReturnType>
    struct VisitorCaller<ReturnType (V::*)(Args...)> {
        template<size_t... iota>
        static auto Make(std::integer_sequence<size_t, iota...>,
                         CallRetT (Visitor::*const fn)(Args...),
        static constexpr auto Make(std::integer_sequence<size_t, iota...>,
                                   ReturnType (V::*const fn)(Args...),
                                   const std::array<opcode_type, sizeof...(iota)> arg_masks,
                                   const std::array<size_t, sizeof...(iota)> arg_shifts) {
            static_assert(std::is_same_v<visitor_type, Visitor>, "Member function is not from Matcher's Visitor");
            return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) {
            static_assert(std::is_same_v<visitor_type, V>, "Member function is not from Matcher's Visitor");
            return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) {
                (void)instruction;
                (void)arg_masks;
                (void)arg_shifts;
                return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
                return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
            };
        }
    };

    template<typename Visitor, typename... Args, typename CallRetT>
    struct VisitorCaller<CallRetT (Visitor::*)(Args...) const> {
    template<typename V, typename... Args, typename ReturnType>
    struct VisitorCaller<ReturnType (V::*)(Args...) const> {
        template<size_t... iota>
        static auto Make(std::integer_sequence<size_t, iota...>,
                         CallRetT (Visitor::*const fn)(Args...) const,
        static constexpr auto Make(std::integer_sequence<size_t, iota...>,
                                   ReturnType (V::*const fn)(Args...) const,
                                   const std::array<opcode_type, sizeof...(iota)> arg_masks,
                                   const std::array<size_t, sizeof...(iota)> arg_shifts) {
            static_assert(std::is_same_v<visitor_type, const Visitor>, "Member function is not from Matcher's Visitor");
            return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) {
            static_assert(std::is_same_v<visitor_type, const V>, "Member function is not from Matcher's Visitor");
            return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) {
                (void)instruction;
                (void)arg_masks;
                (void)arg_shifts;
                return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
                return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
            };
        }
    };

@@ -167,27 +160,21 @@ struct detail {
#    pragma warning(pop)
#endif

    /**
     * Creates a matcher that can match and parse instructions based on bitstring.
     * See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
     */
    template<auto bitstring, typename FnT>
    static auto GetMatcher(FnT fn, const char* const name) {
        constexpr size_t args_count = mcl::parameter_count_v<FnT>;

    /// @brief Creates a matcher that can match and parse instructions based on bitstring.
    /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
    template<auto bitstring, typename F>
    static constexpr auto GetMatcher(F fn) {
        constexpr size_t args_count = mcl::parameter_count_v<F>;
        constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring));
        constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring));
        constexpr auto arg_masks = std::get<0>(GetArgInfo<args_count>(bitstring));
        constexpr auto arg_shifts = std::get<1>(GetArgInfo<args_count>(bitstring));

        using Iota = std::make_index_sequence<args_count>;

        const auto proxy_fn = VisitorCaller<FnT>::Make(Iota(), fn, arg_masks, arg_shifts);
        return MatcherT(name, mask, expect, proxy_fn);
        const auto proxy_fn = VisitorCaller<F>::Make(std::make_index_sequence<args_count>(), fn, arg_masks, arg_shifts);
        return MatcherT(mask, expect, proxy_fn);
    }
};

#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn, name)
#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn)

} // namespace detail
} // namespace Dynarmic::Decoder
@@ -14,16 +14,12 @@

namespace Dynarmic::Decoder {

/**
 * Generic instruction handling construct.
 *
 * @tparam Visitor An arbitrary visitor type that will be passed through
 *                 to the function being handled. This type must be the
 *                 type of the first parameter in a handler function.
 *
 * @tparam OpcodeType Type representing an opcode. This must be the
 *                    type of the second parameter in a handler function.
 */
/// Generic instruction handling construct.
/// @tparam Visitor An arbitrary visitor type that will be passed through
///                 to the function being handled. This type must be the
///                 type of the first parameter in a handler function.
/// @tparam OpcodeType Type representing an opcode. This must be the
///                    type of the second parameter in a handler function.
template<typename Visitor, typename OpcodeType>
class Matcher {
public:

@@ -31,46 +27,35 @@ public:
    using visitor_type = Visitor;
    using handler_return_type = typename Visitor::instruction_return_type;
    using handler_function = std::function<handler_return_type(Visitor&, opcode_type)>;

    Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func)
            : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {}

    /// Gets the name of this type of instruction.
    const char* GetName() const {
        return name;
    }
    Matcher(opcode_type mask, opcode_type expected, handler_function func)
            : mask{mask}, expected{expected}, fn{std::move(func)} {}

    /// Gets the mask for this instruction.
    opcode_type GetMask() const {
    inline opcode_type GetMask() const noexcept {
        return mask;
    }

    /// Gets the expected value after masking for this instruction.
    opcode_type GetExpected() const {
    inline opcode_type GetExpected() const noexcept {
        return expected;
    }

    /**
     * Tests to see if the given instruction is the instruction this matcher represents.
     * @param instruction The instruction to test
     * @returns true if the given instruction matches.
     */
    bool Matches(opcode_type instruction) const {
    /// Tests to see if the given instruction is the instruction this matcher represents.
    /// @param instruction The instruction to test
    /// @returns true if the given instruction matches.
    inline bool Matches(opcode_type instruction) const noexcept {
        return (instruction & mask) == expected;
    }

    /**
     * Calls the corresponding instruction handler on visitor for this type of instruction.
     * @param v The visitor to use
     * @param instruction The instruction to decode.
     */
    handler_return_type call(Visitor& v, opcode_type instruction) const {
    /// Calls the corresponding instruction handler on visitor for this type of instruction.
    /// @param v The visitor to use
    /// @param instruction The instruction to decode.
    inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept {
        ASSERT(Matches(instruction));
        return fn(v, instruction);
    }

private:
    const char* name;
    opcode_type mask;
    opcode_type expected;
    handler_function fn;
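After this change a `Matcher` no longer carries its name; it is just a mask, an expected value, and a handler. The matching predicate itself is a two-operation bit test. A reduced, standalone sketch of that logic (independent of the real class, for illustration):

```c++
#include <cstdint>

// A matcher accepts an instruction when the fixed (non-operand) bits selected
// by `mask` equal `expected`; every remaining bit is an operand field.
constexpr bool Matches(uint32_t mask, uint32_t expected, uint32_t instruction) {
    return (instruction & mask) == expected;
}

// For an 8-bit pattern "1110----": mask = 0xF0, expected = 0xE0.
static_assert(Matches(0xF0, 0xE0, 0xE7));   // operand bits may be anything
static_assert(!Matches(0xF0, 0xE0, 0xD7));  // fixed bits differ -> no match
```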
@@ -34,6 +34,8 @@ enum class OptimizationFlag : std::uint32_t {
    MiscIROpt = 0x00000020,
    /// Optimize for code speed rather than for code size (this serves well for tight loops)
    CodeSpeed = 0x00000040,
    /// Disable verification passes
    DisableVerification = 0x00000080,

    /// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations.
    /// This unfuses fused instructions to improve performance on host CPUs without FMA support.
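Since each `OptimizationFlag` value is a single bit, the new `CodeSpeed` and `DisableVerification` entries compose with the existing flags through the usual bitwise operators. A sketch of how a frontend might opt in, assuming the `optimizations` field on the user config as used elsewhere in the dynarmic interface:

```c++
Dynarmic::A64::UserConfig config{};
config.optimizations |= Dynarmic::OptimizationFlag::CodeSpeed;            // favour speed over code size
config.optimizations |= Dynarmic::OptimizationFlag::DisableVerification;  // skip verification passes
```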
@@ -15,8 +15,6 @@

#include <fmt/format.h>
#include "dynarmic/common/assert.h"

#include "dynarmic/common/memory_pool.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/frontend/A64/a64_types.h"
#include "dynarmic/ir/cond.h"

@@ -27,8 +25,7 @@ namespace Dynarmic::IR {
Block::Block(const LocationDescriptor& location)
        : location{location},
          end_location{location},
          cond{Cond::AL},
          instruction_alloc_pool{std::make_unique<std::remove_reference_t<decltype(*instruction_alloc_pool)>>()}
          cond{Cond::AL}
{

}

@@ -40,7 +37,21 @@ Block::Block(const LocationDescriptor& location)
/// @param args A sequence of Value instances used as arguments for the instruction.
/// @returns Iterator to the newly created instruction.
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
    IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);
    // First try the "inline" buffer, then fall back to a slower slab-like allocation scheme.
    // The purpose is to avoid many new/delete calls, which invoke malloc, which invokes mmap:
    // just pool it. The inline buffer exists because many small blocks with few instructions
    // are created (due to subpar optimisations in other passes); branch-heavy code also
    // benefits hugely from the coherency of faster allocations.
    IR::Inst* inst;
    if (inlined_inst.size() < inlined_inst.max_size()) {
        inst = &inlined_inst[inlined_inst.size()];
        inlined_inst.emplace_back(opcode);
    } else {
        if (pooled_inst.empty() || pooled_inst.back().size() == pooled_inst.back().max_size())
            pooled_inst.emplace_back();
        inst = &pooled_inst.back()[pooled_inst.back().size()];
        pooled_inst.back().emplace_back(opcode);
    }
    DEBUG_ASSERT(args.size() == inst->NumArgs());
    std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
        inst->SetArg(index, arg);
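The replacement for the global `Common::Pool` is a two-tier scheme: a fixed-capacity `static_vector` absorbs small blocks with zero heap traffic, and a `stable_vector` of fixed-capacity chunks takes the overflow while keeping element addresses stable (both containers never relocate elements, so raw `Inst*` pointers stay valid). A standalone sketch of the idea, with illustrative types and the same capacities as the diff:

```c++
#include <boost/container/stable_vector.hpp>
#include <boost/container/static_vector.hpp>

struct Inst { int opcode; /* ... */ };

class InstArena {
    boost::container::static_vector<Inst, 14> inlined;  // "hot" inline buffer
    boost::container::stable_vector<boost::container::static_vector<Inst, 32>> pooled;
public:
    Inst* New(int opcode) {
        if (inlined.size() < inlined.max_size()) {
            inlined.push_back(Inst{opcode});  // no allocation at all
            return &inlined.back();
        }
        // Overflow path: open a fresh 32-element chunk when the last one is full.
        if (pooled.empty() || pooled.back().size() == pooled.back().max_size())
            pooled.emplace_back();
        pooled.back().push_back(Inst{opcode});
        return &pooled.back().back();
    }
};
```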
@@ -13,6 +13,9 @@
#include <optional>
#include <string>

#include <boost/container/container_fwd.hpp>
#include <boost/container/static_vector.hpp>
#include <boost/container/stable_vector.hpp>
#include <mcl/container/intrusive_list.hpp>
#include "dynarmic/common/common_types.h"

@@ -21,7 +24,6 @@
#include "dynarmic/ir/terminal.h"
#include "dynarmic/ir/value.h"
#include "dynarmic/ir/dense_list.h"
#include "dynarmic/common/memory_pool.h"

namespace Dynarmic::IR {

@@ -164,8 +166,12 @@ public:
        return cycle_count;
    }
private:
    /// "Hot cache" for small blocks so we don't call the global allocator
    boost::container::static_vector<Inst, 14> inlined_inst;
    /// List of instructions in this block.
    instruction_list_type instructions;
    /// "Long/far" memory pool
    boost::container::stable_vector<boost::container::static_vector<Inst, 32>> pooled_inst;
    /// Block to execute next if `cond` did not pass.
    std::optional<LocationDescriptor> cond_failed = {};
    /// Description of the starting location of this block

@@ -174,8 +180,6 @@ private:
    LocationDescriptor end_location;
    /// Conditional to pass in order to execute this block
    Cond cond;
    /// Memory pool for instruction list
    std::unique_ptr<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>> instruction_alloc_pool;
    /// Terminal instruction of this block.
    Terminal terminal = Term::Invalid{};
    /// Number of cycles this block takes to execute if the conditional fails.

@@ -183,6 +187,7 @@ private:
    /// Number of cycles this block takes to execute.
    size_t cycle_count = 0;
};
static_assert(sizeof(Block) == 2048);

/// Returns a string representation of the contents of block. Intended for debugging.
std::string DumpBlock(const IR::Block& block) noexcept;
@@ -1,21 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "dynarmic/ir/ir_emitter.h"

#include <vector>

#include "dynarmic/common/assert.h"
#include <mcl/bit_cast.hpp>

#include "dynarmic/ir/opcodes.h"

namespace Dynarmic::IR {


} // namespace Dynarmic::IR
@@ -1,70 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "dynarmic/interface/A32/config.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"

namespace Dynarmic::Optimization {

void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) {
    for (auto& inst : block) {
        switch (inst.GetOpcode()) {
        case IR::Opcode::A32ReadMemory8: {
            if (!inst.AreAllArgsImmediates()) {
                break;
            }

            const u32 vaddr = inst.GetArg(1).GetU32();
            if (cb->IsReadOnlyMemory(vaddr)) {
                const u8 value_from_memory = cb->MemoryRead8(vaddr);
                inst.ReplaceUsesWith(IR::Value{value_from_memory});
            }
            break;
        }
        case IR::Opcode::A32ReadMemory16: {
            if (!inst.AreAllArgsImmediates()) {
                break;
            }

            const u32 vaddr = inst.GetArg(1).GetU32();
            if (cb->IsReadOnlyMemory(vaddr)) {
                const u16 value_from_memory = cb->MemoryRead16(vaddr);
                inst.ReplaceUsesWith(IR::Value{value_from_memory});
            }
            break;
        }
        case IR::Opcode::A32ReadMemory32: {
            if (!inst.AreAllArgsImmediates()) {
                break;
            }

            const u32 vaddr = inst.GetArg(1).GetU32();
            if (cb->IsReadOnlyMemory(vaddr)) {
                const u32 value_from_memory = cb->MemoryRead32(vaddr);
                inst.ReplaceUsesWith(IR::Value{value_from_memory});
            }
            break;
        }
        case IR::Opcode::A32ReadMemory64: {
            if (!inst.AreAllArgsImmediates()) {
                break;
            }

            const u32 vaddr = inst.GetArg(1).GetU32();
            if (cb->IsReadOnlyMemory(vaddr)) {
                const u64 value_from_memory = cb->MemoryRead64(vaddr);
                inst.ReplaceUsesWith(IR::Value{value_from_memory});
            }
            break;
        }
        default:
            break;
        }
    }
}

} // namespace Dynarmic::Optimization
@@ -1,382 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <algorithm>
#include <array>
#include <functional>

#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"

#include "dynarmic/frontend/A32/a32_ir_emitter.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/value.h"

namespace Dynarmic::Optimization {

namespace {

void FlagsPass(IR::Block& block) {
    using Iterator = std::reverse_iterator<IR::Block::iterator>;

    struct FlagInfo {
        bool set_not_required = false;
        bool has_value_request = false;
        Iterator value_request = {};
    };
    struct ValuelessFlagInfo {
        bool set_not_required = false;
    };
    ValuelessFlagInfo nzcvq;
    ValuelessFlagInfo nzcv;
    ValuelessFlagInfo nz;
    FlagInfo c_flag;
    FlagInfo ge;

    auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) {
        if (info.has_value_request) {
            info.value_request->ReplaceUsesWith(value);
        }
        info.has_value_request = false;

        if (info.set_not_required) {
            inst->Invalidate();
        }
        info.set_not_required = true;
    };

    auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) {
        if (info.set_not_required) {
            inst->Invalidate();
        }
        info.set_not_required = true;
    };

    auto do_get = [](FlagInfo& info, Iterator inst) {
        if (info.has_value_request) {
            info.value_request->ReplaceUsesWith(IR::Value{&*inst});
        }
        info.has_value_request = true;
        info.value_request = inst;
    };

    A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}};

    for (auto inst = block.rbegin(); inst != block.rend(); ++inst) {
        auto const opcode = inst->GetOpcode();
        switch (opcode) {
        case IR::Opcode::A32GetCFlag: {
            do_get(c_flag, inst);
            break;
        }
        case IR::Opcode::A32SetCpsrNZCV: {
            if (c_flag.has_value_request) {
                ir.SetInsertionPointBefore(inst.base());  // base is one ahead
                IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)});
                c_flag.value_request->ReplaceUsesWith(c);
                c_flag.has_value_request = false;
                break;  // This case will be executed again because of the above
            }

            do_set_valueless(nzcv, inst);

            nz = {.set_not_required = true};
            c_flag = {.set_not_required = true};
            break;
        }
        case IR::Opcode::A32SetCpsrNZCVRaw: {
            if (c_flag.has_value_request) {
                nzcv.set_not_required = false;
            }

            do_set_valueless(nzcv, inst);

            nzcvq = {};
            nz = {.set_not_required = true};
            c_flag = {.set_not_required = true};
            break;
        }
        case IR::Opcode::A32SetCpsrNZCVQ: {
            if (c_flag.has_value_request) {
                nzcvq.set_not_required = false;
            }

            do_set_valueless(nzcvq, inst);

            nzcv = {.set_not_required = true};
            nz = {.set_not_required = true};
            c_flag = {.set_not_required = true};
            break;
        }
        case IR::Opcode::A32SetCpsrNZ: {
            do_set_valueless(nz, inst);

            nzcvq = {};
            nzcv = {};
            break;
        }
        case IR::Opcode::A32SetCpsrNZC: {
            if (c_flag.has_value_request) {
                c_flag.value_request->ReplaceUsesWith(inst->GetArg(1));
                c_flag.has_value_request = false;
            }

            if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) {
                const auto nz_value = inst->GetArg(0);

                inst->Invalidate();

                ir.SetInsertionPointBefore(inst.base());
                ir.SetCpsrNZ(IR::NZCV{nz_value});

                nzcvq = {};
                nzcv = {};
                nz = {.set_not_required = true};
                break;
            }

            if (nz.set_not_required && c_flag.set_not_required) {
                inst->Invalidate();
            } else if (nz.set_not_required) {
                inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker());
            }
            nz.set_not_required = true;
            c_flag.set_not_required = true;

            nzcv = {};
            nzcvq = {};
            break;
        }
        case IR::Opcode::A32SetGEFlags: {
            do_set(ge, inst->GetArg(0), inst);
            break;
        }
        case IR::Opcode::A32GetGEFlags: {
            do_get(ge, inst);
            break;
        }
        case IR::Opcode::A32SetGEFlagsCompressed: {
            ge = {.set_not_required = true};
            break;
        }
        case IR::Opcode::A32OrQFlag: {
            break;
        }
        default: {
            if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) {
                nzcvq = {};
                nzcv = {};
                nz = {};
                c_flag = {};
                ge = {};
            }
            break;
        }
        }
    }
}

void RegisterPass(IR::Block& block) {
    using Iterator = IR::Block::iterator;

    struct RegInfo {
        IR::Value register_value;
        std::optional<Iterator> last_set_instruction;
    };
    std::array<RegInfo, 15> reg_info;

    const auto do_get = [](RegInfo& info, Iterator get_inst) {
        if (info.register_value.IsEmpty()) {
            info.register_value = IR::Value(&*get_inst);
            return;
        }
        get_inst->ReplaceUsesWith(info.register_value);
    };

    const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) {
        if (info.last_set_instruction) {
            (*info.last_set_instruction)->Invalidate();
        }
        info = {
            .register_value = value,
            .last_set_instruction = set_inst,
        };
    };

    enum class ExtValueType {
        Empty,
        Single,
        Double,
        VectorDouble,
        VectorQuad,
    };
    struct ExtRegInfo {
        ExtValueType value_type = {};
        IR::Value register_value;
        std::optional<Iterator> last_set_instruction;
    };
    std::array<ExtRegInfo, 64> ext_reg_info;

    const auto do_ext_get = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, Iterator get_inst) {
        if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
            for (auto& info : infos) {
                info.get() = {
                    .value_type = type,
                    .register_value = IR::Value(&*get_inst),
                    .last_set_instruction = std::nullopt,
                };
            }
            return;
        }
        get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value);
    };

    const auto do_ext_set = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, IR::Value value, Iterator set_inst) {
        if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
            if (std::data(infos)[0].get().last_set_instruction) {
                (*std::data(infos)[0].get().last_set_instruction)->Invalidate();
            }
        }
        for (auto& info : infos) {
            info.get() = {
                .value_type = type,
                .register_value = value,
                .last_set_instruction = set_inst,
            };
        }
    };

    // Location and version don't matter here.
    A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}};

    for (auto inst = block.begin(); inst != block.end(); ++inst) {
        auto const opcode = inst->GetOpcode();
        switch (opcode) {
        case IR::Opcode::A32GetRegister: {
            const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
            ASSERT(reg != A32::Reg::PC);
            const size_t reg_index = static_cast<size_t>(reg);
            do_get(reg_info[reg_index], inst);
            break;
        }
        case IR::Opcode::A32SetRegister: {
            const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
            if (reg == A32::Reg::PC) {
                break;
            }
            const auto reg_index = static_cast<size_t>(reg);
            do_set(reg_info[reg_index], inst->GetArg(1), inst);
            break;
        }
        case IR::Opcode::A32GetExtendedRegister32: {
            const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
            const size_t reg_index = A32::RegNumber(reg);
            do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst);
            break;
        }
        case IR::Opcode::A32SetExtendedRegister32: {
            const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
            const size_t reg_index = A32::RegNumber(reg);
            do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst);
            break;
        }
        case IR::Opcode::A32GetExtendedRegister64: {
            const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
            const size_t reg_index = A32::RegNumber(reg);
            do_ext_get(ExtValueType::Double,
                       {
                           ext_reg_info[reg_index * 2 + 0],
                           ext_reg_info[reg_index * 2 + 1],
                       },
                       inst);
            break;
        }
        case IR::Opcode::A32SetExtendedRegister64: {
            const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
            const size_t reg_index = A32::RegNumber(reg);
            do_ext_set(ExtValueType::Double,
{
|
||||
ext_reg_info[reg_index * 2 + 0],
|
||||
ext_reg_info[reg_index * 2 + 1],
|
||||
},
|
||||
inst->GetArg(1),
|
||||
inst);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32GetVector: {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
const size_t reg_index = A32::RegNumber(reg);
|
||||
if (A32::IsDoubleExtReg(reg)) {
|
||||
do_ext_get(ExtValueType::VectorDouble,
|
||||
{
|
||||
ext_reg_info[reg_index * 2 + 0],
|
||||
ext_reg_info[reg_index * 2 + 1],
|
||||
},
|
||||
inst);
|
||||
} else {
|
||||
DEBUG_ASSERT(A32::IsQuadExtReg(reg));
|
||||
do_ext_get(ExtValueType::VectorQuad,
|
||||
{
|
||||
ext_reg_info[reg_index * 4 + 0],
|
||||
ext_reg_info[reg_index * 4 + 1],
|
||||
ext_reg_info[reg_index * 4 + 2],
|
||||
ext_reg_info[reg_index * 4 + 3],
|
||||
},
|
||||
inst);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::A32SetVector: {
|
||||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
|
||||
const size_t reg_index = A32::RegNumber(reg);
|
||||
if (A32::IsDoubleExtReg(reg)) {
|
||||
ir.SetInsertionPointAfter(inst);
|
||||
const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)});
|
||||
do_ext_set(ExtValueType::VectorDouble,
|
||||
{
|
||||
ext_reg_info[reg_index * 2 + 0],
|
||||
ext_reg_info[reg_index * 2 + 1],
|
||||
},
|
||||
stored_value,
|
||||
inst);
|
||||
} else {
|
||||
DEBUG_ASSERT(A32::IsQuadExtReg(reg));
|
||||
do_ext_set(ExtValueType::VectorQuad,
|
||||
{
|
||||
ext_reg_info[reg_index * 4 + 0],
|
||||
ext_reg_info[reg_index * 4 + 1],
|
||||
ext_reg_info[reg_index * 4 + 2],
|
||||
ext_reg_info[reg_index * 4 + 3],
|
||||
},
|
||||
inst->GetArg(1),
|
||||
inst);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) {
|
||||
reg_info = {};
|
||||
ext_reg_info = {};
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) {
|
||||
FlagsPass(block);
|
||||
RegisterPass(block);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Optimization
|
|
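Both passes above follow the same value-forwarding discipline: a get is answered from a per-register cache when possible, and a set invalidates the previous set when nothing has observed it. A minimal standalone sketch of that discipline, with hypothetical names (not the dynarmic API):

```c++
#include <cstdio>
#include <optional>

// Hypothetical single-register cache, mirroring RegInfo above.
struct RegCache {
    std::optional<int> value;          // last known value, if any
    bool prior_set_unobserved = false; // the previous set had no readers
};

int Get(RegCache& c, int loaded_value) {
    if (c.value)
        return *c.value;    // forwarded: the real pass calls ReplaceUsesWith here
    c.value = loaded_value; // the first get seeds the cache
    c.prior_set_unobserved = false;
    return loaded_value;
}

void Set(RegCache& c, int v) {
    // If the previous set was never read, the real pass Invalidate()s it here.
    c.value = v;
    c.prior_set_unobserved = true;
}

int main() {
    RegCache r0;
    Set(r0, 5);                       // dead as soon as the next Set lands
    Set(r0, 7);                       // kills the Set(5)
    std::printf("%d\n", Get(r0, -1)); // prints 7 without a real load
}
```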
@ -1,57 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "dynarmic/frontend/A64/a64_ir_emitter.h"
#include "dynarmic/interface/A64/config.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"

namespace Dynarmic::Optimization {

void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
    if (conf.hook_data_cache_operations) {
        return;
    }

    for (auto& inst : block) {
        if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) {
            continue;
        }

        const auto op = static_cast<A64::DataCacheOperation>(inst.GetArg(1).GetU64());
        if (op == A64::DataCacheOperation::ZeroByVA) {
            A64::IREmitter ir{block};
            ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}};
            ir.SetInsertionPointBefore(&inst);

            size_t bytes = 4 << static_cast<size_t>(conf.dczid_el0 & 0b1111);
            IR::U64 addr{inst.GetArg(2)};

            const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0));
            while (bytes >= 16) {
                ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA);
                addr = ir.Add(addr, ir.Imm64(16));
                bytes -= 16;
            }

            while (bytes >= 8) {
                ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA);
                addr = ir.Add(addr, ir.Imm64(8));
                bytes -= 8;
            }

            while (bytes >= 4) {
                ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA);
                addr = ir.Add(addr, ir.Imm64(4));
                bytes -= 4;
            }
        }
        inst.Invalidate();
    }
}

} // namespace Dynarmic::Optimization
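The deleted A64CallbackConfigPass expands DC ZVA into explicit zeroing stores. The block size comes from DCZID_EL0: the low four bits encode log2 of the block size in 4-byte words, so the byte count is 4 << (dczid_el0 & 0b1111), exactly as the code above computes. A quick check of that arithmetic:

```c++
#include <cstdio>

int main() {
    // DCZID_EL0 low nibble = log2 of the zeroing block size in 4-byte words.
    for (unsigned dczid_el0 : {0u, 2u, 4u}) {
        const unsigned bytes = 4u << (dczid_el0 & 0b1111u);
        std::printf("dczid_el0=%u -> %u bytes per DC ZVA\n", dczid_el0, bytes);
    }
    // dczid_el0 = 4 gives the common 64-byte cache line.
}
```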
@ -1,165 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <array>

#include "dynarmic/common/common_types.h"

#include "dynarmic/frontend/A64/a64_types.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/value.h"

namespace Dynarmic::Optimization {

void A64GetSetElimination(IR::Block& block) {
    using Iterator = IR::Block::iterator;

    enum class TrackingType {
        W,
        X,
        S,
        D,
        Q,
        SP,
        NZCV,
        NZCVRaw,
    };
    struct RegisterInfo {
        IR::Value register_value;
        TrackingType tracking_type;
        bool set_instruction_present = false;
        Iterator last_set_instruction;
    };
    std::array<RegisterInfo, 31> reg_info;
    std::array<RegisterInfo, 32> vec_info;
    RegisterInfo sp_info;
    RegisterInfo nzcv_info;

    const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) {
        if (info.set_instruction_present) {
            info.last_set_instruction->Invalidate();
            block.Instructions().erase(info.last_set_instruction);
        }

        info.register_value = value;
        info.tracking_type = tracking_type;
        info.set_instruction_present = true;
        info.last_set_instruction = set_inst;
    };

    const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) {
        const auto do_nothing = [&] {
            info = {};
            info.register_value = IR::Value(&*get_inst);
            info.tracking_type = tracking_type;
        };

        if (info.register_value.IsEmpty()) {
            do_nothing();
            return;
        }

        if (info.tracking_type == tracking_type) {
            get_inst->ReplaceUsesWith(info.register_value);
            return;
        }

        do_nothing();
    };

    for (auto inst = block.begin(); inst != block.end(); ++inst) {
        auto const opcode = inst->GetOpcode();
        switch (opcode) {
        case IR::Opcode::A64GetW: {
            const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
            do_get(reg_info.at(index), inst, TrackingType::W);
            break;
        }
        case IR::Opcode::A64GetX: {
            const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
            do_get(reg_info.at(index), inst, TrackingType::X);
            break;
        }
        case IR::Opcode::A64GetS: {
            const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
            do_get(vec_info.at(index), inst, TrackingType::S);
            break;
        }
        case IR::Opcode::A64GetD: {
            const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
            do_get(vec_info.at(index), inst, TrackingType::D);
            break;
        }
        case IR::Opcode::A64GetQ: {
            const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
            do_get(vec_info.at(index), inst, TrackingType::Q);
            break;
        }
        case IR::Opcode::A64GetSP: {
            do_get(sp_info, inst, TrackingType::SP);
            break;
        }
        case IR::Opcode::A64GetNZCVRaw: {
            do_get(nzcv_info, inst, TrackingType::NZCVRaw);
            break;
        }
        case IR::Opcode::A64SetW: {
            const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
            do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W);
            break;
        }
        case IR::Opcode::A64SetX: {
            const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
            do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X);
            break;
        }
        case IR::Opcode::A64SetS: {
            const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
            do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S);
            break;
        }
        case IR::Opcode::A64SetD: {
            const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
            do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D);
            break;
        }
        case IR::Opcode::A64SetQ: {
            const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
            do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q);
            break;
        }
        case IR::Opcode::A64SetSP: {
            do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP);
            break;
        }
        case IR::Opcode::A64SetNZCV: {
            do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV);
            break;
        }
        case IR::Opcode::A64SetNZCVRaw: {
            do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw);
            break;
        }
        default: {
            if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) {
                nzcv_info = {};
            }
            if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) {
                reg_info = {};
                vec_info = {};
                sp_info = {};
            }
            break;
        }
        }
    }
}

} // namespace Dynarmic::Optimization
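Note the role of TrackingType in the pass above: a cached value is only forwarded to a later get when the access width matches, otherwise do_nothing() re-seeds the cache from the get itself. The illustration below (not dynarmic code) shows why a width mismatch must block forwarding:

```c++
#include <cstdint>
#include <cstdio>

int main() {
    // Suppose A64SetX wrote this 64-bit value to X0.
    const uint64_t x_value = 0x123456789abcdef0ull;
    // A later A64GetW W0 observes only the low 32 bits:
    const uint32_t w_value = static_cast<uint32_t>(x_value);
    // Forwarding the cached 64-bit value into a 32-bit consumer would be wrong.
    std::printf("X0=%016llx, W0=%08x\n",
                static_cast<unsigned long long>(x_value), w_value);
}
```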
@ -1,57 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <boost/variant/get.hpp>
#include "dynarmic/common/common_types.h"

#include "dynarmic/frontend/A64/a64_location_descriptor.h"
#include "dynarmic/frontend/A64/translate/a64_translate.h"
#include "dynarmic/interface/A64/config.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opt/passes.h"

namespace Dynarmic::Optimization {

void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) {
    const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) {
        const auto instruction = cb->MemoryReadCode(location.PC());
        if (!instruction)
            return false;

        IR::Block new_block{location};
        A64::TranslateSingleInstruction(new_block, location, *instruction);

        if (!new_block.Instructions().empty())
            return false;

        const IR::Terminal terminal = new_block.GetTerminal();
        if (auto term = boost::get<IR::Term::Interpret>(&terminal)) {
            return term->next == location;
        }

        return false;
    };

    IR::Terminal terminal = block.GetTerminal();
    auto term = boost::get<IR::Term::Interpret>(&terminal);
    if (!term)
        return;

    A64::LocationDescriptor location{term->next};
    size_t num_instructions = 1;

    while (is_interpret_instruction(location.AdvancePC(static_cast<int>(num_instructions * 4)))) {
        num_instructions++;
    }

    term->num_instructions = num_instructions;
    block.ReplaceTerminal(terminal);
    block.CycleCount() += num_instructions - 1;
}

} // namespace Dynarmic::Optimization
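The pass above turns a run of consecutive must-interpret instructions into a single Interpret terminal: it re-translates each following instruction in isolation and, as long as the result is an empty block whose terminal interprets at the same PC, extends num_instructions. Since every A64 instruction is 4 bytes, the probe steps by AdvancePC(num_instructions * 4). A minimal sketch of the sizing loop, with assumed helper names:

```c++
#include <cstddef>
#include <cstdio>

// Assumption for illustration: 0x1000 and 0x1004 hold must-interpret instructions.
static bool MustInterpretAt(std::size_t pc) {
    return pc == 0x1000 || pc == 0x1004;
}

// Mirrors the sizing loop above: the terminal's own instruction counts as one,
// then we probe forward in fixed 4-byte A64 steps.
static std::size_t CountInterpretRun(std::size_t start_pc) {
    std::size_t n = 1;
    while (MustInterpretAt(start_pc + n * 4))
        ++n;
    return n;
}

int main() {
    std::printf("%zu\n", CountInterpretRun(0x1000)); // prints 2
}
```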
@ -1,559 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <optional>

#include "dynarmic/common/assert.h"
#include <mcl/bit/rotate.hpp>
#include <mcl/bit/swap.hpp>
#include "dynarmic/common/common_types.h"

#include "dynarmic/common/safe_ops.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/ir_emitter.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"

namespace Dynarmic::Optimization {

using Op = Dynarmic::IR::Opcode;

namespace {

// Tiny helper to avoid the need to store based off the opcode
// bit size all over the place within folding functions.
void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
    if (is_32_bit) {
        inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
    } else {
        inst.ReplaceUsesWith(IR::Value{value});
    }
}

IR::Value Value(bool is_32_bit, u64 value) {
    return is_32_bit ? IR::Value{static_cast<u32>(value)} : IR::Value{value};
}

template<typename ImmFn>
bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
    const auto lhs = inst.GetArg(0);
    const auto rhs = inst.GetArg(1);

    const bool is_lhs_immediate = lhs.IsImmediate();
    const bool is_rhs_immediate = rhs.IsImmediate();

    if (is_lhs_immediate && is_rhs_immediate) {
        const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64());
        ReplaceUsesWith(inst, is_32_bit, result);
        return false;
    }

    if (is_lhs_immediate && !is_rhs_immediate) {
        const IR::Inst* rhs_inst = rhs.GetInstRecursive();
        if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) {
            const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64());
            inst.SetArg(0, rhs_inst->GetArg(0));
            inst.SetArg(1, Value(is_32_bit, combined));
        } else {
            // Normalize
            inst.SetArg(0, rhs);
            inst.SetArg(1, lhs);
        }
    }

    if (!is_lhs_immediate && is_rhs_immediate) {
        const IR::Inst* lhs_inst = lhs.GetInstRecursive();
        if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) {
            const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64());
            inst.SetArg(0, lhs_inst->GetArg(0));
            inst.SetArg(1, Value(is_32_bit, combined));
        }
    }

    return true;
}

void FoldAdd(IR::Inst& inst, bool is_32_bit) {
    const auto lhs = inst.GetArg(0);
    const auto rhs = inst.GetArg(1);
    const auto carry = inst.GetArg(2);

    if (lhs.IsImmediate() && !rhs.IsImmediate()) {
        // Normalize
        inst.SetArg(0, rhs);
        inst.SetArg(1, lhs);
        FoldAdd(inst, is_32_bit);
        return;
    }

    if (inst.HasAssociatedPseudoOperation()) {
        return;
    }

    if (!lhs.IsImmediate() && rhs.IsImmediate()) {
        const IR::Inst* lhs_inst = lhs.GetInstRecursive();
        if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) {
            const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1();
            if (combined == 0) {
                inst.ReplaceUsesWith(lhs_inst->GetArg(0));
                return;
            }
            inst.SetArg(0, lhs_inst->GetArg(0));
            inst.SetArg(1, Value(is_32_bit, combined));
            return;
        }
        if (rhs.IsZero() && carry.IsZero()) {
            inst.ReplaceUsesWith(lhs);
            return;
        }
    }

    if (inst.AreAllArgsImmediates()) {
        const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1();
        ReplaceUsesWith(inst, is_32_bit, result);
        return;
    }
}

/// Folds AND operations based on the following:
///
/// 1. imm_x & imm_y -> result
/// 2. x & 0 -> 0
/// 3. 0 & y -> 0
/// 4. x & y -> y (where x has all bits set to 1)
/// 5. x & y -> x (where y has all bits set to 1)
///
void FoldAND(IR::Inst& inst, bool is_32_bit) {
    if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) {
        const auto rhs = inst.GetArg(1);
        if (rhs.IsZero()) {
            ReplaceUsesWith(inst, is_32_bit, 0);
        } else if (rhs.HasAllBitsSet()) {
            inst.ReplaceUsesWith(inst.GetArg(0));
        }
    }
}

/// Folds byte reversal opcodes based on the following:
///
/// 1. imm -> swap(imm)
///
void FoldByteReverse(IR::Inst& inst, Op op) {
    const auto operand = inst.GetArg(0);

    if (!operand.IsImmediate()) {
        return;
    }

    if (op == Op::ByteReverseWord) {
        const u32 result = mcl::bit::swap_bytes_32(static_cast<u32>(operand.GetImmediateAsU64()));
        inst.ReplaceUsesWith(IR::Value{result});
    } else if (op == Op::ByteReverseHalf) {
        const u16 result = mcl::bit::swap_bytes_16(static_cast<u16>(operand.GetImmediateAsU64()));
        inst.ReplaceUsesWith(IR::Value{result});
    } else {
        const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64());
        inst.ReplaceUsesWith(IR::Value{result});
    }
}

/// Folds division operations based on the following:
///
/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual)
/// 2. imm_x / imm_y -> result
/// 3. x / 1 -> x
///
void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
    const auto rhs = inst.GetArg(1);

    if (rhs.IsZero()) {
        ReplaceUsesWith(inst, is_32_bit, 0);
        return;
    }

    const auto lhs = inst.GetArg(0);
    if (lhs.IsImmediate() && rhs.IsImmediate()) {
        if (is_signed) {
            const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64();
            ReplaceUsesWith(inst, is_32_bit, static_cast<u64>(result));
        } else {
            const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64();
            ReplaceUsesWith(inst, is_32_bit, result);
        }
    } else if (rhs.IsUnsignedImmediate(1)) {
        inst.ReplaceUsesWith(IR::Value{lhs});
    }
}

// Folds EOR operations based on the following:
//
// 1. imm_x ^ imm_y -> result
// 2. x ^ 0 -> x
// 3. 0 ^ y -> y
//
void FoldEOR(IR::Inst& inst, bool is_32_bit) {
    if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) {
        const auto rhs = inst.GetArg(1);
        if (rhs.IsZero()) {
            inst.ReplaceUsesWith(inst.GetArg(0));
        }
    }
}

void FoldLeastSignificantByte(IR::Inst& inst) {
    if (!inst.AreAllArgsImmediates()) {
        return;
    }

    const auto operand = inst.GetArg(0);
    inst.ReplaceUsesWith(IR::Value{static_cast<u8>(operand.GetImmediateAsU64())});
}

void FoldLeastSignificantHalf(IR::Inst& inst) {
    if (!inst.AreAllArgsImmediates()) {
        return;
    }

    const auto operand = inst.GetArg(0);
    inst.ReplaceUsesWith(IR::Value{static_cast<u16>(operand.GetImmediateAsU64())});
}

void FoldLeastSignificantWord(IR::Inst& inst) {
    if (!inst.AreAllArgsImmediates()) {
        return;
    }

    const auto operand = inst.GetArg(0);
    inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64())});
}

void FoldMostSignificantBit(IR::Inst& inst) {
    if (!inst.AreAllArgsImmediates()) {
        return;
    }

    const auto operand = inst.GetArg(0);
    inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0});
}

void FoldMostSignificantWord(IR::Inst& inst) {
    IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);

    if (!inst.AreAllArgsImmediates()) {
        return;
    }

    const auto operand = inst.GetArg(0);
    if (carry_inst) {
        carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())});
    }
    inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64() >> 32)});
}

// Folds multiplication operations based on the following:
//
// 1. imm_x * imm_y -> result
// 2. x * 0 -> 0
// 3. 0 * y -> 0
// 4. x * 1 -> x
// 5. 1 * y -> y
//
void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
    if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) {
        const auto rhs = inst.GetArg(1);
        if (rhs.IsZero()) {
            ReplaceUsesWith(inst, is_32_bit, 0);
        } else if (rhs.IsUnsignedImmediate(1)) {
            inst.ReplaceUsesWith(inst.GetArg(0));
        }
    }
}

// Folds NOT operations if the contained value is an immediate.
void FoldNOT(IR::Inst& inst, bool is_32_bit) {
    const auto operand = inst.GetArg(0);

    if (!operand.IsImmediate()) {
        return;
    }

    const u64 result = ~operand.GetImmediateAsU64();
    ReplaceUsesWith(inst, is_32_bit, result);
}

// Folds OR operations based on the following:
//
// 1. imm_x | imm_y -> result
// 2. x | 0 -> x
// 3. 0 | y -> y
//
void FoldOR(IR::Inst& inst, bool is_32_bit) {
    if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) {
        const auto rhs = inst.GetArg(1);
        if (rhs.IsZero()) {
            inst.ReplaceUsesWith(inst.GetArg(0));
        }
    }
}

bool FoldShifts(IR::Inst& inst) {
    IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);

    // The 32-bit variants can contain 3 arguments, while the
    // 64-bit variants only contain 2.
    if (inst.NumArgs() == 3 && !carry_inst) {
        inst.SetArg(2, IR::Value(false));
    }

    const auto shift_amount = inst.GetArg(1);

    if (shift_amount.IsZero()) {
        if (carry_inst) {
            carry_inst->ReplaceUsesWith(inst.GetArg(2));
        }
        inst.ReplaceUsesWith(inst.GetArg(0));
        return false;
    }

    if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) {
        inst.SetArg(2, IR::Value(false));
    }

    if (!inst.AreAllArgsImmediates() || carry_inst) {
        return false;
    }

    return true;
}

void FoldSignExtendXToWord(IR::Inst& inst) {
    if (!inst.AreAllArgsImmediates()) {
        return;
    }

    const s64 value = inst.GetArg(0).GetImmediateAsS64();
    inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
}

void FoldSignExtendXToLong(IR::Inst& inst) {
    if (!inst.AreAllArgsImmediates()) {
        return;
    }

    const s64 value = inst.GetArg(0).GetImmediateAsS64();
    inst.ReplaceUsesWith(IR::Value{static_cast<u64>(value)});
}

void FoldSub(IR::Inst& inst, bool is_32_bit) {
    if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
        return;
    }

    const auto lhs = inst.GetArg(0);
    const auto rhs = inst.GetArg(1);
    const auto carry = inst.GetArg(2);

    const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1();
    ReplaceUsesWith(inst, is_32_bit, result);
}

void FoldZeroExtendXToWord(IR::Inst& inst) {
    if (!inst.AreAllArgsImmediates()) {
        return;
    }

    const u64 value = inst.GetArg(0).GetImmediateAsU64();
    inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
}

void FoldZeroExtendXToLong(IR::Inst& inst) {
    if (!inst.AreAllArgsImmediates()) {
        return;
    }

    const u64 value = inst.GetArg(0).GetImmediateAsU64();
    inst.ReplaceUsesWith(IR::Value{value});
}
} // Anonymous namespace

void ConstantPropagation(IR::Block& block) {
    for (auto& inst : block) {
        const auto opcode = inst.GetOpcode();

        switch (opcode) {
        case Op::LeastSignificantWord:
            FoldLeastSignificantWord(inst);
            break;
        case Op::MostSignificantWord:
            FoldMostSignificantWord(inst);
            break;
        case Op::LeastSignificantHalf:
            FoldLeastSignificantHalf(inst);
            break;
        case Op::LeastSignificantByte:
            FoldLeastSignificantByte(inst);
            break;
        case Op::MostSignificantBit:
            FoldMostSignificantBit(inst);
            break;
        case Op::IsZero32:
            if (inst.AreAllArgsImmediates()) {
                inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0});
            }
            break;
        case Op::IsZero64:
            if (inst.AreAllArgsImmediates()) {
                inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0});
            }
            break;
        case Op::LogicalShiftLeft32:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::LogicalShiftLeft64:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::LogicalShiftRight32:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, true, Safe::LogicalShiftRight<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::LogicalShiftRight64:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, false, Safe::LogicalShiftRight<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::ArithmeticShiftRight32:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::ArithmeticShiftRight64:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::RotateRight32:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, true, mcl::bit::rotate_right<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::RotateRight64:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, false, mcl::bit::rotate_right<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::LogicalShiftLeftMasked32:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f));
            }
            break;
        case Op::LogicalShiftLeftMasked64:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f));
            }
            break;
        case Op::LogicalShiftRightMasked32:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f));
            }
            break;
        case Op::LogicalShiftRightMasked64:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f));
            }
            break;
        case Op::ArithmeticShiftRightMasked32:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, true, static_cast<s32>(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f));
            }
            break;
        case Op::ArithmeticShiftRightMasked64:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, false, static_cast<s64>(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f));
            }
            break;
        case Op::RotateRightMasked32:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, true, mcl::bit::rotate_right<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32()));
            }
            break;
        case Op::RotateRightMasked64:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, false, mcl::bit::rotate_right<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64()));
            }
            break;
        case Op::Add32:
        case Op::Add64:
            FoldAdd(inst, opcode == Op::Add32);
            break;
        case Op::Sub32:
        case Op::Sub64:
            FoldSub(inst, opcode == Op::Sub32);
            break;
        case Op::Mul32:
        case Op::Mul64:
            FoldMultiply(inst, opcode == Op::Mul32);
            break;
        case Op::SignedDiv32:
        case Op::SignedDiv64:
            FoldDivide(inst, opcode == Op::SignedDiv32, true);
            break;
        case Op::UnsignedDiv32:
        case Op::UnsignedDiv64:
            FoldDivide(inst, opcode == Op::UnsignedDiv32, false);
            break;
        case Op::And32:
        case Op::And64:
            FoldAND(inst, opcode == Op::And32);
            break;
        case Op::Eor32:
        case Op::Eor64:
            FoldEOR(inst, opcode == Op::Eor32);
            break;
        case Op::Or32:
        case Op::Or64:
            FoldOR(inst, opcode == Op::Or32);
            break;
        case Op::Not32:
        case Op::Not64:
            FoldNOT(inst, opcode == Op::Not32);
            break;
        case Op::SignExtendByteToWord:
        case Op::SignExtendHalfToWord:
            FoldSignExtendXToWord(inst);
            break;
        case Op::SignExtendByteToLong:
        case Op::SignExtendHalfToLong:
        case Op::SignExtendWordToLong:
            FoldSignExtendXToLong(inst);
            break;
        case Op::ZeroExtendByteToWord:
        case Op::ZeroExtendHalfToWord:
            FoldZeroExtendXToWord(inst);
            break;
        case Op::ZeroExtendByteToLong:
        case Op::ZeroExtendHalfToLong:
        case Op::ZeroExtendWordToLong:
            FoldZeroExtendXToLong(inst);
            break;
        case Op::ByteReverseWord:
        case Op::ByteReverseHalf:
        case Op::ByteReverseDual:
            FoldByteReverse(inst, opcode);
            break;
        default:
            break;
        }
    }
}

} // namespace Dynarmic::Optimization
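One detail worth calling out in the folding code above: FoldSub evaluates lhs + ~rhs + carry rather than lhs - rhs. That is the two's-complement identity a - b = a + ~b + 1 with the borrow folded into the incoming carry bit, matching how ARM subtraction is defined. A quick numeric check:

```c++
#include <cassert>
#include <cstdint>

int main() {
    const uint64_t a = 1000, b = 42;
    // With carry = 1 (no borrow), a + ~b + carry equals plain subtraction.
    assert(a + ~b + 1 == a - b);
    // With carry = 0, the result is a - b - 1 (subtract with borrow).
    assert(a + ~b + 0 == a - b - 1);
}
```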
@ -1,23 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <mcl/iterator/reverse.hpp>

#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opt/passes.h"

namespace Dynarmic::Optimization {

void DeadCodeElimination(IR::Block& block) {
    // We iterate over the instructions in reverse order.
    // This is because removing an instruction reduces the number of uses for earlier instructions.
    for (auto& inst : mcl::iterator::reverse(block)) {
        if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) {
            inst.Invalidate();
        }
    }
}

} // namespace Dynarmic::Optimization
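The reverse iteration is what makes this a single pass: invalidating a dead instruction drops the use counts of its operands, so instructions that feed only dead code become dead before the walk reaches them. A forward walk would need repeated sweeps to reach the same fixed point. A toy demonstration of the effect, outside the IR machinery:

```c++
// Toy single-pass DCE over a straight-line SSA list (illustrative, not dynarmic).
#include <cstddef>
#include <cstdio>
#include <vector>

struct Inst {
    std::vector<int> args; // indices of earlier instructions this one uses
    int uses = 0;
    bool dead = false;
};

int main() {
    // %0 = ...; %1 = f(%0); %2 = g(%1); nothing uses %2.
    std::vector<Inst> insts(3);
    insts[1].args = {0};
    insts[2].args = {1};
    for (const auto& i : insts)
        for (int a : i.args) insts[a].uses++;

    // One reverse sweep kills the whole chain.
    for (int i = static_cast<int>(insts.size()) - 1; i >= 0; --i) {
        if (insts[i].uses == 0) {
            insts[i].dead = true;
            for (int a : insts[i].args) insts[a].uses--; // operands may die too
        }
    }
    for (std::size_t i = 0; i < insts.size(); ++i)
        std::printf("%%%zu %s\n", i, insts[i].dead ? "dead" : "live");
}
```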
@ -1,44 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2020 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <vector>

#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"

namespace Dynarmic::Optimization {

void IdentityRemovalPass(IR::Block& block) {
    std::vector<IR::Inst*> to_invalidate;

    auto iter = block.begin();
    while (iter != block.end()) {
        IR::Inst& inst = *iter;

        const size_t num_args = inst.NumArgs();
        for (size_t i = 0; i < num_args; i++) {
            while (true) {
                IR::Value arg = inst.GetArg(i);
                if (!arg.IsIdentity())
                    break;
                inst.SetArg(i, arg.GetInst()->GetArg(0));
            }
        }

        if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) {
            iter = block.Instructions().erase(inst);
            to_invalidate.push_back(&inst);
        } else {
            ++iter;
        }
    }

    for (IR::Inst* inst : to_invalidate) {
        inst->Invalidate();
    }
}

} // namespace Dynarmic::Optimization
@ -1,127 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2020 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <optional>
#include <tuple>

#include <mp/metafunction/apply.h>
#include <mp/typelist/concat.h>
#include <mp/typelist/drop.h>
#include <mp/typelist/get.h>
#include <mp/typelist/head.h>
#include <mp/typelist/list.h>
#include <mp/typelist/prepend.h>

#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/value.h"

namespace Dynarmic::Optimization::IRMatcher {

struct CaptureValue {
    using ReturnType = std::tuple<IR::Value>;

    static std::optional<ReturnType> Match(IR::Value value) {
        return std::tuple(value);
    }
};

struct CaptureInst {
    using ReturnType = std::tuple<IR::Inst*>;

    static std::optional<ReturnType> Match(IR::Value value) {
        if (value.IsImmediate())
            return std::nullopt;
        return std::tuple(value.GetInstRecursive());
    }
};

struct CaptureUImm {
    using ReturnType = std::tuple<u64>;

    static std::optional<ReturnType> Match(IR::Value value) {
        return std::tuple(value.GetImmediateAsU64());
    }
};

struct CaptureSImm {
    using ReturnType = std::tuple<s64>;

    static std::optional<ReturnType> Match(IR::Value value) {
        return std::tuple(value.GetImmediateAsS64());
    }
};

template<u64 Value>
struct UImm {
    using ReturnType = std::tuple<>;

    static std::optional<std::tuple<>> Match(IR::Value value) {
        if (value.GetImmediateAsU64() == Value)
            return std::tuple();
        return std::nullopt;
    }
};

template<s64 Value>
struct SImm {
    using ReturnType = std::tuple<>;

    static std::optional<std::tuple<>> Match(IR::Value value) {
        if (value.GetImmediateAsS64() == Value)
            return std::tuple();
        return std::nullopt;
    }
};

template<IR::Opcode Opcode, typename... Args>
struct Inst {
public:
    using ReturnType = mp::concat<std::tuple<>, typename Args::ReturnType...>;

    static std::optional<ReturnType> Match(const IR::Inst& inst) {
        if (inst.GetOpcode() != Opcode)
            return std::nullopt;
        if (inst.HasAssociatedPseudoOperation())
            return std::nullopt;
        return MatchArgs<0>(inst);
    }

    static std::optional<ReturnType> Match(IR::Value value) {
        if (value.IsImmediate())
            return std::nullopt;
        return Match(*value.GetInstRecursive());
    }

private:
    template<size_t I>
    static auto MatchArgs(const IR::Inst& inst) -> std::optional<mp::apply<mp::concat, mp::prepend<mp::drop<I, mp::list<typename Args::ReturnType...>>, std::tuple<>>>> {
        if constexpr (I >= sizeof...(Args)) {
            return std::tuple();
        } else {
            using Arg = mp::get<I, mp::list<Args...>>;

            if (const auto arg = Arg::Match(inst.GetArg(I))) {
                if (const auto rest = MatchArgs<I + 1>(inst)) {
                    return std::tuple_cat(*arg, *rest);
                }
            }

            return std::nullopt;
        }
    }
};

inline bool IsSameInst(std::tuple<IR::Inst*, IR::Inst*> t) {
    return std::get<0>(t) == std::get<1>(t);
}

inline bool IsSameInst(std::tuple<IR::Inst*, IR::Inst*, IR::Inst*> t) {
    return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t);
}

} // namespace Dynarmic::Optimization::IRMatcher
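The matchers above compose at compile time: Inst<Opcode, Args...>::Match yields an optional tuple concatenating every argument matcher's captures. A hypothetical usage sketch (the include path, opcode choice, and surrounding function are illustrative, not taken from this diff):

```c++
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opt/ir_matcher.h" // assumed path of the header above

namespace M = Dynarmic::Optimization::IRMatcher;

// Match Eor32(<any value>, <any value>), capturing both operands.
using EorPattern = M::Inst<Dynarmic::IR::Opcode::Eor32, M::CaptureValue, M::CaptureValue>;

void Example(const Dynarmic::IR::Inst& inst) {
    if (const auto capture = EorPattern::Match(inst)) {
        const auto& [lhs, rhs] = *capture; // std::tuple<IR::Value, IR::Value>
        // ... rewrite the instruction using lhs and rhs ...
        (void)lhs;
        (void)rhs;
    }
}
```

Note that CaptureUImm and CaptureSImm call GetImmediateAsU64/S64 unconditionally, so patterns using them are only safe where the matched operand is known to be an immediate.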
@ -1,18 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2023 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"

namespace Dynarmic::Optimization {

void NamingPass(IR::Block& block) {
    unsigned name = 1;
    for (auto& inst : block) {
        inst.SetName(name++);
    }
}

} // namespace Dynarmic::Optimization
@ -1,47 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::A32 {
struct UserCallbacks;
}

namespace Dynarmic::A64 {
struct UserCallbacks;
struct UserConfig;
} // namespace Dynarmic::A64

namespace Dynarmic::IR {
class Block;
}

namespace Dynarmic::Optimization {

struct PolyfillOptions {
    bool sha256 = false;
    bool vector_multiply_widen = false;

    bool operator==(const PolyfillOptions&) const = default;
};

struct A32GetSetEliminationOptions {
    bool convert_nzc_to_nz = false;
    bool convert_nz_to_nzc = false;
};

void PolyfillPass(IR::Block& block, const PolyfillOptions& opt);
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb);
void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt);
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf);
void A64GetSetElimination(IR::Block& block);
void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb);
void ConstantPropagation(IR::Block& block);
void DeadCodeElimination(IR::Block& block);
void IdentityRemovalPass(IR::Block& block);
void VerificationPass(const IR::Block& block);
void NamingPass(IR::Block& block);

} // namespace Dynarmic::Optimization
@ -1,218 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2022 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/ir_emitter.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"

namespace Dynarmic::Optimization {

namespace {

void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
    const IR::U128 x = (IR::U128)inst.GetArg(0);
    const IR::U128 y = (IR::U128)inst.GetArg(1);

    const IR::U128 t = ir.VectorExtract(x, y, 32);

    IR::U128 result = ir.ZeroVector();
    for (size_t i = 0; i < 4; i++) {
        const IR::U32 modified_element = [&] {
            const IR::U32 element = ir.VectorGetElement(32, t, i);
            const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7));
            const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18));
            const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3));

            return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
        }();

        result = ir.VectorSetElement(32, result, i, modified_element);
    }
    result = ir.VectorAdd(32, result, x);

    inst.ReplaceUsesWith(result);
}

void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
    const IR::U128 x = (IR::U128)inst.GetArg(0);
    const IR::U128 y = (IR::U128)inst.GetArg(1);
    const IR::U128 z = (IR::U128)inst.GetArg(2);

    const IR::U128 T0 = ir.VectorExtract(y, z, 32);

    const IR::U128 lower_half = [&] {
        const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64);
        const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);
        const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19);
        const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10);
        const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));
        const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0));
        return ir.VectorZeroUpper(tmp5);
    }();

    const IR::U64 upper_half = [&] {
        const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17);
        const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19);
        const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10);
        const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));

        // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2]
        const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64);
        const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64);

        const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0));
        return ir.VectorGetElement(64, tmp5, 0);
    }();

    const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half);

    inst.ReplaceUsesWith(result);
}

IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
    return ir.Eor(ir.And(ir.Eor(y, z), x), z);
}

IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
    return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z));
}

IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
    const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2));
    const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13));
    const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22));

    return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
}

IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
    const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6));
    const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11));
    const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25));

    return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
}

void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
    IR::U128 x = (IR::U128)inst.GetArg(0);
    IR::U128 y = (IR::U128)inst.GetArg(1);
    const IR::U128 w = (IR::U128)inst.GetArg(2);
    const bool part1 = inst.GetArg(3).GetU1();

    for (size_t i = 0; i < 4; i++) {
        const IR::U32 low_x = ir.VectorGetElement(32, x, 0);
        const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1);
        const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2);
        const IR::U32 high_x = ir.VectorGetElement(32, x, 3);

        const IR::U32 low_y = ir.VectorGetElement(32, y, 0);
        const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1);
        const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2);
        const IR::U32 high_y = ir.VectorGetElement(32, y, 3);

        const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y);
        const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x);

        const IR::U32 t = [&] {
            const IR::U32 w_element = ir.VectorGetElement(32, w, i);
            const IR::U32 sig = SHAhashSIGMA1(ir, low_y);

            return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element)));
        }();

        const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority));
        const IR::U32 new_low_y = ir.Add(t, high_x);

        // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3]
        const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96);
        const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96);

        x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x);
        y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y);
    }

    inst.ReplaceUsesWith(part1 ? x : y);
}

template<size_t esize, bool is_signed>
void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
    IR::U128 n = (IR::U128)inst.GetArg(0);
    IR::U128 m = (IR::U128)inst.GetArg(1);

    const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n);
    const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m);

    const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m);

    inst.ReplaceUsesWith(result);
}

} // namespace

void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
    if (polyfill == PolyfillOptions{}) {
        return;
    }

    IR::IREmitter ir{block};

    for (auto& inst : block) {
        ir.SetInsertionPointBefore(&inst);

        switch (inst.GetOpcode()) {
        case IR::Opcode::SHA256MessageSchedule0:
            if (polyfill.sha256) {
                PolyfillSHA256MessageSchedule0(ir, inst);
            }
            break;
        case IR::Opcode::SHA256MessageSchedule1:
            if (polyfill.sha256) {
                PolyfillSHA256MessageSchedule1(ir, inst);
            }
            break;
        case IR::Opcode::SHA256Hash:
            if (polyfill.sha256) {
                PolyfillSHA256Hash(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplySignedWiden8:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<8, true>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplySignedWiden16:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<16, true>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplySignedWiden32:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<32, true>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden8:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<8, false>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden16:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<16, false>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden32:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<32, false>(ir, inst);
            }
            break;
        default:
            break;
        }
    }
}

} // namespace Dynarmic::Optimization
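SHAchoose above computes the SHA-256 Ch function as ((y ^ z) & x) ^ z rather than the textbook (x & y) | (~x & z); the two are equivalent (each bit of x selects the corresponding bit of y or z) and the rewritten form needs one fewer operation. A brute-force check of the identity over all bit combinations:

```c++
#include <cassert>
#include <cstdint>

int main() {
    for (uint32_t x = 0; x < 2; ++x)
        for (uint32_t y = 0; y < 2; ++y)
            for (uint32_t z = 0; z < 2; ++z) {
                const uint32_t textbook = (x & y) | (~x & z);
                const uint32_t rewritten = ((y ^ z) & x) ^ z;
                assert((textbook & 1) == (rewritten & 1));
            }
}
```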
@ -1,51 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <cstdio>
#include <map>

#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"
#include <ankerl/unordered_dense.h>

#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/type.h"

namespace Dynarmic::Optimization {

void VerificationPass(const IR::Block& block) {
    for (const auto& inst : block) {
        for (size_t i = 0; i < inst.NumArgs(); i++) {
            const IR::Type t1 = inst.GetArg(i).GetType();
            const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i);
            if (!IR::AreTypesCompatible(t1, t2)) {
                std::puts(IR::DumpBlock(block).c_str());
                ASSERT_FALSE("above block failed validation");
            }
        }
    }

    ankerl::unordered_dense::map<IR::Inst*, size_t> actual_uses;
    for (const auto& inst : block) {
        for (size_t i = 0; i < inst.NumArgs(); i++) {
            const auto arg = inst.GetArg(i);
            if (!arg.IsImmediate()) {
                actual_uses[arg.GetInst()]++;
            }
        }
    }

    for (const auto& pair : actual_uses) {
        ASSERT(pair.first->UseCount() == pair.second);
    }
}

} // namespace Dynarmic::Optimization
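The second half of VerificationPass is a consistency check: it recounts every operand reference and asserts that the recount matches each instruction's cached UseCount(), which catches passes that rewrite arguments without maintaining the counts. The shape of the check, in a standalone sketch:

```c++
// Illustrative: recount references, compare to each node's cached count.
#include <cassert>
#include <unordered_map>
#include <vector>

struct Node {
    std::vector<Node*> args;
    int cached_use_count = 0;
};

void Verify(const std::vector<Node*>& block) {
    std::unordered_map<Node*, int> actual;
    for (Node* n : block)
        for (Node* a : n->args)
            actual[a]++;
    for (auto& [node, count] : actual)
        assert(node->cached_use_count == count);
}

int main() {
    Node a{{}, 1}, b{{&a}, 0};
    Verify({&a, &b}); // passes: a is referenced once and caches 1
}
```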
1519
src/dynarmic/src/dynarmic/ir/opt_passes.cpp
Normal file
File diff suppressed because it is too large
37
src/dynarmic/src/dynarmic/ir/opt_passes.h
Normal file

@ -0,0 +1,37 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

namespace Dynarmic::A32 {
struct UserCallbacks;
struct UserConfig;
}

namespace Dynarmic::A64 {
struct UserCallbacks;
struct UserConfig;
}

namespace Dynarmic::IR {
class Block;
}

namespace Dynarmic::Optimization {

struct PolyfillOptions {
    bool sha256 = false;
    bool vector_multiply_widen = false;

    bool operator==(const PolyfillOptions&) const = default;
};

void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options);
void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options);

} // namespace Dynarmic::Optimization
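The new header above collapses the per-pass API into a single Optimize entry point per frontend, so callers no longer hand-maintain pass selection and ordering. The call shape below is lifted from the updated A32 test later in this diff; note that it passes a callbacks pointer where the declaration takes a UserConfig, which presumably relies on a converting constructor in the config type (an assumption, not confirmed by this diff):

```c++
// Call site after this change (taken from the test update below).
IR::Block ir_block = A32::Translate(descriptor, &test_env, {});
Optimization::Optimize(ir_block, &test_env, {});
```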
@ -24,6 +24,7 @@
#include "../rand_int.h"
#include "../unicorn_emu/a32_unicorn.h"
#include "./testenv.h"
#include "../native/testenv.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/llvm_disassemble.h"
@ -46,7 +47,7 @@ using namespace Dynarmic;

template<typename Fn>
bool AnyLocationDescriptorForTerminalHas(IR::Terminal terminal, Fn fn) {
    return Common::VisitVariant<bool>(terminal, [&](auto t) -> bool {
    return boost::apply_visitor([&](auto t) -> bool {
        using T = std::decay_t<decltype(t)>;
        if constexpr (std::is_same_v<T, IR::Term::Invalid>) {
            return false;
@ -72,7 +73,7 @@ bool AnyLocationDescriptorForTerminalHas(IR::Terminal terminal, Fn fn) {
        ASSERT_MSG(false, "Invalid terminal type");
        return false;
    }
    });
    }, terminal);
}

bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) {

@ -22,6 +22,7 @@
#include "../rand_int.h"
#include "../unicorn_emu/a32_unicorn.h"
#include "./testenv.h"
#include "../native/testenv.h"
#include "dynarmic/frontend/A32/FPSCR.h"
#include "dynarmic/frontend/A32/PSR.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
@ -29,7 +30,7 @@
#include "dynarmic/frontend/A32/translate/a32_translate.h"
#include "dynarmic/interface/A32/a32.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/opt_passes.h"

using namespace Dynarmic;

@ -179,13 +180,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn<Th
    while (num_insts < instructions_to_execute_count) {
        A32::LocationDescriptor descriptor = {u32(num_insts * 4), cpsr, A32::FPSCR{}};
        IR::Block ir_block = A32::Translate(descriptor, &test_env, {});
        Optimization::NamingPass(ir_block);
        Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true});
        Optimization::DeadCodeElimination(ir_block);
        Optimization::A32ConstantMemoryReads(ir_block, &test_env);
        Optimization::ConstantPropagation(ir_block);
        Optimization::DeadCodeElimination(ir_block);
        Optimization::VerificationPass(ir_block);
        Optimization::Optimize(ir_block, &test_env, {});
        printf("\n\nIR:\n%s", IR::DumpBlock(ir_block).c_str());
        printf("\n\nx86_64:\n");
        jit.DumpDisassembly();

@ -6,6 +6,7 @@
#include <catch2/catch_test_macros.hpp>

#include "./testenv.h"
#include "../native/testenv.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/interface/A32/a32.h"

@ -8,6 +8,7 @@
#include <catch2/catch_test_macros.hpp>

#include "./testenv.h"
#include "../native/testenv.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/interface/A32/a32.h"
#include "dynarmic/interface/A32/coprocessor.h"
@ -8,6 +8,7 @@
#include <catch2/catch_test_macros.hpp>

#include "./testenv.h"
#include "../native/testenv.h"

using namespace Dynarmic;

@ -10,6 +10,7 @@
#include "dynarmic/common/common_types.h"

#include "./testenv.h"
#include "../native/testenv.h"
#include "dynarmic/interface/A32/a32.h"

static Dynarmic::A32::UserConfig GetUserConfig(ThumbTestEnv* testenv) {
@ -17,7 +17,6 @@
|
|||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
#include "../native/testenv.h"
|
||||
|
||||
template<typename InstructionType_, u32 infinite_loop_u32>
|
||||
class A32TestEnv : public Dynarmic::A32::UserCallbacks {
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/common/fp/fpsr.h"
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
|
||||
using namespace Dynarmic;
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "../rand_int.h"
|
||||
#include "../unicorn_emu/a64_unicorn.h"
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/common/fp/fpsr.h"
|
||||
#include "dynarmic/common/llvm_disassemble.h"
|
||||
|
@ -28,7 +29,7 @@
|
|||
#include "dynarmic/frontend/A64/translate/a64_translate.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/opcodes.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/opt_passes.h"
|
||||
|
||||
// Must be declared last for all necessary operator<< to be declared prior to this.
|
||||
#include <fmt/format.h>
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/interface/A64/a64.h"
|
||||
|
||||
TEST_CASE("misaligned load/store do not use page_table when detect_misaligned_access_via_page_table is set", "[a64]") {
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/interface/A64/a64.h"
|
||||
|
||||
using namespace Dynarmic;
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
#include "./testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/interface/A64/a64.h"
|
||||
|
||||
using namespace Dynarmic;
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include "dynarmic/interface/A64/a64.h"
|
||||
#include "../native/testenv.h"
|
||||
|
||||
using Vector = Dynarmic::A64::Vector;
|
||||
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
include(TargetArchitectureSpecificSources)
|
||||
|
||||
add_executable(dynarmic_tests
|
||||
|
@ -6,33 +8,24 @@ add_executable(dynarmic_tests
|
|||
fp/mantissa_util_tests.cpp
|
||||
fp/unpacked_tests.cpp
|
||||
rand_int.h
|
||||
# A32
|
||||
A32/test_arm_disassembler.cpp
|
||||
A32/test_arm_instructions.cpp
|
||||
A32/test_coprocessor.cpp
|
||||
A32/test_svc.cpp
|
||||
A32/test_thumb_instructions.cpp
|
||||
A32/testenv.h
|
||||
decoder_tests.cpp
|
||||
# A64
|
||||
A64/a64.cpp
|
||||
A64/fibonacci.cpp
|
||||
A64/fp_min_max.cpp
|
||||
A64/misaligned_page_table.cpp
|
||||
A64/test_invalidation.cpp
|
||||
A64/real_world.cpp
|
||||
A64/testenv.h
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
A32/test_arm_disassembler.cpp
|
||||
A32/test_arm_instructions.cpp
|
||||
A32/test_coprocessor.cpp
|
||||
A32/test_svc.cpp
|
||||
A32/test_thumb_instructions.cpp
|
||||
A32/testenv.h
|
||||
decoder_tests.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_link_libraries(dynarmic_tests PRIVATE merry::oaknut)
|
||||
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
A64/a64.cpp
|
||||
A64/fibonacci.cpp
|
||||
A64/fp_min_max.cpp
|
||||
A64/misaligned_page_table.cpp
|
||||
A64/test_invalidation.cpp
|
||||
A64/real_world.cpp
|
||||
A64/testenv.h
|
||||
)
|
||||
endif()
|
||||
target_link_libraries(dynarmic_tests PRIVATE merry::oaknut)
|
||||
|
||||
if (DYNARMIC_TESTS_USE_UNICORN)
|
||||
target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn)
|
||||
|
@ -40,25 +33,17 @@ if (DYNARMIC_TESTS_USE_UNICORN)
|
|||
target_sources(dynarmic_tests PRIVATE
|
||||
fuzz_util.cpp
|
||||
fuzz_util.h
|
||||
# A32
|
||||
A32/fuzz_arm.cpp
|
||||
A32/fuzz_thumb.cpp
|
||||
unicorn_emu/a32_unicorn.cpp
|
||||
unicorn_emu/a32_unicorn.h
|
||||
# A64
|
||||
A64/fuzz_with_unicorn.cpp
|
||||
A64/verify_unicorn.cpp
|
||||
unicorn_emu/a64_unicorn.cpp
|
||||
unicorn_emu/a64_unicorn.h
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
A32/fuzz_arm.cpp
|
||||
A32/fuzz_thumb.cpp
|
||||
unicorn_emu/a32_unicorn.cpp
|
||||
unicorn_emu/a32_unicorn.h
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
A64/fuzz_with_unicorn.cpp
|
||||
A64/verify_unicorn.cpp
|
||||
unicorn_emu/a64_unicorn.cpp
|
||||
unicorn_emu/a64_unicorn.h
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if ("riscv" IN_LIST ARCHITECTURE)
|
||||
|
@ -69,9 +54,6 @@ if ("x86_64" IN_LIST ARCHITECTURE)
|
|||
target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak)
|
||||
target_architecture_specific_sources(dynarmic_tests "x86_64"
|
||||
x64_cpu_info.cpp
|
||||
)
|
||||
|
||||
target_architecture_specific_sources(dynarmic_tests "x86_64"
|
||||
native/preserve_xmm.cpp
|
||||
)
|
||||
|
||||
|
@ -85,50 +67,70 @@ endif()
|
|||
|
||||
include(CreateDirectoryGroups)
|
||||
|
||||
if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
|
||||
add_executable(dynarmic_print_info
|
||||
print_info.cpp
|
||||
)
|
||||
|
||||
create_target_directory_groups(dynarmic_print_info)
|
||||
|
||||
target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl)
|
||||
target_include_directories(dynarmic_print_info PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
#
|
||||
# dynarmic_print_info
|
||||
#
|
||||
add_executable(dynarmic_print_info
|
||||
print_info.cpp
|
||||
)
|
||||
create_target_directory_groups(dynarmic_print_info)
|
||||
target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl)
|
||||
if (BOOST_NO_HEADERS)
|
||||
target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool)
|
||||
else()
|
||||
target_link_libraries(dynarmic_print_info PRIVATE Boost::headers)
|
||||
endif()
|
||||
target_include_directories(dynarmic_print_info PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
|
||||
if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
|
||||
add_executable(dynarmic_test_generator
|
||||
fuzz_util.cpp
|
||||
fuzz_util.h
|
||||
test_generator.cpp
|
||||
)
|
||||
#
|
||||
# dynarmic_test_generator
|
||||
#
|
||||
add_executable(dynarmic_test_generator
|
||||
fuzz_util.cpp
|
||||
fuzz_util.h
|
||||
test_generator.cpp
|
||||
)
|
||||
|
||||
create_target_directory_groups(dynarmic_test_generator)
|
||||
create_target_directory_groups(dynarmic_test_generator)
|
||||
|
||||
target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl)
|
||||
target_include_directories(dynarmic_test_generator PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl)
|
||||
if (BOOST_NO_HEADERS)
|
||||
target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool)
|
||||
else()
|
||||
target_link_libraries(dynarmic_test_generator PRIVATE Boost::headers)
|
||||
endif()
|
||||
target_include_directories(dynarmic_test_generator PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
|
||||
if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
|
||||
add_executable(dynarmic_test_reader
|
||||
test_reader.cpp
|
||||
)
|
||||
|
||||
create_target_directory_groups(dynarmic_test_reader)
|
||||
|
||||
target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl)
|
||||
target_include_directories(dynarmic_test_reader PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
#
|
||||
# dynarmic_test_reader
|
||||
#
|
||||
add_executable(dynarmic_test_reader
|
||||
test_reader.cpp
|
||||
)
|
||||
create_target_directory_groups(dynarmic_test_reader)
|
||||
target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl)
|
||||
if (BOOST_NO_HEADERS)
|
||||
target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool)
|
||||
else()
|
||||
target_link_libraries(dynarmic_test_reader PRIVATE Boost::headers)
|
||||
endif()
|
||||
target_include_directories(dynarmic_test_reader PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
|
||||
#
|
||||
create_target_directory_groups(dynarmic_tests)
|
||||
|
||||
target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl)
|
||||
target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl)
|
||||
if (BOOST_NO_HEADERS)
|
||||
target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool)
|
||||
else()
|
||||
target_link_libraries(dynarmic_tests PRIVATE Boost::headers)
|
||||
endif()
|
||||
target_include_directories(dynarmic_tests PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
|
|
|
@ -36,22 +36,12 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") {
|
|||
|
||||
const auto is_decode_error = [&get_ir](const A32::ASIMDMatcher<A32::TranslatorVisitor>& matcher, u32 instruction) {
|
||||
const auto block = get_ir(matcher, instruction);
|
||||
|
||||
for (const auto& ir_inst : block) {
|
||||
if (ir_inst.GetOpcode() == IR::Opcode::A32ExceptionRaised) {
|
||||
if (static_cast<A32::Exception>(ir_inst.GetArg(1).GetU64()) == A32::Exception::DecodeError) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return std::find_if(block.cbegin(), block.cend(), [](auto const& e) {
|
||||
return e.GetOpcode() == IR::Opcode::A32ExceptionRaised && A32::Exception(e.GetArg(1).GetU64()) == A32::Exception::DecodeError;
|
||||
}) != block.cend();
|
||||
};
|
||||
|
||||
for (auto iter = table.cbegin(); iter != table.cend(); ++iter) {
|
||||
if (std::strncmp(iter->GetName(), "UNALLOCATED", 11) == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const u32 expect = iter->GetExpected();
|
||||
const u32 mask = iter->GetMask();
|
||||
u32 x = 0;
|
||||
|
@ -59,15 +49,17 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") {
|
|||
const u32 instruction = expect | x;
|
||||
|
||||
const bool iserr = is_decode_error(*iter, instruction);
|
||||
const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { return m.Matches(instruction); });
|
||||
const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) {
|
||||
return m.Matches(instruction);
|
||||
});
|
||||
const bool altiserr = is_decode_error(*alternative, instruction);
|
||||
|
||||
INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction);
|
||||
INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect);
|
||||
INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x);
|
||||
INFO("Name: " << iter->GetName());
|
||||
INFO("Name: " << *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction));
|
||||
INFO("iserr: " << iserr);
|
||||
INFO("alternative: " << alternative->GetName());
|
||||
//INFO("alternative: " << alternative->GetName());
|
||||
INFO("altiserr: " << altiserr);
|
||||
|
||||
REQUIRE(((!iserr && alternative == iter) || (iserr && alternative != iter && !altiserr)));
|
||||
|
@ -75,4 +67,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") {
|
|||
x = ((x | mask) + 1) & ~mask;
|
||||
} while (x != 0);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -6,6 +6,7 @@
|
|||
#include <immintrin.h>
|
||||
|
||||
#include "../A64/testenv.h"
|
||||
#include "../native/testenv.h"
|
||||
#include "dynarmic/common/fp/fpsr.h"
|
||||
#include "dynarmic/interface/exclusive_monitor.h"
|
||||
|
||||
|
|
|
@ -32,27 +32,26 @@
|
|||
#include "dynarmic/frontend/A64/translate/a64_translate.h"
|
||||
#include "dynarmic/frontend/A64/translate/impl/impl.h"
|
||||
#include "dynarmic/interface/A32/a32.h"
|
||||
#include "dynarmic/interface/A32/config.h"
|
||||
#include "dynarmic/interface/A32/disassembler.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
#include "dynarmic/ir/opt/passes.h"
|
||||
#include "dynarmic/ir/opt_passes.h"
|
||||
|
||||
using namespace Dynarmic;
|
||||
|
||||
const char* GetNameOfA32Instruction(u32 instruction) {
|
||||
if (auto vfp_decoder = A32::DecodeVFP<A32::TranslatorVisitor>(instruction)) {
|
||||
return vfp_decoder->get().GetName();
|
||||
} else if (auto asimd_decoder = A32::DecodeASIMD<A32::TranslatorVisitor>(instruction)) {
|
||||
return asimd_decoder->get().GetName();
|
||||
} else if (auto decoder = A32::DecodeArm<A32::TranslatorVisitor>(instruction)) {
|
||||
return decoder->get().GetName();
|
||||
}
|
||||
std::string_view GetNameOfA32Instruction(u32 instruction) {
|
||||
if (auto const vfp_decoder = A32::DecodeVFP<A32::TranslatorVisitor>(instruction))
|
||||
return *A32::GetNameVFP<A32::TranslatorVisitor>(instruction);
|
||||
else if (auto const asimd_decoder = A32::DecodeASIMD<A32::TranslatorVisitor>(instruction))
|
||||
return *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction);
|
||||
else if (auto const decoder = A32::DecodeArm<A32::TranslatorVisitor>(instruction))
|
||||
return *A32::GetNameARM<A32::TranslatorVisitor>(instruction);
|
||||
return "<null>";
|
||||
}
|
||||
|
||||
const char* GetNameOfA64Instruction(u32 instruction) {
|
||||
if (auto decoder = A64::Decode<A64::TranslatorVisitor>(instruction)) {
|
||||
return decoder->get().GetName();
|
||||
}
|
||||
std::string_view GetNameOfA64Instruction(u32 instruction) {
|
||||
if (auto const decoder = A64::Decode<A64::TranslatorVisitor>(instruction))
|
||||
return *A64::GetName<A64::TranslatorVisitor>(instruction);
|
||||
return "<null>";
|
||||
}
|
||||
|
||||
|
@ -64,18 +63,9 @@ void PrintA32Instruction(u32 instruction) {
|
|||
IR::Block ir_block{location};
|
||||
const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction);
|
||||
fmt::print("should_continue: {}\n\n", should_continue);
|
||||
|
||||
Optimization::NamingPass(ir_block);
|
||||
|
||||
fmt::print("IR:\n");
|
||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||
|
||||
Optimization::A32GetSetElimination(ir_block, {});
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::IdentityRemovalPass(ir_block);
|
||||
|
||||
Optimization::Optimize(ir_block, A32::UserConfig{}, {});
|
||||
fmt::print("Optimized IR:\n");
|
||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||
}
|
||||
|
@ -88,18 +78,9 @@ void PrintA64Instruction(u32 instruction) {
|
|||
IR::Block ir_block{location};
|
||||
const bool should_continue = A64::TranslateSingleInstruction(ir_block, location, instruction);
|
||||
fmt::print("should_continue: {}\n\n", should_continue);
|
||||
|
||||
Optimization::NamingPass(ir_block);
|
||||
|
||||
fmt::print("IR:\n");
|
||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||
|
||||
Optimization::A64GetSetElimination(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::IdentityRemovalPass(ir_block);
|
||||
|
||||
Optimization::Optimize(ir_block, A64::UserConfig{}, {});
|
||||
fmt::print("Optimized IR:\n");
|
||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||
}
|
||||
|
@ -115,18 +96,9 @@ void PrintThumbInstruction(u32 instruction) {
|
|||
IR::Block ir_block{location};
|
||||
const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction);
|
||||
fmt::print("should_continue: {}\n\n", should_continue);
|
||||
|
||||
Optimization::NamingPass(ir_block);
|
||||
|
||||
fmt::print("IR:\n");
|
||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||
|
||||
Optimization::A32GetSetElimination(ir_block, {});
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::IdentityRemovalPass(ir_block);
|
||||
|
||||
Optimization::Optimize(ir_block, A32::UserConfig{}, {});
|
||||
fmt::print("Optimized IR:\n");
|
||||
fmt::print("{}\n", IR::DumpBlock(ir_block));
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue