Compare commits

...

23 commits

Author SHA1 Message Date
2407dfbcb0
[dynarmic] enforce higher constraints
All checks were successful
eden-license / license-header (pull_request) Successful in 20s
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:59 +00:00
23ab89636c
Fix license headers
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:59 +00:00
de588465b6
[dynarmic] increase cache code size
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:59 +00:00
e44ff3ba68
[dynarmic] inlined pool in block + slab-like for each block
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:59 +00:00
2becd9e488
[dynarmic] Allow to skip verification pass
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:59 +00:00
5fa48b8ec8
[dynarmic] fix exception posix handler
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:59 +00:00
d2f10fba10
[dynarmic] use better boost::visitor
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:59 +00:00
de45a0ac20
[dynarmic] regalloc use scratchimpl that uses all instead of iterating
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:59 +00:00
f67a89e12f
[dynarmic] unconditional branches always take
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:59 +00:00
81e5d18f79
[dynarmic] fix hardcoded AVX512 registers, use xmm0 instead of xmm16 to align with spec
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:59 +00:00
9881cb8503
[dynarmic] checked code alignment
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:58 +00:00
97a4001547
[dynarmic] remove use of mcl reverse iterator
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:58 +00:00
33d1873d5f
[dynarmic, docs] fastmem docs
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:58 +00:00
ac4fe93fa2
[dynarmic] use ARCHITECTURE_ macros instead of MCL ones
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:58 +00:00
8888090cf3
[dynarmic] add back encoding names (for print_info)
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:58 +00:00
2355c25371
[dynarmic] fix ASIMD execution
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:47 +00:00
6e4b66c8e7
[dynarmic] fix tests
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:46 +00:00
254fd53b41
[dynarmic] reduce matcher table noise and cache misses
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:03 +00:00
c2d1dab721
[dynarmic] (prolly makes MSVC crash) - use 128MiB code cache
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:03 +00:00
dbe643328a
[docs] fastmem draft
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:03 +00:00
9d4a72fe26
[dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:03 +00:00
45250b3d88
[dynarmic] use small vector experiment
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:03 +00:00
4bb141d4a2
[dynarmic] reduce opt pass latency
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-29 07:50:03 +00:00
37 changed files with 343 additions and 449 deletions

View file

@ -210,12 +210,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ArmDynarmic32::MakeJit(Common::PageTable* pa
config.wall_clock_cntpct = m_uses_wall_clock;
config.enable_cycle_counting = !m_uses_wall_clock;
// Code cache size
#ifdef ARCHITECTURE_arm64
// Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
// Solaris doesn't support kPageSize >= 512MiB
config.code_cache_size = std::uint32_t(128_MiB);
#else
config.code_cache_size = std::uint32_t(512_MiB);
#endif
// Allow memory fault handling to work
if (m_system.DebuggerEnabled()) {

View file

@ -269,12 +269,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
config.wall_clock_cntpct = m_uses_wall_clock;
config.enable_cycle_counting = !m_uses_wall_clock;
// Code cache size
#ifdef ARCHITECTURE_arm64
// Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
// Solaris doesn't support kPageSize >= 512MiB
config.code_cache_size = std::uint32_t(128_MiB);
#else
config.code_cache_size = std::uint32_t(512_MiB);
#endif
// Allow memory fault handling to work
if (m_system.DebuggerEnabled()) {

View file

@ -58,14 +58,11 @@ add_library(dynarmic
common/lut_from_list.h
common/math_util.cpp
common/math_util.h
common/memory_pool.cpp
common/memory_pool.h
common/safe_ops.h
common/spin_lock.h
common/string_util.h
common/u128.cpp
common/u128.h
common/variant_util.h
frontend/A32/a32_types.cpp
frontend/A32/a32_types.h
frontend/A64/a64_types.cpp
@ -80,7 +77,6 @@ add_library(dynarmic
ir/basic_block.cpp
ir/basic_block.h
ir/cond.h
ir/ir_emitter.cpp
ir/ir_emitter.h
ir/location_descriptor.cpp
ir/location_descriptor.h

View file

@ -15,15 +15,15 @@
#include <mcl/macro/architecture.hpp>
#include "dynarmic/common/common_types.h"
#if defined(MCL_ARCHITECTURE_X86_64)
#if defined(ARCHITECTURE_x86_64)
namespace Dynarmic::Backend::X64 {
class BlockOfCode;
} // namespace Dynarmic::Backend::X64
#elif defined(MCL_ARCHITECTURE_ARM64)
#elif defined(ARCHITECTURE_arm64)
namespace oaknut {
class CodeBlock;
} // namespace oaknut
#elif defined(MCL_ARCHITECTURE_RISCV)
#elif defined(ARCHITECTURE_riscv64)
namespace Dynarmic::Backend::RV64 {
class CodeBlock;
} // namespace Dynarmic::Backend::RV64
@ -33,16 +33,16 @@ class CodeBlock;
namespace Dynarmic::Backend {
#if defined(MCL_ARCHITECTURE_X86_64)
#if defined(ARCHITECTURE_x86_64)
struct FakeCall {
u64 call_rip;
u64 ret_rip;
};
#elif defined(MCL_ARCHITECTURE_ARM64)
#elif defined(ARCHITECTURE_arm64)
struct FakeCall {
u64 call_pc;
};
#elif defined(MCL_ARCHITECTURE_RISCV)
#elif defined(ARCHITECTURE_riscv64)
struct FakeCall {
};
#else
@ -54,11 +54,11 @@ public:
ExceptionHandler();
~ExceptionHandler();
#if defined(MCL_ARCHITECTURE_X86_64)
#if defined(ARCHITECTURE_x86_64)
void Register(X64::BlockOfCode& code);
#elif defined(MCL_ARCHITECTURE_ARM64)
#elif defined(ARCHITECTURE_arm64)
void Register(oaknut::CodeBlock& mem, std::size_t mem_size);
#elif defined(MCL_ARCHITECTURE_RISCV)
#elif defined(ARCHITECTURE_riscv64)
void Register(RV64::CodeBlock& mem, std::size_t mem_size);
#else
# error "Invalid architecture"

View file

@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
Optimization::PolyfillPass(ir_block, {});
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
Optimization::VerificationPass(ir_block);
Optimization::Optimize(ir_block, conf, {});
return ir_block;
}

View file

@ -28,7 +28,6 @@
#include "dynarmic/backend/x64/nzcv_util.h"
#include "dynarmic/backend/x64/perf_map.h"
#include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/common/variant_util.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/interface/A32/coprocessor.h"

View file

@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
auto const opcode = inst.GetOpcode();
// Call the relevant Emit* member function.
switch (opcode) {
#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch;
#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch;
#define A32OPC(name, type, ...)
#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch;
#define A64OPC(name, type, ...) case IR::Opcode::A64##name: goto a64_branch;
#include "dynarmic/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
@ -764,7 +764,7 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
target_code_ptr = code.GetReturnFromRunCodeAddress();
}
const CodePtr patch_location = code.getCurr();
code.mov(code.rcx, reinterpret_cast<u64>(target_code_ptr));
code.mov(code.rcx, u64(target_code_ptr));
code.EnsurePatchLocationSize(patch_location, 10);
}

View file

@ -80,16 +80,16 @@ public:
};
// TODO: Check code alignment
const CodePtr current_code_ptr = [this] {
const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15));
const CodePtr current_code_ptr = [this, aligned_code_ptr] {
// RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask;
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
jit_state.rsb_ptr = new_rsb_ptr;
return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]);
return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]);
}
return GetCurrentBlock();
return aligned_code_ptr;
//return GetCurrentBlock();
}();
const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr);

View file

@ -10,7 +10,6 @@
#include <algorithm>
#include <mcl/iterator/reverse.hpp>
#include "dynarmic/common/common_types.h"
#include <xbyak/xbyak.h>
@ -76,7 +75,8 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE);
for (auto const xmm : mcl::iterator::reverse(regs)) {
for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
auto const xmm = *it;
if (HostLocIsXMM(xmm)) {
xmm_offset -= XMM_SIZE;
if (code.HasHostFeature(HostFeature::AVX)) {
@ -88,9 +88,11 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
}
if (frame_info.stack_subtraction != 0)
code.add(rsp, u32(frame_info.stack_subtraction));
for (auto const gpr : mcl::iterator::reverse(regs))
for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
auto const gpr = *it;
if (HostLocIsGPR(gpr))
code.pop(HostLocToReg64(gpr));
}
}
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) {

View file

@ -364,8 +364,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
lock();
or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
SwitchMxcsrOnEntry();
jmp(ABI_PARAM2);
@ -415,7 +414,6 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
}
xor_(eax, eax);
lock();
xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax);
ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));

View file

@ -11,6 +11,7 @@
#include <iterator>
#include "dynarmic/common/assert.h"
#include <boost/variant/detail/apply_visitor_binary.hpp>
#include <mcl/bit/bit_field.hpp>
#include <mcl/scope_exit.hpp>
#include "dynarmic/common/common_types.h"
@ -21,7 +22,6 @@
#include "dynarmic/backend/x64/perf_map.h"
#include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/backend/x64/verbose_debugging_output.h"
#include "dynarmic/common/variant_util.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
@ -347,14 +347,14 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de
}
void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
Common::VisitVariant<void>(terminal, [this, initial_location, is_single_step](auto x) {
boost::apply_visitor([this, initial_location, is_single_step](auto x) {
using T = std::decay_t<decltype(x)>;
if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
this->EmitTerminalImpl(x, initial_location, is_single_step);
} else {
ASSERT_MSG(false, "Invalid terminal");
}
});
}, terminal);
}
void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {

View file

@ -92,13 +92,10 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list<Xbyak::Xmm> t
FpFixup::Norm_Src,
FpFixup::Norm_Src,
FpFixup::Norm_Src);
const Xbyak::Xmm tmp = xmm16;
const Xbyak::Xmm tmp = xmm0;
FCODE(vmovap)(tmp, code.BConst<fsize>(xword, denormal_to_zero));
for (const Xbyak::Xmm& xmm : to_daz) {
for (const Xbyak::Xmm& xmm : to_daz)
FCODE(vfixupimms)(xmm, xmm, tmp, u8(0));
}
return;
}

View file

@ -273,34 +273,31 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
if constexpr (bitsize != 128) {
ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
} else {
if constexpr (bitsize == 128) {
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(inst);
} else {
ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
}
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
Xbyak::Label end;
code.mov(code.ABI_RETURN, u32(1));
code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
code.test(tmp.cvt8(), tmp.cvt8());
code.je(end);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
code.xor_(tmp.cvt32(), tmp.cvt32());
code.xchg(tmp.cvt8(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
code.mov(code.ABI_PARAM1, u64(&conf));
if constexpr (bitsize != 128) {
using T = mcl::unsigned_integer_of_size<bitsize>;
code.CallLambda(
[](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
[&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
})
? 0
: 1;
});
code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr, [&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
}) ? 0 : 1;
});
if (ordered) {
code.mfence();
}
@ -308,15 +305,11 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
code.movaps(xword[code.ABI_PARAM3], xmm1);
code.CallLambda(
[](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr,
[&](Vector expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
})
? 0
: 1;
});
code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr, [&](Vector expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
}) ? 0 : 1;
});
if (ordered) {
code.mfence();
}
@ -437,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
SharedLabel end = GenSharedLabel();
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.mov(status, u32(1));
code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
code.test(tmp.cvt8(), tmp.cvt8());
code.je(*end, code.T_NEAR);
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.cmp(qword[tmp], vaddr);
code.jne(*end, code.T_NEAR);
@ -474,30 +468,29 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
const auto location = code.getCurr();
if constexpr (bitsize == 128) {
switch (bitsize) {
case 8:
code.lock();
code.cmpxchg(code.byte[dest_ptr], value.cvt8());
break;
case 16:
code.lock();
code.cmpxchg(word[dest_ptr], value.cvt16());
break;
case 32:
code.lock();
code.cmpxchg(dword[dest_ptr], value.cvt32());
break;
case 64:
code.lock();
code.cmpxchg(qword[dest_ptr], value.cvt64());
break;
case 128:
code.lock();
code.cmpxchg16b(ptr[dest_ptr]);
} else {
switch (bitsize) {
case 8:
code.lock();
code.cmpxchg(code.byte[dest_ptr], value.cvt8());
break;
case 16:
code.lock();
code.cmpxchg(word[dest_ptr], value.cvt16());
break;
case 32:
code.lock();
code.cmpxchg(dword[dest_ptr], value.cvt32());
break;
case 64:
code.lock();
code.cmpxchg(qword[dest_ptr], value.cvt64());
break;
default:
UNREACHABLE();
}
break;
default:
UNREACHABLE();
}
code.setnz(status.cvt8());

View file

@ -609,8 +609,8 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm right_shift = xmm16;
const Xbyak::Xmm tmp = xmm17;
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
code.vpxord(right_shift, right_shift, right_shift);
@ -674,8 +674,8 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm right_shift = xmm16;
const Xbyak::Xmm tmp = xmm17;
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF));
code.vpxorq(right_shift, right_shift, right_shift);
@ -1955,8 +1955,8 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm right_shift = xmm16;
const Xbyak::Xmm tmp = xmm17;
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
code.vpxord(right_shift, right_shift, right_shift);
@ -2737,7 +2737,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst)
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
const Xbyak::Xmm c = xmm16;
const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
code.vpsraq(c, a, 32);
code.vpsllq(a, a, 32);
code.vpsraq(a, a, 32);
@ -5461,7 +5461,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) {
const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm masked = xmm16;
const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm();
code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));

View file

@ -9,6 +9,7 @@
#include "dynarmic/backend/x64/reg_alloc.h"
#include <algorithm>
#include <limits>
#include <numeric>
#include <utility>
@ -118,7 +119,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
values.push_back(inst);
ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits<uint16_t>::max)());
total_uses += inst->UseCount();
max_bit_width = std::max<uint8_t>(max_bit_width, GetBitWidth(inst->GetType()));
max_bit_width = std::max<uint8_t>(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType())));
}
void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {
@ -152,19 +153,19 @@ bool Argument::GetImmediateU1() const noexcept {
u8 Argument::GetImmediateU8() const noexcept {
const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x100);
ASSERT(imm <= u64(std::numeric_limits<u8>::max()));
return u8(imm);
}
u16 Argument::GetImmediateU16() const noexcept {
const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x10000);
ASSERT(imm <= u64(std::numeric_limits<u16>::max()));
return u16(imm);
}
u32 Argument::GetImmediateU32() const noexcept {
const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x100000000);
ASSERT(imm <= u64(std::numeric_limits<u32>::max()));
return u32(imm);
}
@ -366,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def,
if (result_def) {
DefineValueImpl(result_def, ABI_RETURN);
}
for (size_t i = 0; i < args.size(); i++) {
if (args[i]) {
UseScratch(*args[i], args_hostloc[i]);
} else {
ScratchGpr(args_hostloc[i]); // TODO: Force spill
}
}
// Must match with with ScratchImpl
for (auto const gpr : other_caller_save) {
MoveOutOfTheWay(gpr);
LocInfo(gpr).WriteLock();
}
for (size_t i = 0; i < args.size(); i++) {
if (args[i] && !args[i]->get().IsVoid()) {
UseScratch(*args[i], args_hostloc[i]);
// LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
switch (args[i]->get().GetType()) {
@ -389,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def,
}
}
}
for (size_t i = 0; i < args.size(); i++)
if (!args[i]) {
// TODO: Force spill
ScratchGpr(args_hostloc[i]);
}
for (auto const caller_saved : other_caller_save)
ScratchImpl({caller_saved});
}
void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
@ -559,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept {
}
HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
#if 0
// TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows
// but it's fine anyways. We can find other ways to cheat it later - but which?!?!
// we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end?
// TODO(lizzie): This needs to be investigated further later.
// Do not spill XMM into other XMM silly
if (!is_xmm) {
/*if (!is_xmm) {
// TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY
// Intel recommends to spill GPR onto XMM registers IF POSSIBLE
// TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call?
@ -573,8 +575,9 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i)
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
return loc;
}
#endif
}*/
// TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits
// of spilling on XMM versus the potential cost of using XMM registers.....
// Otherwise go to stack spilling
for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i)
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())

View file

@ -12,6 +12,7 @@
#include <functional>
#include <optional>
#include "boost/container/small_vector.hpp"
#include "dynarmic/common/common_types.h"
#include <xbyak/xbyak.h>
#include <boost/container/static_vector.hpp>
@ -77,13 +78,13 @@ public:
return std::find(values.begin(), values.end(), inst) != values.end();
}
inline size_t GetMaxBitWidth() const noexcept {
return max_bit_width;
return 1 << max_bit_width;
}
void AddValue(IR::Inst* inst) noexcept;
void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
private:
//non trivial
std::vector<IR::Inst*> values; //24
boost::container::small_vector<IR::Inst*, 3> values; //24
// Block state
uint16_t total_uses = 0; //8
//sometimes zeroed
@ -93,10 +94,10 @@ private:
uint16_t is_being_used_count = 0; //8
uint16_t current_references = 0; //8
// Value state
uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128
uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6)
uint8_t lru_counter : 2 = 0; //1
bool is_scratch : 1 = false; //1
bool is_set_last_use : 1 = false; //1
alignas(16) uint8_t lru_counter = 0; //1
friend class RegAlloc;
};
static_assert(sizeof(HostLocInfo) == 64);

View file

@ -1,13 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/common/memory_pool.h"
#include <cstdlib>
namespace Dynarmic::Common {
} // namespace Dynarmic::Common

View file

@ -1,61 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <cstddef>
#include <vector>
namespace Dynarmic::Common {
/// @brief Bump-pointer slab allocator. Memory is carved out of malloc'd
/// slabs holding `slab_size` cells of `object_size` bytes each; individual
/// cells are never returned, and every slab is released only at destruction.
/// @tparam object_size Byte-size of objects to construct
/// @tparam slab_size Number of objects to have per slab
template<size_t object_size, size_t slab_size>
class Pool {
public:
// Eagerly grab the first slab so Alloc() always has an active slab.
inline Pool() noexcept {
AllocateNewSlab();
}
// Release the active slab and every retired one.
inline ~Pool() noexcept {
std::free(active);
for (char* slab : retired)
std::free(slab);
}
// Handed-out pointers refer into slabs owned by this object, so the
// pool is neither copyable nor movable.
Pool(const Pool&) = delete;
Pool(Pool&&) = delete;
Pool& operator=(const Pool&) = delete;
Pool& operator=(Pool&&) = delete;
/// @brief Returns a pointer to an `object_size`-bytes block of memory.
/// The block is uninitialized; callers placement-construct into it.
[[nodiscard]] void* Alloc() noexcept {
if (cells_left == 0) {
// Active slab exhausted: park it and start a fresh one.
retired.push_back(active);
AllocateNewSlab();
}
char* const cell = cursor;
cursor += object_size;
--cells_left;
return static_cast<void*>(cell);
}
private:
/// @brief Allocates a completely new memory slab and makes it active.
/// NOTE(review): malloc failure is not checked — a failed allocation
/// leaves `cursor` null for the next Alloc(); confirm acceptable here.
void AllocateNewSlab() noexcept {
active = static_cast<char*>(std::malloc(object_size * slab_size));
cursor = active;
cells_left = slab_size;
}
std::vector<char*> retired; // full slabs awaiting destruction
char* active = nullptr; // slab currently being carved up
char* cursor = nullptr; // next unallocated byte inside `active`
size_t cells_left = 0; // cells still available in `active`
};
} // namespace Dynarmic::Common

View file

@ -1,29 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <boost/variant.hpp>
namespace Dynarmic::Common {
namespace detail {
/// Adapter that turns a single generic lambda into a boost::static_visitor:
/// it inherits the lambda so the lambda's operator() handles every
/// alternative of the variant. The lambda is taken by rvalue reference
/// and moved into the visitor.
template<typename ReturnT, typename Lambda>
struct VariantVisitor : boost::static_visitor<ReturnT>
, Lambda {
VariantVisitor(Lambda&& lambda)
: Lambda(std::move(lambda)) {}
using Lambda::operator();
};
} // namespace detail
/// @brief Applies `lambda` to whichever alternative `variant` currently holds.
/// @tparam ReturnT Result type of the visitation (boost::static_visitor needs
/// it spelled explicitly; it is not deduced from the lambda).
/// @return Whatever `lambda` returns for the active alternative.
template<typename ReturnT, typename Variant, typename Lambda>
inline ReturnT VisitVariant(Variant&& variant, Lambda&& lambda) {
return boost::apply_visitor(detail::VariantVisitor<ReturnT, Lambda>(std::move(lambda)), variant);
}
} // namespace Dynarmic::Common

View file

@ -9,12 +9,9 @@
#pragma once
#include <string>
#include <utility>
#include <fmt/format.h>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"
#include "dynarmic/interface/A32/coprocessor_util.h"
#include "dynarmic/ir/cond.h"
@ -89,24 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) {
inline size_t RegNumber(Reg reg) {
ASSERT(reg != Reg::INVALID_REG);
return static_cast<size_t>(reg);
return size_t(reg);
}
inline size_t RegNumber(ExtReg reg) {
if (IsSingleExtReg(reg)) {
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::S0);
return size_t(reg) - size_t(ExtReg::S0);
} else if (IsDoubleExtReg(reg)) {
return size_t(reg) - size_t(ExtReg::D0);
}
if (IsDoubleExtReg(reg)) {
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::D0);
}
if (IsQuadExtReg(reg)) {
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::Q0);
}
ASSERT_MSG(false, "Invalid extended register");
return 0;
ASSERT(IsQuadExtReg(reg));
return size_t(reg) - size_t(ExtReg::Q0);
}
inline Reg operator+(Reg reg, size_t number) {

View file

@ -30,13 +30,13 @@ template<typename Visitor>
using ArmDecodeTable = std::array<std::vector<ArmMatcher<Visitor>>, 0x1000>;
namespace detail {
inline size_t ToFastLookupIndexArm(u32 instruction) {
inline size_t ToFastLookupIndexArm(u32 instruction) noexcept {
return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0);
}
} // namespace detail
template<typename V>
constexpr ArmDecodeTable<V> GetArmDecodeTable() {
constexpr ArmDecodeTable<V> GetArmDecodeTable() noexcept {
std::vector<ArmMatcher<V>> list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./arm.inc"
@ -62,15 +62,27 @@ constexpr ArmDecodeTable<V> GetArmDecodeTable() {
}
template<typename V>
std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) {
std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) noexcept {
alignas(64) static const auto table = GetArmDecodeTable<V>();
const auto matches_instruction = [instruction](const auto& matcher) {
return matcher.Matches(instruction);
};
const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)];
auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
return iter != subtable.end() ? std::optional<std::reference_wrapper<const ArmMatcher<V>>>(*iter) : std::nullopt;
}
/// @brief Returns the human-readable name of an A32 (ARM) instruction.
/// Rebuilds the full name/matcher table on every call — presumably intended
/// for debug/print_info paths rather than hot decode loops; confirm with
/// callers before using elsewhere.
/// @param inst Raw 32-bit ARM instruction encoding.
/// @return Name of the first matcher whose mask/expect bits match `inst`,
/// or std::nullopt if no matcher accepts the encoding.
template<typename V>
std::optional<std::string_view> GetNameARM(u32 inst) noexcept {
// Each entry pairs the mnemonic string from arm.inc with its bit matcher.
std::vector<std::pair<std::string_view, ArmMatcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./arm.inc"
#undef INST
};
// Linear scan: table order decides which matcher wins on overlap.
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}
} // namespace Dynarmic::A32

View file

@ -26,15 +26,12 @@ template<typename Visitor>
using ASIMDMatcher = Decoder::Matcher<Visitor, u32>;
template<typename V>
std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
std::vector<ASIMDMatcher<V>> table = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() noexcept {
std::vector<std::pair<const char*, ASIMDMatcher<V>>> table = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./asimd.inc"
#undef INST
};
// Exceptions to the rule of thumb.
const std::set<std::string> comes_first{
"VBIC, VMOV, VMVN, VORR (immediate)",
@ -53,29 +50,43 @@ std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
"VQDMULH (scalar)",
"VQRDMULH (scalar)",
};
const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
return comes_first.count(matcher.GetName()) > 0;
const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
return comes_first.count(e.first) > 0;
});
const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
return comes_last.count(matcher.GetName()) == 0;
const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
return comes_last.count(e.first) == 0;
});
// If a matcher has more bits in its mask it is more specific, so it should come first.
std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) {
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) {
return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
});
return table;
std::vector<ASIMDMatcher<V>> final_table;
std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) {
return e.second;
});
return final_table;
}
template<typename V>
std::optional<std::reference_wrapper<const ASIMDMatcher<V>>> DecodeASIMD(u32 instruction) {
static const auto table = GetASIMDDecodeTable<V>();
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
std::optional<std::reference_wrapper<const ASIMDMatcher<V>>> DecodeASIMD(u32 instruction) noexcept {
alignas(64) static const auto table = GetASIMDDecodeTable<V>();
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
return matcher.Matches(instruction);
});
return iter != table.end() ? std::optional<std::reference_wrapper<const ASIMDMatcher<V>>>(*iter) : std::nullopt;
}
/// @brief Looks up the human-readable name of an A32 ASIMD instruction.
/// @tparam V Visitor type used to instantiate the matcher table.
/// @param inst The 32-bit instruction encoding to look up.
/// @returns The name of the first matching instruction, or std::nullopt if no
///          matcher accepts the encoding.
template<typename V>
std::optional<std::string_view> GetNameASIMD(u32 inst) noexcept {
    // Build the name/matcher table once. The original rebuilt the whole
    // vector (one matcher construction per INST entry) on every call, which
    // is wasted work for a pure lookup helper; the table is immutable.
    static const std::vector<std::pair<std::string_view, ASIMDMatcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./asimd.inc"
#undef INST
    };
    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
        return m.second.Matches(inst);
    });
    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}
} // namespace Dynarmic::A32

View file

@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher<Visitor, u16>;
template<typename V>
std::optional<std::reference_wrapper<const Thumb16Matcher<V>>> DecodeThumb16(u16 instruction) {
static const std::vector<Thumb16Matcher<V>> table = {
alignas(64) static const std::vector<Thumb16Matcher<V>> table = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)),
#include "./thumb16.inc"
#undef INST
};
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
return matcher.Matches(instruction);
});
return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb16Matcher<V>>>(*iter) : std::nullopt;
}
/// @brief Looks up the human-readable name of a Thumb16 instruction.
/// @tparam V Visitor type used to instantiate the matcher table.
/// @param inst The instruction encoding to look up (only the low 16 bits are
///             significant for Thumb16 matchers).
/// @returns The name of the first matching instruction, or std::nullopt.
template<typename V>
std::optional<std::string_view> GetNameThumb16(u32 inst) noexcept {
    // Construct the immutable name/matcher table once instead of on every
    // call; rebuilding it per lookup re-ran every matcher constructor.
    static const std::vector<std::pair<std::string_view, Thumb16Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) },
#include "./thumb16.inc"
#undef INST
    };
    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
        return m.second.Matches(inst);
    });
    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}
} // namespace Dynarmic::A32

View file

@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher<Visitor, u32>;
template<typename V>
std::optional<std::reference_wrapper<const Thumb32Matcher<V>>> DecodeThumb32(u32 instruction) {
static const std::vector<Thumb32Matcher<V>> table = {
alignas(64) static const std::vector<Thumb32Matcher<V>> table = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./thumb32.inc"
#undef INST
};
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
return matcher.Matches(instruction);
});
return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb32Matcher<V>>>(*iter) : std::nullopt;
}
/// @brief Looks up the human-readable name of a Thumb32 instruction.
/// @tparam V Visitor type used to instantiate the matcher table.
/// @param inst The 32-bit instruction encoding to look up.
/// @returns The name of the first matching instruction, or std::nullopt.
template<typename V>
std::optional<std::string_view> GetNameThumb32(u32 inst) noexcept {
    // Construct the immutable name/matcher table once instead of on every
    // call; rebuilding it per lookup re-ran every matcher constructor.
    static const std::vector<std::pair<std::string_view, Thumb32Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./thumb32.inc"
#undef INST
    };
    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
        return m.second.Matches(inst);
    });
    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}
} // namespace Dynarmic::A32

View file

@ -26,36 +26,42 @@ using VFPMatcher = Decoder::Matcher<Visitor, u32>;
template<typename V>
std::optional<std::reference_wrapper<const VFPMatcher<V>>> DecodeVFP(u32 instruction) {
using Table = std::vector<VFPMatcher<V>>;
static const struct Tables {
alignas(64) static const struct Tables {
Table unconditional;
Table conditional;
} tables = [] {
} tables = []() {
Table list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./vfp.inc"
#undef INST
};
const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
return (matcher.GetMask() & 0xF0000000) == 0xF0000000;
});
return Tables{
Table{list.begin(), division},
Table{division, list.end()},
Table{list.begin(), it},
Table{it, list.end()},
};
}();
const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000;
const Table& table = is_unconditional ? tables.unconditional : tables.conditional;
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
return matcher.Matches(instruction);
});
return iter != table.end() ? std::optional<std::reference_wrapper<const VFPMatcher<V>>>(*iter) : std::nullopt;
}
/// @brief Looks up the human-readable name of an A32 VFP instruction.
/// @tparam V Visitor type used to instantiate the matcher table.
/// @param inst The 32-bit instruction encoding to look up.
/// @returns The name of the first matching instruction, or std::nullopt.
template<typename V>
std::optional<std::string_view> GetNameVFP(u32 inst) noexcept {
    // Construct the immutable name/matcher table once instead of on every
    // call; rebuilding it per lookup re-ran every matcher constructor.
    // Unlike DecodeVFP, no conditional/unconditional partition is needed
    // here: a linear scan over the full table finds the matching name.
    static const std::vector<std::pair<std::string_view, VFPMatcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./vfp.inc"
#undef INST
    };
    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
        return m.second.Matches(inst);
    });
    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}
} // namespace Dynarmic::A32

View file

@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) {
return 0xF7F0A000; // UDF
}
bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) {
/// @brief Fast pre-filter: may this Thumb encoding be a VFP/ASIMD instruction?
/// A true result only means the encoding falls in the coprocessor/ASIMD
/// space and is worth handing to the full VFP/ASIMD decoders; it is not a
/// guarantee that the instruction is valid.
inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept {
    // Coprocessor/VFP region: top bits match 111x11xx....
    const bool in_coproc_space = (thumb_instruction & 0xEC000000) == 0xEC000000;
    // ASIMD load/store region: top bits match 11111001 xxx0....
    const bool in_asimd_space = (thumb_instruction & 0xFF100000) == 0xF9000000;
    return in_coproc_space || in_asimd_space;
}

View file

@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) {
template<typename V>
constexpr DecodeTable<V> GetDecodeTable() {
std::vector<Matcher<V>> list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
std::vector<std::pair<const char*, Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./a64.inc"
#undef INST
};
// If a matcher has more bits in its mask it is more specific, so it should come first.
std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) {
std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) {
// If a matcher has more bits in its mask it is more specific, so it should come first.
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
});
// Exceptions to the above rule of thumb.
std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
std::stable_partition(list.begin(), list.end(), [&](const auto& e) {
return std::set<std::string>{
"MOVI, MVNI, ORR, BIC (vector, immediate)",
"FMOV (vector, immediate)",
"Unallocated SIMD modified immediate",
}.count(matcher.GetName()) > 0;
}.count(e.first) > 0;
});
DecodeTable<V> table{};
for (size_t i = 0; i < table.size(); ++i) {
for (auto matcher : list) {
const auto expect = detail::ToFastLookupIndex(matcher.GetExpected());
const auto mask = detail::ToFastLookupIndex(matcher.GetMask());
for (auto const& e : list) {
const auto expect = detail::ToFastLookupIndex(e.second.GetExpected());
const auto mask = detail::ToFastLookupIndex(e.second.GetMask());
if ((i & mask) == expect) {
table[i].push_back(matcher);
table[i].push_back(e.second);
}
}
}
@ -74,12 +71,24 @@ constexpr DecodeTable<V> GetDecodeTable() {
template<typename V>
std::optional<std::reference_wrapper<const Matcher<V>>> Decode(u32 instruction) {
alignas(64) static const auto table = GetDecodeTable<V>();
const auto matches_instruction = [instruction](const auto& matcher) {
return matcher.Matches(instruction);
};
const auto& subtable = table[detail::ToFastLookupIndex(instruction)];
auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) {
return matcher.Matches(instruction);
});
return iter != subtable.end() ? std::optional<std::reference_wrapper<const Matcher<V>>>(*iter) : std::nullopt;
}
/// @brief Looks up the human-readable name of an A64 instruction.
/// @tparam V Visitor type used to instantiate the matcher table.
/// @param inst The 32-bit instruction encoding to look up.
/// @returns The name of the first matching instruction, or std::nullopt.
template<typename V>
std::optional<std::string_view> GetName(u32 inst) noexcept {
    // Construct the immutable name/matcher table once instead of on every
    // call; rebuilding it per lookup re-ran every matcher constructor.
    // No fast-lookup bucketing (as in Decode) is needed for a name query;
    // a linear scan is sufficient for this debug/print_info helper.
    static const std::vector<std::pair<std::string_view, Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./a64.inc"
#undef INST
    };
    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
        return m.second.Matches(inst);
    });
    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}
} // namespace Dynarmic::A64

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
@ -20,9 +23,12 @@ bool TranslatorVisitor::B_cond(Imm<19> imm19, Cond cond) {
bool TranslatorVisitor::B_uncond(Imm<26> imm26) {
const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend<s64>();
const u64 target = ir.PC() + offset;
//ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
// Pattern to halt execution (B .)
if (target == ir.PC()) {
ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
return false;
}
ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
return false;
}

View file

@ -70,11 +70,9 @@ struct detail {
return std::make_tuple(mask, expect);
}
/**
* Generates the masks and shifts for each argument.
* A '-' in a bitstring indicates that we don't care about that value.
* An argument is specified by a continuous string of the same character.
*/
/// @brief Generates the masks and shifts for each argument.
/// A '-' in a bitstring indicates that we don't care about that value.
/// An argument is specified by a continuous string of the same character.
template<size_t N>
static consteval auto GetArgInfo(std::array<char, opcode_bitsize> bitstring) {
std::array<opcode_type, N> masks = {};
@ -98,7 +96,6 @@ struct detail {
if constexpr (N > 0) {
const size_t bit_position = opcode_bitsize - i - 1;
if (arg_index >= N)
throw std::out_of_range("Unexpected field");
@ -109,20 +106,16 @@ struct detail {
}
}
}
#if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__)
// Avoids a MSVC ICE, and avoids Android NDK issue.
ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; }));
#endif
return std::make_tuple(masks, shifts);
}
/**
* This struct's Make member function generates a lambda which decodes an instruction based on
* the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a
* template argument.
*/
/// @brief This struct's Make member function generates a lambda which decodes an instruction
/// based on the provided arg_masks and arg_shifts. The Visitor member function to call is
/// provided as a template argument.
template<typename FnT>
struct VisitorCaller;
@ -130,36 +123,36 @@ struct detail {
# pragma warning(push)
# pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning)
#endif
template<typename Visitor, typename... Args, typename CallRetT>
struct VisitorCaller<CallRetT (Visitor::*)(Args...)> {
template<typename V, typename... Args, typename ReturnType>
struct VisitorCaller<ReturnType (V::*)(Args...)> {
template<size_t... iota>
static auto Make(std::integer_sequence<size_t, iota...>,
CallRetT (Visitor::*const fn)(Args...),
static constexpr auto Make(std::integer_sequence<size_t, iota...>,
ReturnType (V::*const fn)(Args...),
const std::array<opcode_type, sizeof...(iota)> arg_masks,
const std::array<size_t, sizeof...(iota)> arg_shifts) {
static_assert(std::is_same_v<visitor_type, Visitor>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) {
static_assert(std::is_same_v<visitor_type, V>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) {
(void)instruction;
(void)arg_masks;
(void)arg_shifts;
return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
};
}
};
template<typename Visitor, typename... Args, typename CallRetT>
struct VisitorCaller<CallRetT (Visitor::*)(Args...) const> {
template<typename V, typename... Args, typename ReturnType>
struct VisitorCaller<ReturnType (V::*)(Args...) const> {
template<size_t... iota>
static auto Make(std::integer_sequence<size_t, iota...>,
CallRetT (Visitor::*const fn)(Args...) const,
static constexpr auto Make(std::integer_sequence<size_t, iota...>,
ReturnType (V::*const fn)(Args...) const,
const std::array<opcode_type, sizeof...(iota)> arg_masks,
const std::array<size_t, sizeof...(iota)> arg_shifts) {
static_assert(std::is_same_v<visitor_type, const Visitor>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) {
static_assert(std::is_same_v<visitor_type, const V>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) {
(void)instruction;
(void)arg_masks;
(void)arg_shifts;
return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
};
}
};
@ -167,27 +160,21 @@ struct detail {
# pragma warning(pop)
#endif
/**
* Creates a matcher that can match and parse instructions based on bitstring.
* See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
*/
template<auto bitstring, typename FnT>
static auto GetMatcher(FnT fn, const char* const name) {
constexpr size_t args_count = mcl::parameter_count_v<FnT>;
/// @brief Creates a matcher that can match and parse instructions based on bitstring.
/// See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
template<auto bitstring, typename F>
static constexpr auto GetMatcher(F fn) {
constexpr size_t args_count = mcl::parameter_count_v<F>;
constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring));
constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring));
constexpr auto arg_masks = std::get<0>(GetArgInfo<args_count>(bitstring));
constexpr auto arg_shifts = std::get<1>(GetArgInfo<args_count>(bitstring));
using Iota = std::make_index_sequence<args_count>;
const auto proxy_fn = VisitorCaller<FnT>::Make(Iota(), fn, arg_masks, arg_shifts);
return MatcherT(name, mask, expect, proxy_fn);
const auto proxy_fn = VisitorCaller<F>::Make(std::make_index_sequence<args_count>(), fn, arg_masks, arg_shifts);
return MatcherT(mask, expect, proxy_fn);
}
};
#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn, name)
#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn)
} // namespace detail
} // namespace Dynarmic::Decoder

View file

@ -14,16 +14,12 @@
namespace Dynarmic::Decoder {
/**
* Generic instruction handling construct.
*
* @tparam Visitor An arbitrary visitor type that will be passed through
* to the function being handled. This type must be the
* type of the first parameter in a handler function.
*
* @tparam OpcodeType Type representing an opcode. This must be the
* type of the second parameter in a handler function.
*/
/// Generic instruction handling construct.
/// @tparam Visitor An arbitrary visitor type that will be passed through
/// to the function being handled. This type must be the
/// type of the first parameter in a handler function.
/// @tparam OpcodeType Type representing an opcode. This must be the
/// type of the second parameter in a handler function.
template<typename Visitor, typename OpcodeType>
class Matcher {
public:
@ -31,46 +27,35 @@ public:
using visitor_type = Visitor;
using handler_return_type = typename Visitor::instruction_return_type;
using handler_function = std::function<handler_return_type(Visitor&, opcode_type)>;
Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func)
: name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {}
/// Gets the name of this type of instruction.
const char* GetName() const {
return name;
}
Matcher(opcode_type mask, opcode_type expected, handler_function func)
: mask{mask}, expected{expected}, fn{std::move(func)} {}
/// Gets the mask for this instruction.
opcode_type GetMask() const {
inline opcode_type GetMask() const noexcept {
return mask;
}
/// Gets the expected value after masking for this instruction.
opcode_type GetExpected() const {
inline opcode_type GetExpected() const noexcept {
return expected;
}
/**
* Tests to see if the given instruction is the instruction this matcher represents.
* @param instruction The instruction to test
* @returns true if the given instruction matches.
*/
bool Matches(opcode_type instruction) const {
/// Tests to see if the given instruction is the instruction this matcher represents.
/// @param instruction The instruction to test
/// @returns true if the given instruction matches.
inline bool Matches(opcode_type instruction) const noexcept {
return (instruction & mask) == expected;
}
/**
* Calls the corresponding instruction handler on visitor for this type of instruction.
* @param v The visitor to use
* @param instruction The instruction to decode.
*/
handler_return_type call(Visitor& v, opcode_type instruction) const {
/// Calls the corresponding instruction handler on visitor for this type of instruction.
/// @param v The visitor to use
/// @param instruction The instruction to decode.
inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept {
ASSERT(Matches(instruction));
return fn(v, instruction);
}
private:
const char* name;
opcode_type mask;
opcode_type expected;
handler_function fn;

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2020 MerryMage
* SPDX-License-Identifier: 0BSD
@ -34,6 +37,8 @@ enum class OptimizationFlag : std::uint32_t {
MiscIROpt = 0x00000020,
/// Optimize for code speed rather than for code size (this serves well for tight loops)
CodeSpeed = 0x00000040,
/// Disable verification passes
DisableVerification = 0x00000080,
/// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations.
/// This unfuses fused instructions to improve performance on host CPUs without FMA support.

View file

@ -15,8 +15,6 @@
#include <fmt/format.h>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/memory_pool.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/frontend/A64/a64_types.h"
#include "dynarmic/ir/cond.h"
@ -27,8 +25,7 @@ namespace Dynarmic::IR {
Block::Block(const LocationDescriptor& location)
: location{location},
end_location{location},
cond{Cond::AL},
instruction_alloc_pool{std::make_unique<std::remove_reference_t<decltype(*instruction_alloc_pool)>>()}
cond{Cond::AL}
{
}
@ -40,7 +37,21 @@ Block::Block(const LocationDescriptor& location)
/// @param args A sequence of Value instances used as arguments for the instruction.
/// @returns Iterator to the newly created instruction.
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);
// First try using the "inline" buffer, otherwise fallback to a slower slab-like allocation scheme
// purpose is to avoid many calls to new/delete which invoke malloc which invokes mmap
// just pool it!!! - reason why there is an inline buffer is because many small blocks are created
// with few instructions due to subpar optimisations on other passes... plus branch-heavy code will
// hugely benefit from the coherency of faster allocations...
IR::Inst* inst;
if (inlined_inst.size() < inlined_inst.max_size()) {
inst = &inlined_inst[inlined_inst.size()];
inlined_inst.emplace_back(opcode);
} else {
if (pooled_inst.empty() || pooled_inst.back().size() == pooled_inst.back().max_size())
pooled_inst.emplace_back();
inst = &pooled_inst.back()[pooled_inst.back().size()];
pooled_inst.back().emplace_back(opcode);
}
DEBUG_ASSERT(args.size() == inst->NumArgs());
std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
inst->SetArg(index, arg);

View file

@ -13,6 +13,9 @@
#include <optional>
#include <string>
#include <boost/container/container_fwd.hpp>
#include <boost/container/static_vector.hpp>
#include <boost/container/stable_vector.hpp>
#include <mcl/container/intrusive_list.hpp>
#include "dynarmic/common/common_types.h"
@ -21,7 +24,6 @@
#include "dynarmic/ir/terminal.h"
#include "dynarmic/ir/value.h"
#include "dynarmic/ir/dense_list.h"
#include "dynarmic/common/memory_pool.h"
namespace Dynarmic::IR {
@ -164,8 +166,12 @@ public:
return cycle_count;
}
private:
/// "Hot cache" for small blocks so we don't call global allocator
boost::container::static_vector<Inst, 14> inlined_inst;
/// List of instructions in this block.
instruction_list_type instructions;
/// "Long/far" memory pool
boost::container::stable_vector<boost::container::static_vector<Inst, 32>> pooled_inst;
/// Block to execute next if `cond` did not pass.
std::optional<LocationDescriptor> cond_failed = {};
/// Description of the starting location of this block
@ -174,8 +180,6 @@ private:
LocationDescriptor end_location;
/// Conditional to pass in order to execute this block
Cond cond;
/// Memory pool for instruction list
std::unique_ptr<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>> instruction_alloc_pool;
/// Terminal instruction of this block.
Terminal terminal = Term::Invalid{};
/// Number of cycles this block takes to execute if the conditional fails.
@ -183,6 +187,7 @@ private:
/// Number of cycles this block takes to execute.
size_t cycle_count = 0;
};
static_assert(sizeof(Block) == 2048);
/// Returns a string representation of the contents of block. Intended for debugging.
std::string DumpBlock(const IR::Block& block) noexcept;

View file

@ -1,21 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/ir/ir_emitter.h"
#include <vector>
#include "dynarmic/common/assert.h"
#include <mcl/bit_cast.hpp>
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::IR {
} // namespace Dynarmic::IR

View file

@ -1491,9 +1491,9 @@ void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization:
Optimization::DeadCodeElimination(block);
}
Optimization::IdentityRemovalPass(block);
//if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) {
if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) {
Optimization::VerificationPass(block);
//}
}
}
void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) {
@ -1511,9 +1511,9 @@ void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization:
if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] {
Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks);
}
//if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) {
if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) {
Optimization::VerificationPass(block);
//}
}
}
} // namespace Dynarmic::Optimization

View file

@ -57,7 +57,7 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") {
INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction);
INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect);
INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x);
//INFO("Name: " << *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction));
INFO("Name: " << *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction));
INFO("iserr: " << iserr);
//INFO("alternative: " << alternative->GetName());
INFO("altiserr: " << altiserr);

View file

@ -40,18 +40,18 @@
using namespace Dynarmic;
/// @brief Returns the mnemonic of an A32 instruction for print_info output.
/// Decoders are tried from most specific to least: VFP, then ASIMD, then ARM.
/// @param instruction The 32-bit A32 instruction encoding.
/// @returns The instruction name, or "<null>" if no decoder matches.
std::string_view GetNameOfA32Instruction(u32 instruction) {
    // Each Get*Name helper scans the same matcher set as its decoder, so the
    // optional is dereferenced only after the corresponding decode succeeded
    // (i.e. at least one matcher accepts the encoding).
    if (auto const vfp_decoder = A32::DecodeVFP<A32::TranslatorVisitor>(instruction))
        return *A32::GetNameVFP<A32::TranslatorVisitor>(instruction);
    else if (auto const asimd_decoder = A32::DecodeASIMD<A32::TranslatorVisitor>(instruction))
        return *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction);
    else if (auto const decoder = A32::DecodeArm<A32::TranslatorVisitor>(instruction))
        return *A32::GetNameARM<A32::TranslatorVisitor>(instruction);
    return "<null>";
}
/// @brief Returns the mnemonic of an A64 instruction for print_info output.
/// @param instruction The 32-bit A64 instruction encoding.
/// @returns The instruction name, or "<null>" if no decoder matches.
std::string_view GetNameOfA64Instruction(u32 instruction) {
    // GetName scans the same matcher set as Decode, so the optional is
    // dereferenced only after a successful decode.
    if (auto const decoder = A64::Decode<A64::TranslatorVisitor>(instruction))
        return *A64::GetName<A64::TranslatorVisitor>(instruction);
    return "<null>";
}