[dynarmic] Refactoring to reduce latency hit from recompilation #358

Open
Lizzie wants to merge 22 commits from liz-dynarmic-latency-improvments into master
35 changed files with 334 additions and 445 deletions

View file

@ -210,12 +210,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ArmDynarmic32::MakeJit(Common::PageTable* pa
config.wall_clock_cntpct = m_uses_wall_clock; config.wall_clock_cntpct = m_uses_wall_clock;
config.enable_cycle_counting = !m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock;
// Code cache size // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
#ifdef ARCHITECTURE_arm64 // Solaris doesn't support kPageSize >= 512MiB
config.code_cache_size = std::uint32_t(128_MiB); config.code_cache_size = std::uint32_t(128_MiB);
#else
config.code_cache_size = std::uint32_t(512_MiB);
#endif
// Allow memory fault handling to work // Allow memory fault handling to work
if (m_system.DebuggerEnabled()) { if (m_system.DebuggerEnabled()) {

View file

@ -269,12 +269,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
config.wall_clock_cntpct = m_uses_wall_clock; config.wall_clock_cntpct = m_uses_wall_clock;
config.enable_cycle_counting = !m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock;
// Code cache size // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
#ifdef ARCHITECTURE_arm64 // Solaris doesn't support kPageSize >= 512MiB
config.code_cache_size = std::uint32_t(128_MiB); config.code_cache_size = std::uint32_t(128_MiB);
#else
config.code_cache_size = std::uint32_t(512_MiB);
#endif
// Allow memory fault handling to work // Allow memory fault handling to work
if (m_system.DebuggerEnabled()) { if (m_system.DebuggerEnabled()) {
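Both MakeJit hunks above make the same change. For readers unfamiliar with the _MiB user-defined literal, here is a minimal sketch of the pattern; the literal below is a hypothetical stand-in, and the 128 MiB / 2 GiB ceilings are simply the figures quoted in the new comment, not verified against dynarmic's backends:

#include <cstdint>

// Hypothetical stand-in for the _MiB literal provided by the emulator's common headers.
constexpr std::uint64_t operator""_MiB(unsigned long long n) {
    return n * 1024ull * 1024ull;
}

// Per the new comment: the ARM64 backend caps the JIT code cache at 128 MiB,
// while x86_64 allows up to 2 GiB, so one conservative value works on both.
constexpr std::uint32_t kCodeCacheSize = std::uint32_t(128_MiB);
static_assert(kCodeCacheSize == 128u * 1024u * 1024u);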

View file

@ -58,14 +58,11 @@ add_library(dynarmic
common/lut_from_list.h common/lut_from_list.h
common/math_util.cpp common/math_util.cpp
common/math_util.h common/math_util.h
common/memory_pool.cpp
common/memory_pool.h
common/safe_ops.h common/safe_ops.h
common/spin_lock.h common/spin_lock.h
common/string_util.h common/string_util.h
common/u128.cpp common/u128.cpp
common/u128.h common/u128.h
common/variant_util.h
frontend/A32/a32_types.cpp frontend/A32/a32_types.cpp
frontend/A32/a32_types.h frontend/A32/a32_types.h
frontend/A64/a64_types.cpp frontend/A64/a64_types.cpp
@ -80,7 +77,6 @@ add_library(dynarmic
ir/basic_block.cpp ir/basic_block.cpp
ir/basic_block.h ir/basic_block.h
ir/cond.h ir/cond.h
ir/ir_emitter.cpp
ir/ir_emitter.h ir/ir_emitter.h
ir/location_descriptor.cpp ir/location_descriptor.cpp
ir/location_descriptor.h ir/location_descriptor.h

View file

@ -15,15 +15,15 @@
#include <mcl/macro/architecture.hpp> #include <mcl/macro/architecture.hpp>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#if defined(MCL_ARCHITECTURE_X86_64) #if defined(ARCHITECTURE_x86_64)
namespace Dynarmic::Backend::X64 { namespace Dynarmic::Backend::X64 {
class BlockOfCode; class BlockOfCode;
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64
#elif defined(MCL_ARCHITECTURE_ARM64) #elif defined(ARCHITECTURE_arm64)
namespace oaknut { namespace oaknut {
class CodeBlock; class CodeBlock;
} // namespace oaknut } // namespace oaknut
#elif defined(MCL_ARCHITECTURE_RISCV) #elif defined(ARCHITECTURE_riscv64)
namespace Dynarmic::Backend::RV64 { namespace Dynarmic::Backend::RV64 {
class CodeBlock; class CodeBlock;
} // namespace Dynarmic::Backend::RV64 } // namespace Dynarmic::Backend::RV64
@ -33,16 +33,16 @@ class CodeBlock;
namespace Dynarmic::Backend { namespace Dynarmic::Backend {
#if defined(MCL_ARCHITECTURE_X86_64) #if defined(ARCHITECTURE_x86_64)
struct FakeCall { struct FakeCall {
u64 call_rip; u64 call_rip;
u64 ret_rip; u64 ret_rip;
}; };
#elif defined(MCL_ARCHITECTURE_ARM64) #elif defined(ARCHITECTURE_arm64)
struct FakeCall { struct FakeCall {
u64 call_pc; u64 call_pc;
}; };
#elif defined(MCL_ARCHITECTURE_RISCV) #elif defined(ARCHITECTURE_riscv64)
struct FakeCall { struct FakeCall {
}; };
#else #else
@ -54,11 +54,11 @@ public:
ExceptionHandler(); ExceptionHandler();
~ExceptionHandler(); ~ExceptionHandler();
#if defined(MCL_ARCHITECTURE_X86_64) #if defined(ARCHITECTURE_x86_64)
void Register(X64::BlockOfCode& code); void Register(X64::BlockOfCode& code);
#elif defined(MCL_ARCHITECTURE_ARM64) #elif defined(ARCHITECTURE_arm64)
void Register(oaknut::CodeBlock& mem, std::size_t mem_size); void Register(oaknut::CodeBlock& mem, std::size_t mem_size);
#elif defined(MCL_ARCHITECTURE_RISCV) #elif defined(ARCHITECTURE_riscv64)
void Register(RV64::CodeBlock& mem, std::size_t mem_size); void Register(RV64::CodeBlock& mem, std::size_t mem_size);
#else #else
# error "Invalid architecture" # error "Invalid architecture"

View file

@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
Optimization::Optimize(ir_block, conf, {});
Optimization::PolyfillPass(ir_block, {});
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
Optimization::VerificationPass(ir_block);
return ir_block; return ir_block;
} }
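The per-pass sequence that used to live here now sits behind a single Optimization::Optimize call. A rough sketch of what such a wrapper would contain if it simply folds the removed passes back together; the exact signature and pass set are assumed from this hunk, not verified against the PR:

// Hedged sketch: one entry point that runs the same passes the caller used to
// sequence by hand, gated on the same optimization flags.
void Optimize(IR::Block& block, const A32::UserConfig& conf,
              const Optimization::PolyfillOptions& polyfill) {
    Optimization::PolyfillPass(block, polyfill);
    if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
        Optimization::A32GetSetElimination(block, {.convert_nzc_to_nz = true});
        Optimization::DeadCodeElimination(block);
    }
    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
        Optimization::A32ConstantMemoryReads(block, conf.callbacks);
        Optimization::ConstantPropagation(block);
        Optimization::DeadCodeElimination(block);
    }
    Optimization::VerificationPass(block);
}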

View file

@ -28,7 +28,6 @@
#include "dynarmic/backend/x64/nzcv_util.h" #include "dynarmic/backend/x64/nzcv_util.h"
#include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/perf_map.h"
#include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/common/variant_util.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/interface/A32/coprocessor.h" #include "dynarmic/interface/A32/coprocessor.h"

View file

@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
auto const opcode = inst.GetOpcode(); auto const opcode = inst.GetOpcode();
// Call the relevant Emit* member function. // Call the relevant Emit* member function.
switch (opcode) { switch (opcode) {
#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch; #define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch;
#define A32OPC(name, type, ...) #define A32OPC(name, type, ...)
#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch; #define A64OPC(name, type, ...) case IR::Opcode::A64##name: goto a64_branch;
#include "dynarmic/ir/opcodes.inc" #include "dynarmic/ir/opcodes.inc"
#undef OPCODE #undef OPCODE
#undef A32OPC #undef A32OPC
@ -764,7 +764,7 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
target_code_ptr = code.GetReturnFromRunCodeAddress(); target_code_ptr = code.GetReturnFromRunCodeAddress();
} }
const CodePtr patch_location = code.getCurr(); const CodePtr patch_location = code.getCurr();
code.mov(code.rcx, reinterpret_cast<u64>(target_code_ptr)); code.mov(code.rcx, u64(target_code_ptr));
code.EnsurePatchLocationSize(patch_location, 10); code.EnsurePatchLocationSize(patch_location, 10);
} }

View file

@ -80,16 +80,16 @@ public:
}; };
// TODO: Check code alignment // TODO: Check code alignment
const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15));
const CodePtr current_code_ptr = [this] { const CodePtr current_code_ptr = [this, aligned_code_ptr] {
// RSB optimization // RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask; const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask;
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
jit_state.rsb_ptr = new_rsb_ptr; jit_state.rsb_ptr = new_rsb_ptr;
return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]); return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]);
} }
return aligned_code_ptr;
return GetCurrentBlock(); //return GetCurrentBlock();
}(); }();
const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr); const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr);
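The (x + 15) & ~15 expression used for aligned_code_ptr is the usual round-up-to-16 trick; a tiny self-contained illustration, assuming nothing beyond <cstdint>:

#include <cstdint>

// Round p up to the next 16-byte boundary (no-op if already aligned).
constexpr std::uintptr_t AlignUp16(std::uintptr_t p) {
    return (p + 15) & ~std::uintptr_t(15);
}
static_assert(AlignUp16(0x1000) == 0x1000);
static_assert(AlignUp16(0x1001) == 0x1010);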

View file

@ -10,7 +10,6 @@
#include <algorithm> #include <algorithm>
#include <mcl/iterator/reverse.hpp>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include <xbyak/xbyak.h> #include <xbyak/xbyak.h>
@ -76,7 +75,8 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size); const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE); size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE);
for (auto const xmm : mcl::iterator::reverse(regs)) { for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
auto const xmm = *it;
if (HostLocIsXMM(xmm)) { if (HostLocIsXMM(xmm)) {
xmm_offset -= XMM_SIZE; xmm_offset -= XMM_SIZE;
if (code.HasHostFeature(HostFeature::AVX)) { if (code.HasHostFeature(HostFeature::AVX)) {
@ -88,9 +88,11 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
} }
if (frame_info.stack_subtraction != 0) if (frame_info.stack_subtraction != 0)
code.add(rsp, u32(frame_info.stack_subtraction)); code.add(rsp, u32(frame_info.stack_subtraction));
for (auto const gpr : mcl::iterator::reverse(regs)) for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
auto const gpr = *it;
if (HostLocIsGPR(gpr)) if (HostLocIsGPR(gpr))
code.pop(HostLocToReg64(gpr)); code.pop(HostLocToReg64(gpr));
}
} }
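Dropping the mcl reverse adaptor in favour of plain reverse iterators does not change the traversal order; a tiny check of the equivalence:

#include <array>
#include <cstdio>

int main() {
    constexpr std::array<int, 3> regs{1, 2, 3};
    // Same order the pop loops now use: last element first.
    for (auto it = regs.rbegin(); it != regs.rend(); ++it)
        std::printf("%d ", *it);   // prints: 3 2 1
}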
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) { void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) {

View file

@ -364,8 +364,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited, T_NEAR); jne(return_to_caller_mxcsr_already_exited, T_NEAR);
lock(); lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
SwitchMxcsrOnEntry(); SwitchMxcsrOnEntry();
jmp(ABI_PARAM2); jmp(ABI_PARAM2);
@ -415,7 +414,6 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
} }
xor_(eax, eax); xor_(eax, eax);
lock();
xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax); xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax);
ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
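The two lock-prefix edits here are safe for different reasons: the first hunk keeps lock() in front of or_ (a plain read-modify-write needs the prefix), while the second drops it before xchg because an x86 xchg with a memory operand is locked implicitly. The same pair of operations expressed with std::atomic, with the HaltReason::Step value as a placeholder:

#include <atomic>
#include <cstdint>

// Placeholder bit; the real HaltReason::Step enumerator lives in dynarmic's headers.
constexpr std::uint32_t kStepBit = 1u;

void RaiseStep(std::atomic<std::uint32_t>& halt_reason) {
    halt_reason.fetch_or(kStepBit);   // read-modify-write: needs lock or_ in the emitted code
}

std::uint32_t ConsumeHaltReason(std::atomic<std::uint32_t>& halt_reason) {
    return halt_reason.exchange(0);   // xchg with memory is implicitly locked on x86
}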

View file

@ -11,6 +11,7 @@
#include <iterator> #include <iterator>
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include <boost/variant/detail/apply_visitor_binary.hpp>
#include <mcl/bit/bit_field.hpp> #include <mcl/bit/bit_field.hpp>
#include <mcl/scope_exit.hpp> #include <mcl/scope_exit.hpp>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
@ -21,7 +22,6 @@
#include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/perf_map.h"
#include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/backend/x64/verbose_debugging_output.h" #include "dynarmic/backend/x64/verbose_debugging_output.h"
#include "dynarmic/common/variant_util.h"
#include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h" #include "dynarmic/ir/opcodes.h"
@ -347,14 +347,14 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de
} }
void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) { void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
Common::VisitVariant<void>(terminal, [this, initial_location, is_single_step](auto x) { boost::apply_visitor([this, initial_location, is_single_step](auto x) {
using T = std::decay_t<decltype(x)>; using T = std::decay_t<decltype(x)>;
if constexpr (!std::is_same_v<T, IR::Term::Invalid>) { if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
this->EmitTerminalImpl(x, initial_location, is_single_step); this->EmitTerminalImpl(x, initial_location, is_single_step);
} else { } else {
ASSERT_MSG(false, "Invalid terminal"); ASSERT_MSG(false, "Invalid terminal");
} }
}); }, terminal);
} }
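This relies on boost::apply_visitor accepting a generic lambda directly, which is the same facility the deleted Common::VisitVariant helper wrapped. A small standalone example of the pattern on a toy variant:

#include <boost/variant.hpp>
#include <iostream>
#include <string>
#include <type_traits>

int main() {
    boost::variant<int, std::string> terminal = std::string("LinkBlock");
    boost::apply_visitor([](auto x) {
        using T = std::decay_t<decltype(x)>;
        if constexpr (std::is_same_v<T, int>) {
            std::cout << "numeric terminal: " << x << '\n';
        } else {
            std::cout << "named terminal: " << x << '\n';
        }
    }, terminal);
}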
void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {

View file

@ -92,13 +92,10 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list<Xbyak::Xmm> t
FpFixup::Norm_Src, FpFixup::Norm_Src,
FpFixup::Norm_Src, FpFixup::Norm_Src,
FpFixup::Norm_Src); FpFixup::Norm_Src);
const Xbyak::Xmm tmp = xmm0;
const Xbyak::Xmm tmp = xmm16;
FCODE(vmovap)(tmp, code.BConst<fsize>(xword, denormal_to_zero)); FCODE(vmovap)(tmp, code.BConst<fsize>(xword, denormal_to_zero));
for (const Xbyak::Xmm& xmm : to_daz)
for (const Xbyak::Xmm& xmm : to_daz) {
FCODE(vfixupimms)(xmm, xmm, tmp, u8(0)); FCODE(vfixupimms)(xmm, xmm, tmp, u8(0));
}
return; return;
} }

View file

@ -273,34 +273,31 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool ordered = IsOrdered(args[3].GetImmediateAccType()); const bool ordered = IsOrdered(args[3].GetImmediateAccType());
if constexpr (bitsize != 128) { if constexpr (bitsize == 128) {
ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
} else {
ctx.reg_alloc.Use(args[1], ABI_PARAM2); ctx.reg_alloc.Use(args[1], ABI_PARAM2);
ctx.reg_alloc.Use(args[2], HostLoc::XMM1); ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(inst); ctx.reg_alloc.HostCall(inst);
} else {
ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
} }
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
Xbyak::Label end; Xbyak::Label end;
code.mov(code.ABI_RETURN, u32(1)); code.mov(code.ABI_RETURN, u32(1));
code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
code.test(tmp.cvt8(), tmp.cvt8());
code.je(end); code.je(end);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); code.xor_(tmp.cvt32(), tmp.cvt32());
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf)); code.xchg(tmp.cvt8(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
code.mov(code.ABI_PARAM1, u64(&conf));
if constexpr (bitsize != 128) { if constexpr (bitsize != 128) {
using T = mcl::unsigned_integer_of_size<bitsize>; using T = mcl::unsigned_integer_of_size<bitsize>;
code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 {
code.CallLambda( return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr, [&](T expected) -> bool {
[](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 { return (conf.callbacks->*callback)(vaddr, value, expected);
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr, }) ? 0 : 1;
[&](T expected) -> bool { });
return (conf.callbacks->*callback)(vaddr, value, expected);
})
? 0
: 1;
});
if (ordered) { if (ordered) {
code.mfence(); code.mfence();
} }
@ -308,15 +305,11 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
code.movaps(xword[code.ABI_PARAM3], xmm1); code.movaps(xword[code.ABI_PARAM3], xmm1);
code.CallLambda( code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 {
[](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 { return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr, [&](Vector expected) -> bool {
return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr, return (conf.callbacks->*callback)(vaddr, value, expected);
[&](Vector expected) -> bool { }) ? 0 : 1;
return (conf.callbacks->*callback)(vaddr, value, expected); });
})
? 0
: 1;
});
if (ordered) { if (ordered) {
code.mfence(); code.mfence();
} }
@ -437,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
SharedLabel end = GenSharedLabel(); SharedLabel end = GenSharedLabel();
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.mov(status, u32(1)); code.mov(status, u32(1));
code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
code.test(tmp.cvt8(), tmp.cvt8());
code.je(*end, code.T_NEAR); code.je(*end, code.T_NEAR);
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.cmp(qword[tmp], vaddr); code.cmp(qword[tmp], vaddr);
code.jne(*end, code.T_NEAR); code.jne(*end, code.T_NEAR);
@ -474,30 +468,29 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
const auto location = code.getCurr(); const auto location = code.getCurr();
if constexpr (bitsize == 128) { switch (bitsize) {
case 8:
code.lock();
code.cmpxchg(code.byte[dest_ptr], value.cvt8());
break;
case 16:
code.lock();
code.cmpxchg(word[dest_ptr], value.cvt16());
break;
case 32:
code.lock();
code.cmpxchg(dword[dest_ptr], value.cvt32());
break;
case 64:
code.lock();
code.cmpxchg(qword[dest_ptr], value.cvt64());
break;
case 128:
code.lock(); code.lock();
code.cmpxchg16b(ptr[dest_ptr]); code.cmpxchg16b(ptr[dest_ptr]);
} else { break;
switch (bitsize) { default:
case 8: UNREACHABLE();
code.lock();
code.cmpxchg(code.byte[dest_ptr], value.cvt8());
break;
case 16:
code.lock();
code.cmpxchg(word[dest_ptr], value.cvt16());
break;
case 32:
code.lock();
code.cmpxchg(dword[dest_ptr], value.cvt32());
break;
case 64:
code.lock();
code.cmpxchg(qword[dest_ptr], value.cvt64());
break;
default:
UNREACHABLE();
}
} }
code.setnz(status.cvt8()); code.setnz(status.cvt8());

View file

@ -609,8 +609,8 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm right_shift = xmm16; const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = xmm17; const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
code.vpxord(right_shift, right_shift, right_shift); code.vpxord(right_shift, right_shift, right_shift);
@ -674,8 +674,8 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm right_shift = xmm16; const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = xmm17; const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF)); code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF));
code.vpxorq(right_shift, right_shift, right_shift); code.vpxorq(right_shift, right_shift, right_shift);
@ -1955,8 +1955,8 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm right_shift = xmm16; const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = xmm17; const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
code.vpxord(right_shift, right_shift, right_shift); code.vpxord(right_shift, right_shift, right_shift);
@ -2737,7 +2737,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst)
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
const Xbyak::Xmm c = xmm16; const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
code.vpsraq(c, a, 32); code.vpsraq(c, a, 32);
code.vpsllq(a, a, 32); code.vpsllq(a, a, 32);
code.vpsraq(a, a, 32); code.vpsraq(a, a, 32);
@ -5461,7 +5461,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) {
const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm masked = xmm16; const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm();
code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));

View file

@ -9,6 +9,7 @@
#include "dynarmic/backend/x64/reg_alloc.h" #include "dynarmic/backend/x64/reg_alloc.h"
#include <algorithm> #include <algorithm>
#include <limits>
#include <numeric> #include <numeric>
#include <utility> #include <utility>
@ -118,7 +119,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
values.push_back(inst); values.push_back(inst);
ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits<uint16_t>::max)()); ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits<uint16_t>::max)());
total_uses += inst->UseCount(); total_uses += inst->UseCount();
max_bit_width = std::max<uint8_t>(max_bit_width, GetBitWidth(inst->GetType())); max_bit_width = std::max<uint8_t>(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType())));
} }
void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept { void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {
@ -152,19 +153,19 @@ bool Argument::GetImmediateU1() const noexcept {
u8 Argument::GetImmediateU8() const noexcept { u8 Argument::GetImmediateU8() const noexcept {
const u64 imm = value.GetImmediateAsU64(); const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x100); ASSERT(imm <= u64(std::numeric_limits<u8>::max()));
return u8(imm); return u8(imm);
} }
u16 Argument::GetImmediateU16() const noexcept { u16 Argument::GetImmediateU16() const noexcept {
const u64 imm = value.GetImmediateAsU64(); const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x10000); ASSERT(imm <= u64(std::numeric_limits<u16>::max()));
return u16(imm); return u16(imm);
} }
u32 Argument::GetImmediateU32() const noexcept { u32 Argument::GetImmediateU32() const noexcept {
const u64 imm = value.GetImmediateAsU64(); const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x100000000); ASSERT(imm <= u64(std::numeric_limits<u32>::max()));
return u32(imm); return u32(imm);
} }
@ -366,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def,
if (result_def) { if (result_def) {
DefineValueImpl(result_def, ABI_RETURN); DefineValueImpl(result_def, ABI_RETURN);
} }
for (size_t i = 0; i < args.size(); i++) {
if (args[i]) {
UseScratch(*args[i], args_hostloc[i]);
} else {
ScratchGpr(args_hostloc[i]); // TODO: Force spill
}
}
// Must match with ScratchImpl
for (auto const gpr : other_caller_save) {
MoveOutOfTheWay(gpr);
LocInfo(gpr).WriteLock();
}
for (size_t i = 0; i < args.size(); i++) { for (size_t i = 0; i < args.size(); i++) {
if (args[i] && !args[i]->get().IsVoid()) { if (args[i] && !args[i]->get().IsVoid()) {
UseScratch(*args[i], args_hostloc[i]);
// LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]); const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
switch (args[i]->get().GetType()) { switch (args[i]->get().GetType()) {
@ -389,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def,
} }
} }
} }
for (size_t i = 0; i < args.size(); i++)
if (!args[i]) {
// TODO: Force spill
ScratchGpr(args_hostloc[i]);
}
for (auto const caller_saved : other_caller_save)
ScratchImpl({caller_saved});
} }
void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept { void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
@ -559,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept {
} }
HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
#if 0
// TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows // TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows
// but it's fine anyways. We can find other ways to cheat it later - but which?!?! // but it's fine anyways. We can find other ways to cheat it later - but which?!?!
// we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end? // we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end?
// TODO(lizzie): This needs to be investigated further later. // TODO(lizzie): This needs to be investigated further later.
// Do not spill XMM into other XMM silly // Do not spill XMM into other XMM silly
if (!is_xmm) { /*if (!is_xmm) {
// TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY // TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY
// Intel recommends to spill GPR onto XMM registers IF POSSIBLE // Intel recommends to spill GPR onto XMM registers IF POSSIBLE
// TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call? // TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call?
@ -573,8 +575,9 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i) for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i)
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
return loc; return loc;
} }*/
#endif // TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits
// of spilling on XMM versus the potential cost of using XMM registers.....
// Otherwise go to stack spilling // Otherwise go to stack spilling
for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i) for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i)
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())

View file

@ -12,6 +12,7 @@
#include <functional> #include <functional>
#include <optional> #include <optional>
#include "boost/container/small_vector.hpp"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include <xbyak/xbyak.h> #include <xbyak/xbyak.h>
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
@ -77,13 +78,13 @@ public:
return std::find(values.begin(), values.end(), inst) != values.end(); return std::find(values.begin(), values.end(), inst) != values.end();
} }
inline size_t GetMaxBitWidth() const noexcept { inline size_t GetMaxBitWidth() const noexcept {
return max_bit_width; return 1 << max_bit_width;
} }
void AddValue(IR::Inst* inst) noexcept; void AddValue(IR::Inst* inst) noexcept;
void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
private: private:
//non trivial //non trivial
std::vector<IR::Inst*> values; //24 boost::container::small_vector<IR::Inst*, 3> values; //24
// Block state // Block state
uint16_t total_uses = 0; //8 uint16_t total_uses = 0; //8
//sometimes zeroed //sometimes zeroed
@ -93,10 +94,10 @@ private:
uint16_t is_being_used_count = 0; //8 uint16_t is_being_used_count = 0; //8
uint16_t current_references = 0; //8 uint16_t current_references = 0; //8
// Value state // Value state
uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 7)
uint8_t lru_counter : 2 = 0; //1
bool is_scratch : 1 = false; //1 bool is_scratch : 1 = false; //1
bool is_set_last_use : 1 = false; //1 bool is_set_last_use : 1 = false; //1
alignas(16) uint8_t lru_counter = 0; //1
friend class RegAlloc; friend class RegAlloc;
}; };
static_assert(sizeof(HostLocInfo) == 64); static_assert(sizeof(HostLocInfo) == 64);
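With the width stored as its base-2 logarithm, the field needs only 4 bits and GetMaxBitWidth recovers the original value with a shift. A quick round-trip check of the encoding AddValue now uses:

#include <bit>
#include <cstdint>

// Store log2(width) in 4 bits, recover the width with a shift.
constexpr std::uint8_t Encode(std::uint8_t bit_width) {
    return std::uint8_t(std::countr_zero(bit_width));   // 1,2,4,...,128 -> 0,1,2,...,7
}
constexpr std::size_t Decode(std::uint8_t log2_width) {
    return std::size_t(1) << log2_width;
}
static_assert(Decode(Encode(32)) == 32);
static_assert(Decode(Encode(128)) == 128);
static_assert(Encode(128) == 7);   // still fits in the 4-bit field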

View file

@ -1,13 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/common/memory_pool.h"
#include <cstdlib>
namespace Dynarmic::Common {
} // namespace Dynarmic::Common

View file

@ -1,61 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <cstddef>
#include <vector>
namespace Dynarmic::Common {
/// @tparam object_size Byte-size of objects to construct
/// @tparam slab_size Number of objects to have per slab
template<size_t object_size, size_t slab_size>
class Pool {
public:
inline Pool() noexcept {
AllocateNewSlab();
}
inline ~Pool() noexcept {
std::free(current_slab);
for (char* slab : slabs) {
std::free(slab);
}
}
Pool(const Pool&) = delete;
Pool(Pool&&) = delete;
Pool& operator=(const Pool&) = delete;
Pool& operator=(Pool&&) = delete;
/// @brief Returns a pointer to an `object_size`-bytes block of memory.
[[nodiscard]] void* Alloc() noexcept {
if (remaining == 0) {
slabs.push_back(current_slab);
AllocateNewSlab();
}
void* ret = static_cast<void*>(current_ptr);
current_ptr += object_size;
remaining--;
return ret;
}
private:
/// @brief Allocates a completely new memory slab.
/// Used when an entirely new slab is needed
/// due the current one running out of usable space.
void AllocateNewSlab() noexcept {
current_slab = static_cast<char*>(std::malloc(object_size * slab_size));
current_ptr = current_slab;
remaining = slab_size;
}
std::vector<char*> slabs;
char* current_slab = nullptr;
char* current_ptr = nullptr;
size_t remaining = 0;
};
} // namespace Dynarmic::Common

View file

@ -1,29 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <boost/variant.hpp>
namespace Dynarmic::Common {
namespace detail {
template<typename ReturnT, typename Lambda>
struct VariantVisitor : boost::static_visitor<ReturnT>
, Lambda {
VariantVisitor(Lambda&& lambda)
: Lambda(std::move(lambda)) {}
using Lambda::operator();
};
} // namespace detail
template<typename ReturnT, typename Variant, typename Lambda>
inline ReturnT VisitVariant(Variant&& variant, Lambda&& lambda) {
return boost::apply_visitor(detail::VariantVisitor<ReturnT, Lambda>(std::move(lambda)), variant);
}
} // namespace Dynarmic::Common

View file

@ -9,12 +9,9 @@
#pragma once #pragma once
#include <string> #include <string>
#include <utility>
#include <fmt/format.h> #include <fmt/format.h>
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
#include "dynarmic/interface/A32/coprocessor_util.h" #include "dynarmic/interface/A32/coprocessor_util.h"
#include "dynarmic/ir/cond.h" #include "dynarmic/ir/cond.h"
@ -89,24 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) {
inline size_t RegNumber(Reg reg) { inline size_t RegNumber(Reg reg) {
ASSERT(reg != Reg::INVALID_REG); ASSERT(reg != Reg::INVALID_REG);
return static_cast<size_t>(reg); return size_t(reg);
} }
inline size_t RegNumber(ExtReg reg) { inline size_t RegNumber(ExtReg reg) {
if (IsSingleExtReg(reg)) { if (IsSingleExtReg(reg)) {
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::S0); return size_t(reg) - size_t(ExtReg::S0);
} else if (IsDoubleExtReg(reg)) {
return size_t(reg) - size_t(ExtReg::D0);
} }
ASSERT(IsQuadExtReg(reg));
if (IsDoubleExtReg(reg)) { return size_t(reg) - size_t(ExtReg::Q0);
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::D0);
}
if (IsQuadExtReg(reg)) {
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::Q0);
}
ASSERT_MSG(false, "Invalid extended register");
return 0;
} }
inline Reg operator+(Reg reg, size_t number) { inline Reg operator+(Reg reg, size_t number) {

View file

@ -30,13 +30,13 @@ template<typename Visitor>
using ArmDecodeTable = std::array<std::vector<ArmMatcher<Visitor>>, 0x1000>; using ArmDecodeTable = std::array<std::vector<ArmMatcher<Visitor>>, 0x1000>;
namespace detail { namespace detail {
inline size_t ToFastLookupIndexArm(u32 instruction) { inline size_t ToFastLookupIndexArm(u32 instruction) noexcept {
return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0); return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0);
} }
} // namespace detail } // namespace detail
template<typename V> template<typename V>
constexpr ArmDecodeTable<V> GetArmDecodeTable() { constexpr ArmDecodeTable<V> GetArmDecodeTable() noexcept {
std::vector<ArmMatcher<V>> list = { std::vector<ArmMatcher<V>> list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./arm.inc" #include "./arm.inc"
@ -62,15 +62,27 @@ constexpr ArmDecodeTable<V> GetArmDecodeTable() {
} }
template<typename V> template<typename V>
std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) { std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) noexcept {
alignas(64) static const auto table = GetArmDecodeTable<V>(); alignas(64) static const auto table = GetArmDecodeTable<V>();
const auto matches_instruction = [instruction](const auto& matcher) { const auto matches_instruction = [instruction](const auto& matcher) {
return matcher.Matches(instruction); return matcher.Matches(instruction);
}; };
const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)]; const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)];
auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
return iter != subtable.end() ? std::optional<std::reference_wrapper<const ArmMatcher<V>>>(*iter) : std::nullopt; return iter != subtable.end() ? std::optional<std::reference_wrapper<const ArmMatcher<V>>>(*iter) : std::nullopt;
} }
template<typename V>
std::optional<std::string_view> GetNameARM(u32 inst) noexcept {
std::vector<std::pair<std::string_view, ArmMatcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./arm.inc"
#undef INST
};
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}
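The new GetNameARM helper (and its Thumb, ASIMD, VFP and A64 siblings below) rebuilds the matcher list on every call, so it suits debugging and tracing rather than hot paths. A hypothetical call site; the visitor type and logging function are illustrative only:

#include <fmt/format.h>

// Hypothetical diagnostic helper: TranslatorVisitor stands in for whatever
// visitor type the decoder is normally instantiated with.
void LogUndecodedArm(u32 instruction) {
    if (const auto name = Dynarmic::A32::GetNameARM<TranslatorVisitor>(instruction)) {
        fmt::print("matched but unhandled: {}\n", *name);
    } else {
        fmt::print("no matcher for encoding {:08x}\n", instruction);
    }
}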
} // namespace Dynarmic::A32 } // namespace Dynarmic::A32

View file

@ -26,15 +26,12 @@ template<typename Visitor>
using ASIMDMatcher = Decoder::Matcher<Visitor, u32>; using ASIMDMatcher = Decoder::Matcher<Visitor, u32>;
template<typename V> template<typename V>
std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() { std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() noexcept {
std::vector<ASIMDMatcher<V>> table = { std::vector<std::pair<const char*, ASIMDMatcher<V>>> table = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./asimd.inc" #include "./asimd.inc"
#undef INST #undef INST
}; };
// Exceptions to the rule of thumb. // Exceptions to the rule of thumb.
const std::set<std::string> comes_first{ const std::set<std::string> comes_first{
"VBIC, VMOV, VMVN, VORR (immediate)", "VBIC, VMOV, VMVN, VORR (immediate)",
@ -53,29 +50,43 @@ std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
"VQDMULH (scalar)", "VQDMULH (scalar)",
"VQRDMULH (scalar)", "VQRDMULH (scalar)",
}; };
const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
return comes_first.count(matcher.GetName()) > 0; return comes_first.count(e.first) > 0;
}); });
const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
return comes_last.count(matcher.GetName()) == 0; return comes_last.count(e.first) == 0;
}); });
// If a matcher has more bits in its mask it is more specific, so it should come first. // If a matcher has more bits in its mask it is more specific, so it should come first.
std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) { std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) {
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
}); });
std::vector<ASIMDMatcher<V>> final_table;
return table; std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) {
return e.second;
});
return final_table;
} }
template<typename V> template<typename V>
std::optional<std::reference_wrapper<const ASIMDMatcher<V>>> DecodeASIMD(u32 instruction) { std::optional<std::reference_wrapper<const ASIMDMatcher<V>>> DecodeASIMD(u32 instruction) noexcept {
static const auto table = GetASIMDDecodeTable<V>(); alignas(64) static const auto table = GetASIMDDecodeTable<V>();
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; return matcher.Matches(instruction);
});
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
return iter != table.end() ? std::optional<std::reference_wrapper<const ASIMDMatcher<V>>>(*iter) : std::nullopt; return iter != table.end() ? std::optional<std::reference_wrapper<const ASIMDMatcher<V>>>(*iter) : std::nullopt;
} }
template<typename V>
std::optional<std::string_view> GetNameASIMD(u32 inst) noexcept {
std::vector<std::pair<std::string_view, ASIMDMatcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./asimd.inc"
#undef INST
};
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}
} // namespace Dynarmic::A32 } // namespace Dynarmic::A32

View file

@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher<Visitor, u16>;
template<typename V> template<typename V>
std::optional<std::reference_wrapper<const Thumb16Matcher<V>>> DecodeThumb16(u16 instruction) { std::optional<std::reference_wrapper<const Thumb16Matcher<V>>> DecodeThumb16(u16 instruction) {
static const std::vector<Thumb16Matcher<V>> table = { alignas(64) static const std::vector<Thumb16Matcher<V>> table = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)), #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)),
#include "./thumb16.inc" #include "./thumb16.inc"
#undef INST #undef INST
}; };
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; return matcher.Matches(instruction);
});
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb16Matcher<V>>>(*iter) : std::nullopt; return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb16Matcher<V>>>(*iter) : std::nullopt;
} }
template<typename V>
std::optional<std::string_view> GetNameThumb16(u32 inst) noexcept {
std::vector<std::pair<std::string_view, Thumb16Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) },
#include "./thumb16.inc"
#undef INST
};
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}
} // namespace Dynarmic::A32 } // namespace Dynarmic::A32

View file

@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher<Visitor, u32>;
template<typename V> template<typename V>
std::optional<std::reference_wrapper<const Thumb32Matcher<V>>> DecodeThumb32(u32 instruction) { std::optional<std::reference_wrapper<const Thumb32Matcher<V>>> DecodeThumb32(u32 instruction) {
static const std::vector<Thumb32Matcher<V>> table = { alignas(64) static const std::vector<Thumb32Matcher<V>> table = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./thumb32.inc" #include "./thumb32.inc"
#undef INST #undef INST
}; };
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; return matcher.Matches(instruction);
});
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb32Matcher<V>>>(*iter) : std::nullopt; return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb32Matcher<V>>>(*iter) : std::nullopt;
} }
template<typename V>
std::optional<std::string_view> GetNameThumb32(u32 inst) noexcept {
std::vector<std::pair<std::string_view, Thumb32Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./thumb32.inc"
#undef INST
};
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}
} // namespace Dynarmic::A32 } // namespace Dynarmic::A32

View file

@ -26,36 +26,42 @@ using VFPMatcher = Decoder::Matcher<Visitor, u32>;
template<typename V> template<typename V>
std::optional<std::reference_wrapper<const VFPMatcher<V>>> DecodeVFP(u32 instruction) { std::optional<std::reference_wrapper<const VFPMatcher<V>>> DecodeVFP(u32 instruction) {
using Table = std::vector<VFPMatcher<V>>; using Table = std::vector<VFPMatcher<V>>;
alignas(64) static const struct Tables {
static const struct Tables {
Table unconditional; Table unconditional;
Table conditional; Table conditional;
} tables = [] { } tables = []() {
Table list = { Table list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./vfp.inc" #include "./vfp.inc"
#undef INST #undef INST
}; };
auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
return (matcher.GetMask() & 0xF0000000) == 0xF0000000; return (matcher.GetMask() & 0xF0000000) == 0xF0000000;
}); });
return Tables{ return Tables{
Table{list.begin(), division}, Table{list.begin(), it},
Table{division, list.end()}, Table{it, list.end()},
}; };
}(); }();
const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000; const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000;
const Table& table = is_unconditional ? tables.unconditional : tables.conditional; const Table& table = is_unconditional ? tables.unconditional : tables.conditional;
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; return matcher.Matches(instruction);
});
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
return iter != table.end() ? std::optional<std::reference_wrapper<const VFPMatcher<V>>>(*iter) : std::nullopt; return iter != table.end() ? std::optional<std::reference_wrapper<const VFPMatcher<V>>>(*iter) : std::nullopt;
} }
template<typename V>
std::optional<std::string_view> GetNameVFP(u32 inst) noexcept {
std::vector<std::pair<std::string_view, VFPMatcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./vfp.inc"
#undef INST
};
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}
} // namespace Dynarmic::A32 } // namespace Dynarmic::A32

View file

@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) {
return 0xF7F0A000; // UDF return 0xF7F0A000; // UDF
} }
bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) { inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept {
return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000; return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000;
} }

View file

@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) {
template<typename V> template<typename V>
constexpr DecodeTable<V> GetDecodeTable() { constexpr DecodeTable<V> GetDecodeTable() {
std::vector<Matcher<V>> list = { std::vector<std::pair<const char*, Matcher<V>>> list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./a64.inc" #include "./a64.inc"
#undef INST #undef INST
}; };
// If a matcher has more bits in its mask it is more specific, so it should come first. // If a matcher has more bits in its mask it is more specific, so it should come first.
std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) {
// If a matcher has more bits in its mask it is more specific, so it should come first. // If a matcher has more bits in its mask it is more specific, so it should come first.
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
}); });
// Exceptions to the above rule of thumb. // Exceptions to the above rule of thumb.
std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { std::stable_partition(list.begin(), list.end(), [&](const auto& e) {
return std::set<std::string>{ return std::set<std::string>{
"MOVI, MVNI, ORR, BIC (vector, immediate)", "MOVI, MVNI, ORR, BIC (vector, immediate)",
"FMOV (vector, immediate)", "FMOV (vector, immediate)",
"Unallocated SIMD modified immediate", "Unallocated SIMD modified immediate",
}.count(matcher.GetName()) > 0; }.count(e.first) > 0;
}); });
DecodeTable<V> table{}; DecodeTable<V> table{};
for (size_t i = 0; i < table.size(); ++i) { for (size_t i = 0; i < table.size(); ++i) {
for (auto matcher : list) { for (auto const& e : list) {
const auto expect = detail::ToFastLookupIndex(matcher.GetExpected()); const auto expect = detail::ToFastLookupIndex(e.second.GetExpected());
const auto mask = detail::ToFastLookupIndex(matcher.GetMask()); const auto mask = detail::ToFastLookupIndex(e.second.GetMask());
if ((i & mask) == expect) { if ((i & mask) == expect) {
table[i].push_back(matcher); table[i].push_back(e.second);
} }
} }
} }
@ -74,12 +71,24 @@ constexpr DecodeTable<V> GetDecodeTable() {
template<typename V> template<typename V>
std::optional<std::reference_wrapper<const Matcher<V>>> Decode(u32 instruction) { std::optional<std::reference_wrapper<const Matcher<V>>> Decode(u32 instruction) {
alignas(64) static const auto table = GetDecodeTable<V>(); alignas(64) static const auto table = GetDecodeTable<V>();
const auto matches_instruction = [instruction](const auto& matcher) {
return matcher.Matches(instruction);
};
const auto& subtable = table[detail::ToFastLookupIndex(instruction)]; const auto& subtable = table[detail::ToFastLookupIndex(instruction)];
auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) {
return matcher.Matches(instruction);
});
return iter != subtable.end() ? std::optional<std::reference_wrapper<const Matcher<V>>>(*iter) : std::nullopt; return iter != subtable.end() ? std::optional<std::reference_wrapper<const Matcher<V>>>(*iter) : std::nullopt;
} }
template<typename V>
std::optional<std::string_view> GetName(u32 inst) noexcept {
std::vector<std::pair<std::string_view, Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./a64.inc"
#undef INST
};
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}
} // namespace Dynarmic::A64 } // namespace Dynarmic::A64

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project. /* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage * Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
@ -20,9 +23,12 @@ bool TranslatorVisitor::B_cond(Imm<19> imm19, Cond cond) {
bool TranslatorVisitor::B_uncond(Imm<26> imm26) { bool TranslatorVisitor::B_uncond(Imm<26> imm26) {
const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend<s64>(); const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend<s64>();
const u64 target = ir.PC() + offset; const u64 target = ir.PC() + offset;
// Pattern to halt execution (B .)
//ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); if (target == ir.PC()) {
ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
return false;
}
ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
return false; return false;
} }
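The special case keeps the checked LinkBlock terminal only for a branch that targets itself (the B . busy-wait idiom), since the fast-linked terminal skips the work that lets the dispatcher notice pending halts and the remaining cycle budget. Written out with comments, using the same calls as the hunk above:

// target == ir.PC() means offset == 0, i.e. the instruction branches to itself.
if (target == ir.PC()) {
    // "B ." spin loop: use the terminal that re-enters the dispatcher so a
    // pending halt or exhausted cycle count can still interrupt it.
    ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
} else {
    // Ordinary unconditional branch: chain straight into the next block.
    ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
}
return false;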

View file

@ -70,11 +70,9 @@ struct detail {
return std::make_tuple(mask, expect); return std::make_tuple(mask, expect);
} }
/** /// @brief Generates the masks and shifts for each argument.
* Generates the masks and shifts for each argument. /// A '-' in a bitstring indicates that we don't care about that value.
* A '-' in a bitstring indicates that we don't care about that value. /// An argument is specified by a continuous string of the same character.
* An argument is specified by a continuous string of the same character.
*/
template<size_t N> template<size_t N>
static consteval auto GetArgInfo(std::array<char, opcode_bitsize> bitstring) { static consteval auto GetArgInfo(std::array<char, opcode_bitsize> bitstring) {
std::array<opcode_type, N> masks = {}; std::array<opcode_type, N> masks = {};
@ -98,7 +96,6 @@ struct detail {
if constexpr (N > 0) { if constexpr (N > 0) {
const size_t bit_position = opcode_bitsize - i - 1; const size_t bit_position = opcode_bitsize - i - 1;
if (arg_index >= N) if (arg_index >= N)
throw std::out_of_range("Unexpected field"); throw std::out_of_range("Unexpected field");
@ -109,20 +106,16 @@ struct detail {
} }
} }
} }
#if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__)
// Avoids a MSVC ICE, and avoids Android NDK issue. // Avoids a MSVC ICE, and avoids Android NDK issue.
ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; })); ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; }));
#endif #endif
return std::make_tuple(masks, shifts); return std::make_tuple(masks, shifts);
} }
/** /// @brief This struct's Make member function generates a lambda which decodes an instruction
* This struct's Make member function generates a lambda which decodes an instruction based on /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is
* the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a /// provided as a template argument.
* template argument.
*/
template<typename FnT> template<typename FnT>
struct VisitorCaller; struct VisitorCaller;
@ -130,36 +123,36 @@ struct detail {
# pragma warning(push) # pragma warning(push)
# pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning) # pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning)
#endif #endif
template<typename Visitor, typename... Args, typename CallRetT> template<typename V, typename... Args, typename ReturnType>
struct VisitorCaller<CallRetT (Visitor::*)(Args...)> { struct VisitorCaller<ReturnType (V::*)(Args...)> {
template<size_t... iota> template<size_t... iota>
static auto Make(std::integer_sequence<size_t, iota...>, static constexpr auto Make(std::integer_sequence<size_t, iota...>,
CallRetT (Visitor::*const fn)(Args...), ReturnType (V::*const fn)(Args...),
const std::array<opcode_type, sizeof...(iota)> arg_masks, const std::array<opcode_type, sizeof...(iota)> arg_masks,
const std::array<size_t, sizeof...(iota)> arg_shifts) { const std::array<size_t, sizeof...(iota)> arg_shifts) {
static_assert(std::is_same_v<visitor_type, Visitor>, "Member function is not from Matcher's Visitor"); static_assert(std::is_same_v<visitor_type, V>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) { return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) {
(void)instruction; (void)instruction;
(void)arg_masks; (void)arg_masks;
(void)arg_shifts; (void)arg_shifts;
return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...); return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
}; };
} }
}; };
template<typename Visitor, typename... Args, typename CallRetT> template<typename V, typename... Args, typename ReturnType>
struct VisitorCaller<CallRetT (Visitor::*)(Args...) const> { struct VisitorCaller<ReturnType (V::*)(Args...) const> {
template<size_t... iota> template<size_t... iota>
static auto Make(std::integer_sequence<size_t, iota...>, static constexpr auto Make(std::integer_sequence<size_t, iota...>,
CallRetT (Visitor::*const fn)(Args...) const, ReturnType (V::*const fn)(Args...) const,
const std::array<opcode_type, sizeof...(iota)> arg_masks, const std::array<opcode_type, sizeof...(iota)> arg_masks,
const std::array<size_t, sizeof...(iota)> arg_shifts) { const std::array<size_t, sizeof...(iota)> arg_shifts) {
static_assert(std::is_same_v<visitor_type, const Visitor>, "Member function is not from Matcher's Visitor"); static_assert(std::is_same_v<visitor_type, const V>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) { return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) {
(void)instruction; (void)instruction;
(void)arg_masks; (void)arg_masks;
(void)arg_shifts; (void)arg_shifts;
return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...); return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
}; };
} }
}; };
@ -167,27 +160,21 @@ struct detail {
# pragma warning(pop) # pragma warning(pop)
#endif #endif
/** /// @brief Creates a matcher that can match and parse instructions based on bitstring.
* Creates a matcher that can match and parse instructions based on bitstring. /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
* See also: GetMaskAndExpect and GetArgInfo for format of bitstring. template<auto bitstring, typename F>
*/ static constexpr auto GetMatcher(F fn) {
template<auto bitstring, typename FnT> constexpr size_t args_count = mcl::parameter_count_v<F>;
static auto GetMatcher(FnT fn, const char* const name) {
constexpr size_t args_count = mcl::parameter_count_v<FnT>;
constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring));
constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring));
constexpr auto arg_masks = std::get<0>(GetArgInfo<args_count>(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo<args_count>(bitstring));
constexpr auto arg_shifts = std::get<1>(GetArgInfo<args_count>(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo<args_count>(bitstring));
const auto proxy_fn = VisitorCaller<F>::Make(std::make_index_sequence<args_count>(), fn, arg_masks, arg_shifts);
using Iota = std::make_index_sequence<args_count>; return MatcherT(mask, expect, proxy_fn);
const auto proxy_fn = VisitorCaller<FnT>::Make(Iota(), fn, arg_masks, arg_shifts);
return MatcherT(name, mask, expect, proxy_fn);
} }
}; };
#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn, name) #define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn)
} // namespace detail } // namespace detail
} // namespace Dynarmic::Decoder } // namespace Dynarmic::Decoder
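
The doc comments above describe the bitstring convention (fixed '0'/'1' bits, '-' don't-cares, runs of identical letters naming one argument field), and a tiny worked example may make GetMaskAndExpect/GetArgInfo easier to follow. This is a toy, standalone sketch, not dynarmic's actual templates:

```cpp
#include <cstddef>
#include <cstdint>

// For the 8-bit pattern "0101vv--":
//   '0'/'1' positions form the mask/expect pair, '-' positions are ignored,
//   and the run of 'v' characters is one extractable argument field.
constexpr std::uint8_t mask    = 0b1111'0000;  // fixed bits
constexpr std::uint8_t expect  = 0b0101'0000;  // required values of the fixed bits
constexpr std::uint8_t v_mask  = 0b0000'1100;  // the two 'v' bits
constexpr std::size_t  v_shift = 2;

constexpr bool Matches(std::uint8_t op) { return (op & mask) == expect; }
constexpr std::uint8_t ExtractV(std::uint8_t op) {
    return static_cast<std::uint8_t>((op & v_mask) >> v_shift);
}

static_assert(Matches(0b0101'1011));
static_assert(ExtractV(0b0101'1011) == 0b10);
```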

View file

@ -14,16 +14,12 @@
namespace Dynarmic::Decoder { namespace Dynarmic::Decoder {
/** /// Generic instruction handling construct.
* Generic instruction handling construct. /// @tparam Visitor An arbitrary visitor type that will be passed through
* /// to the function being handled. This type must be the
* @tparam Visitor An arbitrary visitor type that will be passed through /// type of the first parameter in a handler function.
* to the function being handled. This type must be the /// @tparam OpcodeType Type representing an opcode. This must be the
* type of the first parameter in a handler function. /// type of the second parameter in a handler function.
*
* @tparam OpcodeType Type representing an opcode. This must be the
* type of the second parameter in a handler function.
*/
template<typename Visitor, typename OpcodeType> template<typename Visitor, typename OpcodeType>
class Matcher { class Matcher {
public: public:
@ -31,46 +27,35 @@ public:
using visitor_type = Visitor; using visitor_type = Visitor;
using handler_return_type = typename Visitor::instruction_return_type; using handler_return_type = typename Visitor::instruction_return_type;
using handler_function = std::function<handler_return_type(Visitor&, opcode_type)>; using handler_function = std::function<handler_return_type(Visitor&, opcode_type)>;
Matcher(opcode_type mask, opcode_type expected, handler_function func)
Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func) : mask{mask}, expected{expected}, fn{std::move(func)} {}
: name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {}
/// Gets the name of this type of instruction.
const char* GetName() const {
return name;
}
/// Gets the mask for this instruction. /// Gets the mask for this instruction.
opcode_type GetMask() const { inline opcode_type GetMask() const noexcept {
return mask; return mask;
} }
/// Gets the expected value after masking for this instruction. /// Gets the expected value after masking for this instruction.
opcode_type GetExpected() const { inline opcode_type GetExpected() const noexcept {
return expected; return expected;
} }
/** /// Tests to see if the given instruction is the instruction this matcher represents.
* Tests to see if the given instruction is the instruction this matcher represents. /// @param instruction The instruction to test
* @param instruction The instruction to test /// @returns true if the given instruction matches.
* @returns true if the given instruction matches. inline bool Matches(opcode_type instruction) const noexcept {
*/
bool Matches(opcode_type instruction) const {
return (instruction & mask) == expected; return (instruction & mask) == expected;
} }
/** /// Calls the corresponding instruction handler on visitor for this type of instruction.
* Calls the corresponding instruction handler on visitor for this type of instruction. /// @param v The visitor to use
* @param v The visitor to use /// @param instruction The instruction to decode.
* @param instruction The instruction to decode. inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept {
*/
handler_return_type call(Visitor& v, opcode_type instruction) const {
ASSERT(Matches(instruction)); ASSERT(Matches(instruction));
return fn(v, instruction); return fn(v, instruction);
} }
private: private:
const char* name;
opcode_type mask; opcode_type mask;
opcode_type expected; opcode_type expected;
handler_function fn; handler_function fn;

View file

@ -15,8 +15,6 @@
#include <fmt/format.h> #include <fmt/format.h>
#include "dynarmic/common/assert.h" #include "dynarmic/common/assert.h"
#include "dynarmic/common/memory_pool.h"
#include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/frontend/A64/a64_types.h" #include "dynarmic/frontend/A64/a64_types.h"
#include "dynarmic/ir/cond.h" #include "dynarmic/ir/cond.h"
@ -27,8 +25,7 @@ namespace Dynarmic::IR {
Block::Block(const LocationDescriptor& location) Block::Block(const LocationDescriptor& location)
: location{location}, : location{location},
end_location{location}, end_location{location},
cond{Cond::AL}, cond{Cond::AL}
instruction_alloc_pool{std::make_unique<std::remove_reference_t<decltype(*instruction_alloc_pool)>>()}
{ {
} }
@ -40,7 +37,21 @@ Block::Block(const LocationDescriptor& location)
/// @param args A sequence of Value instances used as arguments for the instruction. /// @param args A sequence of Value instances used as arguments for the instruction.
/// @returns Iterator to the newly created instruction. /// @returns Iterator to the newly created instruction.
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept { Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode); // First try the "inline" buffer; otherwise fall back to a slower slab-like allocation scheme.
// The purpose is to avoid frequent new/delete calls, each of which goes through malloc and, eventually, mmap;
// pooling the instructions sidesteps that cost. The inline buffer exists because many small blocks with only a
// few instructions are created (partly due to subpar optimisations in other passes), and branch-heavy code
// benefits greatly from the locality of these faster allocations. See the allocation sketch after this hunk.
IR::Inst* inst;
if (inlined_inst.size() < inlined_inst.max_size()) {
inst = &inlined_inst[inlined_inst.size()];
inlined_inst.emplace_back(opcode);
} else {
if (pooled_inst.empty() || pooled_inst.back().size() == pooled_inst.back().max_size())
pooled_inst.emplace_back();
inst = &pooled_inst.back()[pooled_inst.back().size()];
pooled_inst.back().emplace_back(opcode);
}
DEBUG_ASSERT(args.size() == inst->NumArgs()); DEBUG_ASSERT(args.size() == inst->NumArgs());
std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
inst->SetArg(index, arg); inst->SetArg(index, arg);
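
The allocation scheme in the hunk above can be read in isolation. Below is a minimal, self-contained sketch of the same two-tier pattern (type names and capacities are illustrative, not dynarmic's): a fixed-capacity static_vector absorbs the common small-block case without touching the heap, and a stable_vector of fixed-size chunks takes the overflow. Crucially, neither container relocates existing elements, so raw pointers handed out (like the Inst* held by the intrusive instruction list) stay valid as the block grows:

```cpp
#include <boost/container/stable_vector.hpp>
#include <boost/container/static_vector.hpp>

struct Node {
    explicit Node(int op) : opcode(op) {}
    int opcode;
};

class NodeArena {
public:
    Node* Allocate(int opcode) {
        // Hot path: the inline buffer, which lives inside the arena object itself.
        if (inline_buf.size() < inline_buf.max_size()) {
            inline_buf.emplace_back(opcode);
            return &inline_buf.back();
        }
        // Cold path: append to the last chunk, starting a new chunk when it is full.
        if (chunks.empty() || chunks.back().size() == chunks.back().max_size())
            chunks.emplace_back();
        chunks.back().emplace_back(opcode);
        return &chunks.back().back();
    }

private:
    boost::container::static_vector<Node, 14> inline_buf;  // no heap allocation at all
    boost::container::stable_vector<boost::container::static_vector<Node, 32>> chunks;  // overflow slabs
};

int main() {
    NodeArena arena;
    Node* first = arena.Allocate(1);  // lands in the inline buffer
    for (int i = 0; i < 64; ++i)
        arena.Allocate(i);            // later allocations spill into pooled chunks
    return first->opcode;             // still valid: nothing has been moved
}
```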

View file

@ -13,6 +13,9 @@
#include <optional> #include <optional>
#include <string> #include <string>
#include <boost/container/container_fwd.hpp>
#include <boost/container/static_vector.hpp>
#include <boost/container/stable_vector.hpp>
#include <mcl/container/intrusive_list.hpp> #include <mcl/container/intrusive_list.hpp>
#include "dynarmic/common/common_types.h" #include "dynarmic/common/common_types.h"
@ -21,7 +24,6 @@
#include "dynarmic/ir/terminal.h" #include "dynarmic/ir/terminal.h"
#include "dynarmic/ir/value.h" #include "dynarmic/ir/value.h"
#include "dynarmic/ir/dense_list.h" #include "dynarmic/ir/dense_list.h"
#include "dynarmic/common/memory_pool.h"
namespace Dynarmic::IR { namespace Dynarmic::IR {
@ -164,8 +166,12 @@ public:
return cycle_count; return cycle_count;
} }
private: private:
/// "Hot cache" for small blocks so we don't call global allocator
boost::container::static_vector<Inst, 14> inlined_inst;
/// List of instructions in this block. /// List of instructions in this block.
instruction_list_type instructions; instruction_list_type instructions;
/// "Long/far" memory pool
boost::container::stable_vector<boost::container::static_vector<Inst, 32>> pooled_inst;
/// Block to execute next if `cond` did not pass. /// Block to execute next if `cond` did not pass.
std::optional<LocationDescriptor> cond_failed = {}; std::optional<LocationDescriptor> cond_failed = {};
/// Description of the starting location of this block /// Description of the starting location of this block
@ -174,8 +180,6 @@ private:
LocationDescriptor end_location; LocationDescriptor end_location;
/// Conditional to pass in order to execute this block /// Conditional to pass in order to execute this block
Cond cond; Cond cond;
/// Memory pool for instruction list
std::unique_ptr<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>> instruction_alloc_pool;
/// Terminal instruction of this block. /// Terminal instruction of this block.
Terminal terminal = Term::Invalid{}; Terminal terminal = Term::Invalid{};
/// Number of cycles this block takes to execute if the conditional fails. /// Number of cycles this block takes to execute if the conditional fails.
@ -183,6 +187,7 @@ private:
/// Number of cycles this block takes to execute. /// Number of cycles this block takes to execute.
size_t cycle_count = 0; size_t cycle_count = 0;
}; };
static_assert(sizeof(Block) == 2048);
/// Returns a string representation of the contents of block. Intended for debugging. /// Returns a string representation of the contents of block. Intended for debugging.
std::string DumpBlock(const IR::Block& block) noexcept; std::string DumpBlock(const IR::Block& block) noexcept;
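
The new static_assert pins Block at exactly 2 KiB; as I read it, the point is that changes to the inline buffer capacity or to other members cannot silently alter the block's footprint. A generic illustration of the idea, with made-up member sizes rather than dynarmic's real layout:

```cpp
#include <array>
#include <cstddef>

struct ExampleBlock {
    std::array<std::byte, 14 * 128> inline_storage;  // hypothetical 14-slot inline buffer
    void* other_members[8];                          // stand-in for the remaining fields
};

// Turns "the inline buffer still fits the size budget" into a compile-time contract.
static_assert(sizeof(ExampleBlock) <= 2048, "ExampleBlock grew past its 2 KiB budget");
```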

View file

@ -1,21 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/ir/ir_emitter.h"
#include <vector>
#include "dynarmic/common/assert.h"
#include <mcl/bit_cast.hpp>
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::IR {
} // namespace Dynarmic::IR

View file

@ -57,7 +57,7 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") {
INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction); INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction);
INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect); INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect);
INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x); INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x);
//INFO("Name: " << *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction)); INFO("Name: " << *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction));
INFO("iserr: " << iserr); INFO("iserr: " << iserr);
//INFO("alternative: " << alternative->GetName()); //INFO("alternative: " << alternative->GetName());
INFO("altiserr: " << altiserr); INFO("altiserr: " << altiserr);

View file

@ -40,18 +40,18 @@
using namespace Dynarmic; using namespace Dynarmic;
std::string_view GetNameOfA32Instruction(u32 instruction) { std::string_view GetNameOfA32Instruction(u32 instruction) {
//if (auto const vfp_decoder = A32::DecodeVFP<A32::TranslatorVisitor>(instruction)) if (auto const vfp_decoder = A32::DecodeVFP<A32::TranslatorVisitor>(instruction))
// return *A32::GetNameVFP<A32::TranslatorVisitor>(instruction); return *A32::GetNameVFP<A32::TranslatorVisitor>(instruction);
//else if (auto const asimd_decoder = A32::DecodeASIMD<A32::TranslatorVisitor>(instruction)) else if (auto const asimd_decoder = A32::DecodeASIMD<A32::TranslatorVisitor>(instruction))
// return *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction); return *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction);
//else if (auto const decoder = A32::DecodeArm<A32::TranslatorVisitor>(instruction)) else if (auto const decoder = A32::DecodeArm<A32::TranslatorVisitor>(instruction))
// return *A32::GetNameARM<A32::TranslatorVisitor>(instruction); return *A32::GetNameARM<A32::TranslatorVisitor>(instruction);
return "<null>"; return "<null>";
} }
std::string_view GetNameOfA64Instruction(u32 instruction) { std::string_view GetNameOfA64Instruction(u32 instruction) {
//if (auto const decoder = A64::Decode<A64::TranslatorVisitor>(instruction)) if (auto const decoder = A64::Decode<A64::TranslatorVisitor>(instruction))
// return *A64::GetName<A64::TranslatorVisitor>(instruction); return *A64::GetName<A64::TranslatorVisitor>(instruction);
return "<null>"; return "<null>";
} }