Compare commits
22 commits
48396a07e9
...
962b4861ec
962b4861ec
7775141174
20b94cb9ee
21766eb6d8
1c0a931247
4437386012
7646e5a6d8
fb61a1d3d5
2a9fb1ea6c
c3311b624d
844dec7531
ce02513842
9c890118d9
99e8f9efd5
bc83c24046
6be1120fc4
e003e281cf
ac08daa18a
1106494f6b
21ca974f8a
1750aec1e4
0baab3d99e
35 changed files with 334 additions and 445 deletions
@@ -210,12 +210,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ArmDynarmic32::MakeJit(Common::PageTable* pa
config.wall_clock_cntpct = m_uses_wall_clock;
config.enable_cycle_counting = !m_uses_wall_clock;

// Code cache size
#ifdef ARCHITECTURE_arm64
// Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
// Solaris doesn't support kPageSize >= 512MiB
config.code_cache_size = std::uint32_t(128_MiB);
#else
config.code_cache_size = std::uint32_t(512_MiB);
#endif

// Allow memory fault handling to work
if (m_system.DebuggerEnabled()) {
@@ -269,12 +269,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
config.wall_clock_cntpct = m_uses_wall_clock;
config.enable_cycle_counting = !m_uses_wall_clock;

// Code cache size
#ifdef ARCHITECTURE_arm64
// Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
// Solaris doesn't support kPageSize >= 512MiB
config.code_cache_size = std::uint32_t(128_MiB);
#else
config.code_cache_size = std::uint32_t(512_MiB);
#endif

// Allow memory fault handling to work
if (m_system.DebuggerEnabled()) {
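Both MakeJit hunks pin the JIT code cache to 128 MiB on arm64 hosts and 512 MiB elsewhere. A plausible reason for the arm64 cap (not stated in the diff itself) is the reach of AArch64 unconditional branches: B/BL encode a signed 26-bit word offset, so direct jumps between blocks only span roughly plus or minus 128 MiB, and the whole cache has to fit in that window. A small check of the arithmetic:

```cpp
#include <cstdint>

// AArch64 B/BL: signed 26-bit *word* offset -> +/- 2^25 words = +/- 2^27 bytes.
constexpr std::uint64_t kBranchReachBytes = (std::uint64_t{1} << 25) * 4;
static_assert(kBranchReachBytes == 128ull * 1024 * 1024, "exactly 128 MiB");
```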
@@ -58,14 +58,11 @@ add_library(dynarmic
common/lut_from_list.h
common/math_util.cpp
common/math_util.h
common/memory_pool.cpp
common/memory_pool.h
common/safe_ops.h
common/spin_lock.h
common/string_util.h
common/u128.cpp
common/u128.h
common/variant_util.h
frontend/A32/a32_types.cpp
frontend/A32/a32_types.h
frontend/A64/a64_types.cpp

@@ -80,7 +77,6 @@ add_library(dynarmic
ir/basic_block.cpp
ir/basic_block.h
ir/cond.h
ir/ir_emitter.cpp
ir/ir_emitter.h
ir/location_descriptor.cpp
ir/location_descriptor.h
@@ -15,15 +15,15 @@
#include <mcl/macro/architecture.hpp>
#include "dynarmic/common/common_types.h"

#if defined(MCL_ARCHITECTURE_X86_64)
#if defined(ARCHITECTURE_x86_64)
namespace Dynarmic::Backend::X64 {
class BlockOfCode;
} // namespace Dynarmic::Backend::X64
#elif defined(MCL_ARCHITECTURE_ARM64)
#elif defined(ARCHITECTURE_arm64)
namespace oaknut {
class CodeBlock;
} // namespace oaknut
#elif defined(MCL_ARCHITECTURE_RISCV)
#elif defined(ARCHITECTURE_riscv64)
namespace Dynarmic::Backend::RV64 {
class CodeBlock;
} // namespace Dynarmic::Backend::RV64

@@ -33,16 +33,16 @@ class CodeBlock;

namespace Dynarmic::Backend {

#if defined(MCL_ARCHITECTURE_X86_64)
#if defined(ARCHITECTURE_x86_64)
struct FakeCall {
u64 call_rip;
u64 ret_rip;
};
#elif defined(MCL_ARCHITECTURE_ARM64)
#elif defined(ARCHITECTURE_arm64)
struct FakeCall {
u64 call_pc;
};
#elif defined(MCL_ARCHITECTURE_RISCV)
#elif defined(ARCHITECTURE_riscv64)
struct FakeCall {
};
#else

@@ -54,11 +54,11 @@ public:
ExceptionHandler();
~ExceptionHandler();

#if defined(MCL_ARCHITECTURE_X86_64)
#if defined(ARCHITECTURE_x86_64)
void Register(X64::BlockOfCode& code);
#elif defined(MCL_ARCHITECTURE_ARM64)
#elif defined(ARCHITECTURE_arm64)
void Register(oaknut::CodeBlock& mem, std::size_t mem_size);
#elif defined(MCL_ARCHITECTURE_RISCV)
#elif defined(ARCHITECTURE_riscv64)
void Register(RV64::CodeBlock& mem, std::size_t mem_size);
#else
# error "Invalid architecture"
@@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)

IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});

Optimization::PolyfillPass(ir_block, {});
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
Optimization::VerificationPass(ir_block);

Optimization::Optimize(ir_block, conf, {});
return ir_block;
}
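The GenerateIR hunk above collapses the hand-rolled pass pipeline into a single Optimization::Optimize(ir_block, conf, {}) call. The diff does not show that function's body; a minimal sketch of what such an entry point could look like, assuming it simply re-hosts the same passes (the PolyfillOptions parameter type is an assumption):

```cpp
// Illustrative only -- every pass name below appears in the hunk above,
// but the wrapper itself and its third parameter's type are assumptions.
void Optimize(IR::Block& block, const A32::UserConfig& conf,
              const Optimization::PolyfillOptions& polyfill) {
    Optimization::PolyfillPass(block, polyfill);
    if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
        Optimization::A32GetSetElimination(block, {.convert_nzc_to_nz = true});
        Optimization::DeadCodeElimination(block);
    }
    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
        Optimization::A32ConstantMemoryReads(block, conf.callbacks);
        Optimization::ConstantPropagation(block);
        Optimization::DeadCodeElimination(block);
    }
    Optimization::VerificationPass(block);
}
```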
@@ -28,7 +28,6 @@
#include "dynarmic/backend/x64/nzcv_util.h"
#include "dynarmic/backend/x64/perf_map.h"
#include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/common/variant_util.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/interface/A32/coprocessor.h"
@@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
auto const opcode = inst.GetOpcode();
// Call the relevant Emit* member function.
switch (opcode) {
#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch;
#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch;
#define A32OPC(name, type, ...)
#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch;
#define A64OPC(name, type, ...) case IR::Opcode::A64##name: goto a64_branch;
#include "dynarmic/ir/opcodes.inc"
#undef OPCODE
#undef A32OPC
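The Emit hunk removes [[likely]] from the macro-generated case labels. The attribute is a relative hint: it only helps when it singles out hot labels, and since the OPCODE/A64OPC expansions stamped it on every case, it carried no information there, so dropping it should be behaviour-neutral. For reference, this is how the attribute is meant to be used:

```cpp
// C++20: [[likely]] on a case label marks it as the expected branch target.
enum class Op { Add, Rare };

int Dispatch(Op op) {
    switch (op) {
    [[likely]] case Op::Add:  // worth hinting only because Rare is *not* hinted
        return 1;
    case Op::Rare:
        return 2;
    }
    return 0;
}
```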
@@ -764,7 +764,7 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
target_code_ptr = code.GetReturnFromRunCodeAddress();
}
const CodePtr patch_location = code.getCurr();
code.mov(code.rcx, reinterpret_cast<u64>(target_code_ptr));
code.mov(code.rcx, u64(target_code_ptr));
code.EnsurePatchLocationSize(patch_location, 10);
}
@@ -80,16 +80,16 @@ public:
};

// TODO: Check code alignment

const CodePtr current_code_ptr = [this] {
const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15));
const CodePtr current_code_ptr = [this, aligned_code_ptr] {
// RSB optimization
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask;
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
jit_state.rsb_ptr = new_rsb_ptr;
return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]);
return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]);
}

return GetCurrentBlock();
return aligned_code_ptr;
//return GetCurrentBlock();
}();

const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr);
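The new aligned_code_ptr rounds the block's entry pointer up to a 16-byte boundary with the usual (p + 15) & ~15 idiom. A minimal sketch of the arithmetic:

```cpp
#include <cstdint>

// Round p up to the next multiple of 16 (the same trick works for any power-of-two alignment).
constexpr std::uintptr_t AlignUp16(std::uintptr_t p) {
    return (p + 15) & ~std::uintptr_t{15};
}
static_assert(AlignUp16(0x1001) == 0x1010);
static_assert(AlignUp16(0x1010) == 0x1010);  // already-aligned values are unchanged
```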
@@ -10,7 +10,6 @@

#include <algorithm>

#include <mcl/iterator/reverse.hpp>
#include "dynarmic/common/common_types.h"
#include <xbyak/xbyak.h>
@@ -76,7 +75,8 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);

size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE);
for (auto const xmm : mcl::iterator::reverse(regs)) {
for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
auto const xmm = *it;
if (HostLocIsXMM(xmm)) {
xmm_offset -= XMM_SIZE;
if (code.HasHostFeature(HostFeature::AVX)) {

@@ -88,9 +88,11 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
}
if (frame_info.stack_subtraction != 0)
code.add(rsp, u32(frame_info.stack_subtraction));
for (auto const gpr : mcl::iterator::reverse(regs))
for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
auto const gpr = *it;
if (HostLocIsGPR(gpr))
code.pop(HostLocToReg64(gpr));
}
}

void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) {
@@ -364,8 +364,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {

cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
lock();
or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));

SwitchMxcsrOnEntry();
jmp(ABI_PARAM2);

@@ -415,7 +414,6 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
}

xor_(eax, eax);
lock();
xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax);

ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
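Two details in these GenRunCode hunks: the lock(); or_(...) pair is only joined onto one line (same instruction sequence), and the explicit lock() in front of the halt_reason xchg is dropped, which is safe because XCHG with a memory operand is implicitly locked on x86. The xor/xchg pair atomically reads the pending halt reason and clears it; in portable C++ the same idea looks roughly like this:

```cpp
#include <atomic>
#include <cstdint>

// Rough equivalent of "xor eax, eax; xchg [halt_reason], eax":
// read the pending halt reason and reset it in a single atomic step.
std::uint32_t TakeHaltReason(std::atomic<std::uint32_t>& halt_reason) {
    return halt_reason.exchange(0);
}
```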
@@ -11,6 +11,7 @@
#include <iterator>

#include "dynarmic/common/assert.h"
#include <boost/variant/detail/apply_visitor_binary.hpp>
#include <mcl/bit/bit_field.hpp>
#include <mcl/scope_exit.hpp>
#include "dynarmic/common/common_types.h"

@@ -21,7 +22,6 @@
#include "dynarmic/backend/x64/perf_map.h"
#include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/backend/x64/verbose_debugging_output.h"
#include "dynarmic/common/variant_util.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
@@ -347,14 +347,14 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de
}

void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
Common::VisitVariant<void>(terminal, [this, initial_location, is_single_step](auto x) {
boost::apply_visitor([this, initial_location, is_single_step](auto x) {
using T = std::decay_t<decltype(x)>;
if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
this->EmitTerminalImpl(x, initial_location, is_single_step);
} else {
ASSERT_MSG(false, "Invalid terminal");
}
});
}, terminal);
}

void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
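EmitTerminal now calls boost::apply_visitor directly (note the trailing `, terminal` argument) instead of going through the Common::VisitVariant helper, whose header is deleted later in this compare. With any reasonably recent Boost, a generic lambda works as the visitor without a static_visitor wrapper; a standalone sketch:

```cpp
#include <boost/variant.hpp>
#include <iostream>
#include <string>

int main() {
    boost::variant<int, std::string> v = std::string{"terminal"};
    // A generic lambda is an acceptable visitor; no boost::static_visitor needed.
    boost::apply_visitor([](const auto& x) { std::cout << x << '\n'; }, v);
}
```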
@@ -92,13 +92,10 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list<Xbyak::Xmm> t
FpFixup::Norm_Src,
FpFixup::Norm_Src,
FpFixup::Norm_Src);

const Xbyak::Xmm tmp = xmm16;
const Xbyak::Xmm tmp = xmm0;
FCODE(vmovap)(tmp, code.BConst<fsize>(xword, denormal_to_zero));

for (const Xbyak::Xmm& xmm : to_daz) {
for (const Xbyak::Xmm& xmm : to_daz)
FCODE(vfixupimms)(xmm, xmm, tmp, u8(0));
}
return;
}
@@ -273,34 +273,31 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool ordered = IsOrdered(args[3].GetImmediateAccType());

if constexpr (bitsize != 128) {
ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
} else {
if constexpr (bitsize == 128) {
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(inst);
} else {
ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
}

const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
Xbyak::Label end;

code.mov(code.ABI_RETURN, u32(1));
code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
code.test(tmp.cvt8(), tmp.cvt8());
code.je(end);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
code.xor_(tmp.cvt32(), tmp.cvt32());
code.xchg(tmp.cvt8(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
code.mov(code.ABI_PARAM1, u64(&conf));
if constexpr (bitsize != 128) {
using T = mcl::unsigned_integer_of_size<bitsize>;

code.CallLambda(
[](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
[&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
})
? 0
: 1;
});
code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr, [&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
}) ? 0 : 1;
});
if (ordered) {
code.mfence();
}
@@ -308,15 +305,11 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
code.movaps(xword[code.ABI_PARAM3], xmm1);
code.CallLambda(
[](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr,
[&](Vector expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
})
? 0
: 1;
});
code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr, [&](Vector expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
}) ? 0 : 1;
});
if (ordered) {
code.mfence();
}
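These EmitExclusiveWriteMemory paths implement the store half of the guest's exclusive (load-linked/store-conditional) accesses through the global monitor. The `? 0 : 1` result mirrors the architectural contract: ARM's STXR/STREX write 0 to the status register when the store succeeds and 1 when it fails. For orientation, the guest-side pattern being emulated (standard AArch64, not code from this diff) is shown here as a comment:

```cpp
// AArch64 guest idiom emulated by the exclusive-write handlers above:
//
//  retry:
//      ldxr  w1, [x0]        // load-exclusive, arms the monitor for [x0]
//      add   w1, w1, #1
//      stxr  w2, w1, [x0]    // store-exclusive: w2 = 0 on success, 1 on failure
//      cbnz  w2, retry       // retry if another agent touched [x0]
```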
@@ -437,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i

SharedLabel end = GenSharedLabel();

code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.mov(status, u32(1));
code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
code.test(tmp.cvt8(), tmp.cvt8());
code.je(*end, code.T_NEAR);
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.cmp(qword[tmp], vaddr);
code.jne(*end, code.T_NEAR);
@@ -474,30 +468,29 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i

const auto location = code.getCurr();

if constexpr (bitsize == 128) {
switch (bitsize) {
case 8:
code.lock();
code.cmpxchg(code.byte[dest_ptr], value.cvt8());
break;
case 16:
code.lock();
code.cmpxchg(word[dest_ptr], value.cvt16());
break;
case 32:
code.lock();
code.cmpxchg(dword[dest_ptr], value.cvt32());
break;
case 64:
code.lock();
code.cmpxchg(qword[dest_ptr], value.cvt64());
break;
case 128:
code.lock();
code.cmpxchg16b(ptr[dest_ptr]);
} else {
switch (bitsize) {
case 8:
code.lock();
code.cmpxchg(code.byte[dest_ptr], value.cvt8());
break;
case 16:
code.lock();
code.cmpxchg(word[dest_ptr], value.cvt16());
break;
case 32:
code.lock();
code.cmpxchg(dword[dest_ptr], value.cvt32());
break;
case 64:
code.lock();
code.cmpxchg(qword[dest_ptr], value.cvt64());
break;
default:
UNREACHABLE();
}
break;
default:
UNREACHABLE();
}

code.setnz(status.cvt8());
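In the rewritten inline path the 128-bit case joins the same switch and ends in `lock cmpxchg16b`, which compares RDX:RAX against a 16-byte-aligned memory operand and, on a match, stores RCX:RBX, with ZF reporting the outcome; `setnz(status)` then converts that flag into the same 0-success/1-failure status used by the narrower sizes. In portable terms this is a 16-byte compare-and-swap; a hedged sketch (not the emitter's code, and getting a lock-free instantiation may require -mcx16 and/or libatomic):

```cpp
#include <atomic>
#include <cstdint>

struct alignas(16) U128 { std::uint64_t lo, hi; };

// Returns 0 on success, 1 on failure, mirroring setnz(status) above.
std::uint32_t StoreExclusive128(std::atomic<U128>& mem, U128 expected, U128 desired) {
    return mem.compare_exchange_strong(expected, desired) ? 0 : 1;
}
```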
@@ -609,8 +609,8 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) {

const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm right_shift = xmm16;
const Xbyak::Xmm tmp = xmm17;
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
code.vpxord(right_shift, right_shift, right_shift);

@@ -674,8 +674,8 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) {

const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm right_shift = xmm16;
const Xbyak::Xmm tmp = xmm17;
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF));
code.vpxorq(right_shift, right_shift, right_shift);

@@ -1955,8 +1955,8 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) {

const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm right_shift = xmm16;
const Xbyak::Xmm tmp = xmm17;
const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
code.vpxord(right_shift, right_shift, right_shift);

@@ -2737,7 +2737,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst)
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
const Xbyak::Xmm c = xmm16;
const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
code.vpsraq(c, a, 32);
code.vpsllq(a, a, 32);
code.vpsraq(a, a, 32);

@@ -5461,7 +5461,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) {
const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm masked = xmm16;
const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm();

code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
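A pattern repeated across the floating-point and vector hunks above: hard-coded xmm16/xmm17 (and xmm0) temporaries are replaced by registers obtained from ctx.reg_alloc.ScratchXmm(). XMM16 and above are only encodable with EVEX (AVX-512) instructions, and, more importantly, a hard-coded register bypasses the allocator's bookkeeping entirely, so nothing prevents it from already holding a live value. Taking scratch registers from the allocator keeps every temporary both legal on the host and visible to the allocator.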
@@ -9,6 +9,7 @@
#include "dynarmic/backend/x64/reg_alloc.h"

#include <algorithm>
#include <limits>
#include <numeric>
#include <utility>

@@ -118,7 +119,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
values.push_back(inst);
ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits<uint16_t>::max)());
total_uses += inst->UseCount();
max_bit_width = std::max<uint8_t>(max_bit_width, GetBitWidth(inst->GetType()));
max_bit_width = std::max<uint8_t>(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType())));
}

void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {
@@ -152,19 +153,19 @@ bool Argument::GetImmediateU1() const noexcept {

u8 Argument::GetImmediateU8() const noexcept {
const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x100);
ASSERT(imm <= u64(std::numeric_limits<u8>::max()));
return u8(imm);
}

u16 Argument::GetImmediateU16() const noexcept {
const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x10000);
ASSERT(imm <= u64(std::numeric_limits<u16>::max()));
return u16(imm);
}

u32 Argument::GetImmediateU32() const noexcept {
const u64 imm = value.GetImmediateAsU64();
ASSERT(imm < 0x100000000);
ASSERT(imm <= u64(std::numeric_limits<u32>::max()));
return u32(imm);
}
@@ -366,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def,
if (result_def) {
DefineValueImpl(result_def, ABI_RETURN);
}

for (size_t i = 0; i < args.size(); i++) {
if (args[i]) {
UseScratch(*args[i], args_hostloc[i]);
} else {
ScratchGpr(args_hostloc[i]); // TODO: Force spill
}
}
// Must match with with ScratchImpl
for (auto const gpr : other_caller_save) {
MoveOutOfTheWay(gpr);
LocInfo(gpr).WriteLock();
}
for (size_t i = 0; i < args.size(); i++) {
if (args[i] && !args[i]->get().IsVoid()) {
UseScratch(*args[i], args_hostloc[i]);
// LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
switch (args[i]->get().GetType()) {

@@ -389,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def,
}
}
}

for (size_t i = 0; i < args.size(); i++)
if (!args[i]) {
// TODO: Force spill
ScratchGpr(args_hostloc[i]);
}
for (auto const caller_saved : other_caller_save)
ScratchImpl({caller_saved});
}

void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
@@ -559,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept {
}

HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
#if 0
// TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows
// but it's fine anyways. We can find other ways to cheat it later - but which?!?!
// we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end?
// TODO(lizzie): This needs to be investigated further later.
// Do not spill XMM into other XMM silly
if (!is_xmm) {
/*if (!is_xmm) {
// TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY
// Intel recommends to spill GPR onto XMM registers IF POSSIBLE
// TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call?

@@ -573,8 +575,9 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i)
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
return loc;
}
#endif
}*/
// TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits
// of spilling on XMM versus the potential cost of using XMM registers.....
// Otherwise go to stack spilling
for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i)
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
@@ -12,6 +12,7 @@
#include <functional>
#include <optional>

#include "boost/container/small_vector.hpp"
#include "dynarmic/common/common_types.h"
#include <xbyak/xbyak.h>
#include <boost/container/static_vector.hpp>

@@ -77,13 +78,13 @@ public:
return std::find(values.begin(), values.end(), inst) != values.end();
}
inline size_t GetMaxBitWidth() const noexcept {
return max_bit_width;
return 1 << max_bit_width;
}
void AddValue(IR::Inst* inst) noexcept;
void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
private:
//non trivial
std::vector<IR::Inst*> values; //24
boost::container::small_vector<IR::Inst*, 3> values; //24
// Block state
uint16_t total_uses = 0; //8
//sometimes zeroed

@@ -93,10 +94,10 @@ private:
uint16_t is_being_used_count = 0; //8
uint16_t current_references = 0; //8
// Value state
uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128
uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6)
uint8_t lru_counter : 2 = 0; //1
bool is_scratch : 1 = false; //1
bool is_set_last_use : 1 = false; //1
alignas(16) uint8_t lru_counter = 0; //1
friend class RegAlloc;
};
static_assert(sizeof(HostLocInfo) == 64);
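HostLocInfo is repacked here: values becomes a small_vector with a few inline slots, and max_bit_width now stores the log2 of the width in a 4-bit bitfield (together with the 2-bit lru_counter and the two flag bits it shares a single byte), which is what lets the struct keep satisfying static_assert(sizeof(HostLocInfo) == 64). AddValue feeds it std::countr_zero of the width and GetMaxBitWidth() shifts it back; for a power of two, countr_zero is exactly log2:

```cpp
#include <bit>

static_assert(std::countr_zero(8u) == 3);              // an 8-bit value is stored as 3
static_assert(std::countr_zero(32u) == 5);             // a 32-bit value is stored as 5
static_assert((1u << std::countr_zero(32u)) == 32u);   // GetMaxBitWidth() recovers the width
```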
@@ -1,13 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "dynarmic/common/memory_pool.h"

#include <cstdlib>

namespace Dynarmic::Common {


} // namespace Dynarmic::Common
@@ -1,61 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <cstddef>
#include <vector>

namespace Dynarmic::Common {

/// @tparam object_size Byte-size of objects to construct
/// @tparam slab_size Number of objects to have per slab
template<size_t object_size, size_t slab_size>
class Pool {
public:
inline Pool() noexcept {
AllocateNewSlab();
}
inline ~Pool() noexcept {
std::free(current_slab);
for (char* slab : slabs) {
std::free(slab);
}
}

Pool(const Pool&) = delete;
Pool(Pool&&) = delete;

Pool& operator=(const Pool&) = delete;
Pool& operator=(Pool&&) = delete;

/// @brief Returns a pointer to an `object_size`-bytes block of memory.
[[nodiscard]] void* Alloc() noexcept {
if (remaining == 0) {
slabs.push_back(current_slab);
AllocateNewSlab();
}
void* ret = static_cast<void*>(current_ptr);
current_ptr += object_size;
remaining--;
return ret;
}
private:
/// @brief Allocates a completely new memory slab.
/// Used when an entirely new slab is needed
/// due the current one running out of usable space.
void AllocateNewSlab() noexcept {
current_slab = static_cast<char*>(std::malloc(object_size * slab_size));
current_ptr = current_slab;
remaining = slab_size;
}

std::vector<char*> slabs;
char* current_slab = nullptr;
char* current_ptr = nullptr;
size_t remaining = 0;
};

} // namespace Dynarmic::Common
@@ -1,29 +0,0 @@
/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#pragma once

#include <boost/variant.hpp>

namespace Dynarmic::Common {
namespace detail {

template<typename ReturnT, typename Lambda>
struct VariantVisitor : boost::static_visitor<ReturnT>
        , Lambda {
    VariantVisitor(Lambda&& lambda)
            : Lambda(std::move(lambda)) {}

    using Lambda::operator();
};

} // namespace detail

template<typename ReturnT, typename Variant, typename Lambda>
inline ReturnT VisitVariant(Variant&& variant, Lambda&& lambda) {
    return boost::apply_visitor(detail::VariantVisitor<ReturnT, Lambda>(std::move(lambda)), variant);
}

} // namespace Dynarmic::Common
@@ -9,12 +9,9 @@
#pragma once

#include <string>
#include <utility>

#include <fmt/format.h>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"

#include "dynarmic/interface/A32/coprocessor_util.h"
#include "dynarmic/ir/cond.h"

@@ -89,24 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) {

inline size_t RegNumber(Reg reg) {
ASSERT(reg != Reg::INVALID_REG);
return static_cast<size_t>(reg);
return size_t(reg);
}

inline size_t RegNumber(ExtReg reg) {
if (IsSingleExtReg(reg)) {
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::S0);
return size_t(reg) - size_t(ExtReg::S0);
} else if (IsDoubleExtReg(reg)) {
return size_t(reg) - size_t(ExtReg::D0);
}

if (IsDoubleExtReg(reg)) {
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::D0);
}

if (IsQuadExtReg(reg)) {
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::Q0);
}

ASSERT_MSG(false, "Invalid extended register");
return 0;
ASSERT(IsQuadExtReg(reg));
return size_t(reg) - size_t(ExtReg::Q0);
}

inline Reg operator+(Reg reg, size_t number) {
@@ -30,13 +30,13 @@ template<typename Visitor>
using ArmDecodeTable = std::array<std::vector<ArmMatcher<Visitor>>, 0x1000>;

namespace detail {
inline size_t ToFastLookupIndexArm(u32 instruction) {
inline size_t ToFastLookupIndexArm(u32 instruction) noexcept {
return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0);
}
} // namespace detail

template<typename V>
constexpr ArmDecodeTable<V> GetArmDecodeTable() {
constexpr ArmDecodeTable<V> GetArmDecodeTable() noexcept {
std::vector<ArmMatcher<V>> list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./arm.inc"

@@ -62,15 +62,27 @@ constexpr ArmDecodeTable<V> GetArmDecodeTable() {
}

template<typename V>
std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) {
std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) noexcept {
alignas(64) static const auto table = GetArmDecodeTable<V>();
const auto matches_instruction = [instruction](const auto& matcher) {
return matcher.Matches(instruction);
};

const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)];
auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
return iter != subtable.end() ? std::optional<std::reference_wrapper<const ArmMatcher<V>>>(*iter) : std::nullopt;
}

template<typename V>
std::optional<std::string_view> GetNameARM(u32 inst) noexcept {
std::vector<std::pair<std::string_view, ArmMatcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./arm.inc"
#undef INST
};
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}

} // namespace Dynarmic::A32
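The same reshuffle appears in every decoder header in this compare: matchers no longer carry their instruction name, and a new GetName* helper builds a (name, matcher) pair list on demand and searches it when a name is actually wanted. Decoding stays on the lean name-free tables; name lookups rebuild the full list each call, so they are presumably intended for cold paths such as logging. A hedged usage sketch (the visitor type V stands in for whatever visitor the decoder is normally instantiated with, and the function name is hypothetical):

```cpp
#include <cstdint>
#include <fmt/format.h>
// assumes the decoder header providing Dynarmic::A32::GetNameARM is included

template<typename V>
void LogDecodedName(std::uint32_t instruction) {
    if (const auto name = Dynarmic::A32::GetNameARM<V>(instruction)) {
        fmt::print("decoded: {}\n", *name);
    } else {
        fmt::print("unallocated encoding: {:08x}\n", instruction);
    }
}
```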
@@ -26,15 +26,12 @@ template<typename Visitor>
using ASIMDMatcher = Decoder::Matcher<Visitor, u32>;

template<typename V>
std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
std::vector<ASIMDMatcher<V>> table = {

#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() noexcept {
std::vector<std::pair<const char*, ASIMDMatcher<V>>> table = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./asimd.inc"
#undef INST

};

// Exceptions to the rule of thumb.
const std::set<std::string> comes_first{
"VBIC, VMOV, VMVN, VORR (immediate)",

@@ -53,29 +50,43 @@ std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
"VQDMULH (scalar)",
"VQRDMULH (scalar)",
};
const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
return comes_first.count(matcher.GetName()) > 0;
const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
return comes_first.count(e.first) > 0;
});
const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
return comes_last.count(matcher.GetName()) == 0;
const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
return comes_last.count(e.first) == 0;
});

// If a matcher has more bits in its mask it is more specific, so it should come first.
std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) {
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) {
return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
});

return table;
std::vector<ASIMDMatcher<V>> final_table;
std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) {
return e.second;
});
return final_table;
}

template<typename V>
std::optional<std::reference_wrapper<const ASIMDMatcher<V>>> DecodeASIMD(u32 instruction) {
static const auto table = GetASIMDDecodeTable<V>();

const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };

auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
std::optional<std::reference_wrapper<const ASIMDMatcher<V>>> DecodeASIMD(u32 instruction) noexcept {
alignas(64) static const auto table = GetASIMDDecodeTable<V>();
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
return matcher.Matches(instruction);
});
return iter != table.end() ? std::optional<std::reference_wrapper<const ASIMDMatcher<V>>>(*iter) : std::nullopt;
}

template<typename V>
std::optional<std::string_view> GetNameASIMD(u32 inst) noexcept {
std::vector<std::pair<std::string_view, ASIMDMatcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./asimd.inc"
#undef INST
};
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}

} // namespace Dynarmic::A32
@@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher<Visitor, u16>;

template<typename V>
std::optional<std::reference_wrapper<const Thumb16Matcher<V>>> DecodeThumb16(u16 instruction) {
static const std::vector<Thumb16Matcher<V>> table = {

alignas(64) static const std::vector<Thumb16Matcher<V>> table = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)),
#include "./thumb16.inc"
#undef INST

};

const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };

auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
return matcher.Matches(instruction);
});
return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb16Matcher<V>>>(*iter) : std::nullopt;
}

template<typename V>
std::optional<std::string_view> GetNameThumb16(u32 inst) noexcept {
std::vector<std::pair<std::string_view, Thumb16Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) },
#include "./thumb16.inc"
#undef INST
};
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}

} // namespace Dynarmic::A32
@@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher<Visitor, u32>;

template<typename V>
std::optional<std::reference_wrapper<const Thumb32Matcher<V>>> DecodeThumb32(u32 instruction) {
static const std::vector<Thumb32Matcher<V>> table = {

alignas(64) static const std::vector<Thumb32Matcher<V>> table = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./thumb32.inc"
#undef INST

};

const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };

auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
return matcher.Matches(instruction);
});
return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb32Matcher<V>>>(*iter) : std::nullopt;
}

template<typename V>
std::optional<std::string_view> GetNameThumb32(u32 inst) noexcept {
std::vector<std::pair<std::string_view, Thumb32Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./thumb32.inc"
#undef INST
};
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}

} // namespace Dynarmic::A32
@@ -26,36 +26,42 @@ using VFPMatcher = Decoder::Matcher<Visitor, u32>;
template<typename V>
std::optional<std::reference_wrapper<const VFPMatcher<V>>> DecodeVFP(u32 instruction) {
using Table = std::vector<VFPMatcher<V>>;

static const struct Tables {
alignas(64) static const struct Tables {
Table unconditional;
Table conditional;
} tables = [] {
} tables = []() {
Table list = {

#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./vfp.inc"
#undef INST

};

const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
return (matcher.GetMask() & 0xF0000000) == 0xF0000000;
});

return Tables{
Table{list.begin(), division},
Table{division, list.end()},
Table{list.begin(), it},
Table{it, list.end()},
};
}();

const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000;
const Table& table = is_unconditional ? tables.unconditional : tables.conditional;

const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };

auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
return matcher.Matches(instruction);
});
return iter != table.end() ? std::optional<std::reference_wrapper<const VFPMatcher<V>>>(*iter) : std::nullopt;
}

template<typename V>
std::optional<std::string_view> GetNameVFP(u32 inst) noexcept {
std::vector<std::pair<std::string_view, VFPMatcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./vfp.inc"
#undef INST
};
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}

} // namespace Dynarmic::A32
@@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) {
return 0xF7F0A000; // UDF
}

bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) {
inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept {
return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000;
}
@@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) {

template<typename V>
constexpr DecodeTable<V> GetDecodeTable() {
std::vector<Matcher<V>> list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
std::vector<std::pair<const char*, Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./a64.inc"
#undef INST
};

// If a matcher has more bits in its mask it is more specific, so it should come first.
std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) {
std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) {
// If a matcher has more bits in its mask it is more specific, so it should come first.
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
});

// Exceptions to the above rule of thumb.
std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
std::stable_partition(list.begin(), list.end(), [&](const auto& e) {
return std::set<std::string>{
"MOVI, MVNI, ORR, BIC (vector, immediate)",
"FMOV (vector, immediate)",
"Unallocated SIMD modified immediate",
}.count(matcher.GetName()) > 0;
}.count(e.first) > 0;
});

DecodeTable<V> table{};
for (size_t i = 0; i < table.size(); ++i) {
for (auto matcher : list) {
const auto expect = detail::ToFastLookupIndex(matcher.GetExpected());
const auto mask = detail::ToFastLookupIndex(matcher.GetMask());
for (auto const& e : list) {
const auto expect = detail::ToFastLookupIndex(e.second.GetExpected());
const auto mask = detail::ToFastLookupIndex(e.second.GetMask());
if ((i & mask) == expect) {
table[i].push_back(matcher);
table[i].push_back(e.second);
}
}
}

@@ -74,12 +71,24 @@ constexpr DecodeTable<V> GetDecodeTable() {
template<typename V>
std::optional<std::reference_wrapper<const Matcher<V>>> Decode(u32 instruction) {
alignas(64) static const auto table = GetDecodeTable<V>();
const auto matches_instruction = [instruction](const auto& matcher) {
return matcher.Matches(instruction);
};
const auto& subtable = table[detail::ToFastLookupIndex(instruction)];
auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) {
return matcher.Matches(instruction);
});
return iter != subtable.end() ? std::optional<std::reference_wrapper<const Matcher<V>>>(*iter) : std::nullopt;
}

template<typename V>
std::optional<std::string_view> GetName(u32 inst) noexcept {
std::vector<std::pair<std::string_view, Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./a64.inc"
#undef INST
};
auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
return m.second.Matches(inst);
});
return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
}

} // namespace Dynarmic::A64
@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

/* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * SPDX-License-Identifier: 0BSD

@@ -20,9 +23,12 @@ bool TranslatorVisitor::B_cond(Imm<19> imm19, Cond cond) {
bool TranslatorVisitor::B_uncond(Imm<26> imm26) {
const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend<s64>();
const u64 target = ir.PC() + offset;

//ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
// Pattern to halt execution (B .)
if (target == ir.PC()) {
ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
return false;
}
ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
return false;
}
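B_uncond now special-cases a branch whose target is its own address, the `b .` idiom, an intentional tight loop commonly used to park a core, and keeps the slower LinkBlock terminal for it so halt requests are still observed, while every other unconditional branch takes the faster LinkBlockFast terminal. A sketch of the offset arithmetic that makes `target == ir.PC()` equivalent to imm26 == 0:

```cpp
#include <cstdint>

// imm26 is a signed word offset; appending '00' gives the byte offset.
constexpr std::int64_t BranchOffset(std::uint32_t imm26) {
    std::int64_t offset = std::int64_t{imm26} << 2;
    if (imm26 & (1u << 25))               // sign bit of the 26-bit field
        offset -= std::int64_t{1} << 28;  // sign-extend to 64 bits
    return offset;
}
static_assert(BranchOffset(0) == 0);           // "b ." branches to itself
static_assert(BranchOffset(0x3FFFFFF) == -4);  // -1 word = the previous instruction
```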
@@ -70,11 +70,9 @@ struct detail {
return std::make_tuple(mask, expect);
}

/**
 * Generates the masks and shifts for each argument.
 * A '-' in a bitstring indicates that we don't care about that value.
 * An argument is specified by a continuous string of the same character.
 */
/// @brief Generates the masks and shifts for each argument.
/// A '-' in a bitstring indicates that we don't care about that value.
/// An argument is specified by a continuous string of the same character.
template<size_t N>
static consteval auto GetArgInfo(std::array<char, opcode_bitsize> bitstring) {
std::array<opcode_type, N> masks = {};

@@ -98,7 +96,6 @@ struct detail {

if constexpr (N > 0) {
const size_t bit_position = opcode_bitsize - i - 1;

if (arg_index >= N)
throw std::out_of_range("Unexpected field");

@@ -109,20 +106,16 @@ struct detail {
}
}
}

#if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__)
// Avoids a MSVC ICE, and avoids Android NDK issue.
ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; }));
#endif

return std::make_tuple(masks, shifts);
}

/**
 * This struct's Make member function generates a lambda which decodes an instruction based on
 * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a
 * template argument.
 */
/// @brief This struct's Make member function generates a lambda which decodes an instruction
/// based on the provided arg_masks and arg_shifts. The Visitor member function to call is
/// provided as a template argument.
template<typename FnT>
struct VisitorCaller;

@@ -130,36 +123,36 @@ struct detail {
# pragma warning(push)
# pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning)
#endif
template<typename Visitor, typename... Args, typename CallRetT>
struct VisitorCaller<CallRetT (Visitor::*)(Args...)> {
template<typename V, typename... Args, typename ReturnType>
struct VisitorCaller<ReturnType (V::*)(Args...)> {
template<size_t... iota>
static auto Make(std::integer_sequence<size_t, iota...>,
CallRetT (Visitor::*const fn)(Args...),
static constexpr auto Make(std::integer_sequence<size_t, iota...>,
ReturnType (V::*const fn)(Args...),
const std::array<opcode_type, sizeof...(iota)> arg_masks,
const std::array<size_t, sizeof...(iota)> arg_shifts) {
static_assert(std::is_same_v<visitor_type, Visitor>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) {
static_assert(std::is_same_v<visitor_type, V>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) {
(void)instruction;
(void)arg_masks;
(void)arg_shifts;
return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
};
}
};

template<typename Visitor, typename... Args, typename CallRetT>
struct VisitorCaller<CallRetT (Visitor::*)(Args...) const> {
template<typename V, typename... Args, typename ReturnType>
struct VisitorCaller<ReturnType (V::*)(Args...) const> {
template<size_t... iota>
static auto Make(std::integer_sequence<size_t, iota...>,
CallRetT (Visitor::*const fn)(Args...) const,
static constexpr auto Make(std::integer_sequence<size_t, iota...>,
ReturnType (V::*const fn)(Args...) const,
const std::array<opcode_type, sizeof...(iota)> arg_masks,
const std::array<size_t, sizeof...(iota)> arg_shifts) {
static_assert(std::is_same_v<visitor_type, const Visitor>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) {
static_assert(std::is_same_v<visitor_type, const V>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) {
(void)instruction;
(void)arg_masks;
(void)arg_shifts;
return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
};
}
};

@@ -167,27 +160,21 @@ struct detail {
# pragma warning(pop)
#endif

/**
 * Creates a matcher that can match and parse instructions based on bitstring.
 * See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
 */
template<auto bitstring, typename FnT>
static auto GetMatcher(FnT fn, const char* const name) {
constexpr size_t args_count = mcl::parameter_count_v<FnT>;

/// @brief Creates a matcher that can match and parse instructions based on bitstring.
/// See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
template<auto bitstring, typename F>
static constexpr auto GetMatcher(F fn) {
constexpr size_t args_count = mcl::parameter_count_v<F>;
constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring));
constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring));
constexpr auto arg_masks = std::get<0>(GetArgInfo<args_count>(bitstring));
constexpr auto arg_shifts = std::get<1>(GetArgInfo<args_count>(bitstring));

using Iota = std::make_index_sequence<args_count>;

const auto proxy_fn = VisitorCaller<FnT>::Make(Iota(), fn, arg_masks, arg_shifts);
return MatcherT(name, mask, expect, proxy_fn);
const auto proxy_fn = VisitorCaller<F>::Make(std::make_index_sequence<args_count>(), fn, arg_masks, arg_shifts);
return MatcherT(mask, expect, proxy_fn);
}
};

#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn, name)
#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn)

} // namespace detail
} // namespace Dynarmic::Decoder
@ -14,16 +14,12 @@
|
|||
|
||||
namespace Dynarmic::Decoder {
|
||||
|
||||
/**
|
||||
* Generic instruction handling construct.
|
||||
*
|
||||
* @tparam Visitor An arbitrary visitor type that will be passed through
|
||||
* to the function being handled. This type must be the
|
||||
* type of the first parameter in a handler function.
|
||||
*
|
||||
* @tparam OpcodeType Type representing an opcode. This must be the
|
||||
* type of the second parameter in a handler function.
|
||||
*/
|
||||
/// Generic instruction handling construct.
|
||||
/// @tparam Visitor An arbitrary visitor type that will be passed through
|
||||
/// to the function being handled. This type must be the
|
||||
/// type of the first parameter in a handler function.
|
||||
/// @tparam OpcodeType Type representing an opcode. This must be the
|
||||
/// type of the second parameter in a handler function.
|
||||
template<typename Visitor, typename OpcodeType>
|
||||
class Matcher {
|
||||
public:
|
||||
|
@ -31,46 +27,35 @@ public:
|
|||
using visitor_type = Visitor;
using handler_return_type = typename Visitor::instruction_return_type;
using handler_function = std::function<handler_return_type(Visitor&, opcode_type)>;

Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func)
    : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {}

/// Gets the name of this type of instruction.
const char* GetName() const {
    return name;
}
Matcher(opcode_type mask, opcode_type expected, handler_function func)
    : mask{mask}, expected{expected}, fn{std::move(func)} {}

/// Gets the mask for this instruction.
opcode_type GetMask() const {
inline opcode_type GetMask() const noexcept {
    return mask;
}

/// Gets the expected value after masking for this instruction.
opcode_type GetExpected() const {
inline opcode_type GetExpected() const noexcept {
    return expected;
}

/**
 * Tests to see if the given instruction is the instruction this matcher represents.
 * @param instruction The instruction to test
 * @returns true if the given instruction matches.
 */
bool Matches(opcode_type instruction) const {
/// Tests to see if the given instruction is the instruction this matcher represents.
/// @param instruction The instruction to test
/// @returns true if the given instruction matches.
inline bool Matches(opcode_type instruction) const noexcept {
    return (instruction & mask) == expected;
}

/**
 * Calls the corresponding instruction handler on visitor for this type of instruction.
 * @param v The visitor to use
 * @param instruction The instruction to decode.
 */
handler_return_type call(Visitor& v, opcode_type instruction) const {
/// Calls the corresponding instruction handler on visitor for this type of instruction.
/// @param v The visitor to use
/// @param instruction The instruction to decode.
inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept {
    ASSERT(Matches(instruction));
    return fn(v, instruction);
}

private:
const char* name;
opcode_type mask;
opcode_type expected;
handler_function fn;
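For context on how a Matcher is consumed, here is a hedged, stand-alone sketch of the decode loop (ToyMatcher, Decode, and the table entry are invented for this illustration and are not dynarmic's actual table machinery): decoding is a linear scan for the first matcher whose mask/expected pair fits, followed by a call into the visitor.

#include <cassert>
#include <cstdint>
#include <functional>
#include <optional>
#include <vector>

struct Visitor {};  // stand-in for a translator visitor

// Simplified, non-template stand-in for the Matcher shown above.
struct ToyMatcher {
    std::uint32_t mask;
    std::uint32_t expected;
    std::function<bool(Visitor&, std::uint32_t)> fn;

    bool Matches(std::uint32_t inst) const { return (inst & mask) == expected; }
    bool call(Visitor& v, std::uint32_t inst) const {
        assert(Matches(inst));
        return fn(v, inst);
    }
};

// Decoding: the first matcher whose mask/expected pair fits handles the instruction.
std::optional<bool> Decode(const std::vector<ToyMatcher>& table, Visitor& v, std::uint32_t inst) {
    for (const auto& m : table)
        if (m.Matches(inst))
            return m.call(v, inst);
    return std::nullopt;
}

int main() {
    Visitor v;
    const std::vector<ToyMatcher> table{
        {0x0FE00000u, 0x00800000u, [](Visitor&, std::uint32_t) { return true; }},  // made-up entry
    };
    return Decode(table, v, 0xE0810002u).has_value() ? 0 : 1;
}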
@ -15,8 +15,6 @@

#include <fmt/format.h>
#include "dynarmic/common/assert.h"

#include "dynarmic/common/memory_pool.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/frontend/A64/a64_types.h"
#include "dynarmic/ir/cond.h"
@ -27,8 +25,7 @@ namespace Dynarmic::IR {
Block::Block(const LocationDescriptor& location)
    : location{location},
      end_location{location},
      cond{Cond::AL},
      instruction_alloc_pool{std::make_unique<std::remove_reference_t<decltype(*instruction_alloc_pool)>>()}
      cond{Cond::AL}
{

}
@ -40,7 +37,21 @@ Block::Block(const LocationDescriptor& location)
/// @param args A sequence of Value instances used as arguments for the instruction.
/// @returns Iterator to the newly created instruction.
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
    IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);
    // First try the "inline" buffer, otherwise fall back to a slower slab-like allocation scheme.
    // The purpose is to avoid frequent new/delete calls, which invoke malloc and ultimately mmap.
    // The inline buffer exists because many small blocks with few instructions are created
    // (due to subpar optimisations in other passes), and branch-heavy code benefits greatly
    // from the locality of the faster allocations.
    IR::Inst* inst;
    if (inlined_inst.size() < inlined_inst.max_size()) {
        inst = &inlined_inst[inlined_inst.size()];
        inlined_inst.emplace_back(opcode);
    } else {
        if (pooled_inst.empty() || pooled_inst.back().size() == pooled_inst.back().max_size())
            pooled_inst.emplace_back();
        inst = &pooled_inst.back()[pooled_inst.back().size()];
        pooled_inst.back().emplace_back(opcode);
    }
    DEBUG_ASSERT(args.size() == inst->NumArgs());
    std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
        inst->SetArg(index, arg);
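As a hedged, stand-alone illustration of the two-tier scheme above (TwoTierPool, Inst, and Create are invented names; the real logic lives in Block::PrependNewInst and uses the members declared in basic_block.h): a small fixed-capacity buffer absorbs the common case of tiny blocks, and a stable_vector of fixed-size chunks takes the overflow without ever relocating previously created elements.

#include <boost/container/stable_vector.hpp>
#include <boost/container/static_vector.hpp>
#include <cstddef>
#include <cstdio>

struct Inst { int opcode; };  // stand-in for IR::Inst

struct TwoTierPool {
    boost::container::static_vector<Inst, 14> inlined;
    boost::container::stable_vector<boost::container::static_vector<Inst, 32>> pooled;

    Inst* Create(int opcode) {
        // Fast path: the inline buffer still has room.
        if (inlined.size() < inlined.max_size()) {
            inlined.emplace_back(Inst{opcode});
            return &inlined.back();
        }
        // Slow path: append to the last chunk, opening a new one when it is full.
        if (pooled.empty() || pooled.back().size() == pooled.back().max_size())
            pooled.emplace_back();
        pooled.back().emplace_back(Inst{opcode});
        return &pooled.back().back();
    }
};

int main() {
    TwoTierPool pool;
    for (int i = 0; i < 40; ++i)
        pool.Create(i);  // first 14 go inline, the rest into 32-element chunks
    std::printf("inline: %zu, chunks: %zu\n",
                static_cast<std::size_t>(pool.inlined.size()),
                static_cast<std::size_t>(pool.pooled.size()));
}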
@ -13,6 +13,9 @@
#include <optional>
#include <string>

#include <boost/container/container_fwd.hpp>
#include <boost/container/static_vector.hpp>
#include <boost/container/stable_vector.hpp>
#include <mcl/container/intrusive_list.hpp>
#include "dynarmic/common/common_types.h"

@ -21,7 +24,6 @@
#include "dynarmic/ir/terminal.h"
#include "dynarmic/ir/value.h"
#include "dynarmic/ir/dense_list.h"
#include "dynarmic/common/memory_pool.h"

namespace Dynarmic::IR {
@ -164,8 +166,12 @@ public:
    return cycle_count;
}
private:
/// "Hot cache" for small blocks so we don't call the global allocator
boost::container::static_vector<Inst, 14> inlined_inst;
/// List of instructions in this block.
instruction_list_type instructions;
/// "Long/far" memory pool
boost::container::stable_vector<boost::container::static_vector<Inst, 32>> pooled_inst;
/// Block to execute next if `cond` did not pass.
std::optional<LocationDescriptor> cond_failed = {};
/// Description of the starting location of this block
@ -174,8 +180,6 @@ private:
LocationDescriptor end_location;
/// Conditional to pass in order to execute this block
Cond cond;
/// Memory pool for instruction list
std::unique_ptr<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>> instruction_alloc_pool;
/// Terminal instruction of this block.
Terminal terminal = Term::Invalid{};
/// Number of cycles this block takes to execute if the conditional fails.
@ -183,6 +187,7 @@ private:
/// Number of cycles this block takes to execute.
size_t cycle_count = 0;
};
static_assert(sizeof(Block) == 2048);

/// Returns a string representation of the contents of block. Intended for debugging.
std::string DumpBlock(const IR::Block& block) noexcept;
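A plausible reason for choosing stable_vector for pooled_inst (an inference; the diff itself does not say) is reference stability: Inst objects are linked into the intrusive instruction list by pointer, so the backing storage must never relocate as further instructions are appended. A minimal demonstration of that guarantee:

#include <boost/container/stable_vector.hpp>
#include <cassert>

int main() {
    boost::container::stable_vector<int> pool;
    pool.push_back(1);
    const int* first = &pool.front();
    for (int i = 0; i < 1000; ++i)
        pool.push_back(i);          // growth never relocates existing elements
    assert(first == &pool.front()); // the old pointer is still valid
    return *first == 1 ? 0 : 1;
}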
@ -1,21 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

/* This file is part of the dynarmic project.
 * Copyright (c) 2016 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include "dynarmic/ir/ir_emitter.h"

#include <vector>

#include "dynarmic/common/assert.h"
#include <mcl/bit_cast.hpp>

#include "dynarmic/ir/opcodes.h"

namespace Dynarmic::IR {


} // namespace Dynarmic::IR
@ -57,7 +57,7 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") {
INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction);
INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect);
INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x);
//INFO("Name: " << *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction));
INFO("Name: " << *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction));
INFO("iserr: " << iserr);
//INFO("alternative: " << alternative->GetName());
INFO("altiserr: " << altiserr);
@ -40,18 +40,18 @@
using namespace Dynarmic;

std::string_view GetNameOfA32Instruction(u32 instruction) {
    //if (auto const vfp_decoder = A32::DecodeVFP<A32::TranslatorVisitor>(instruction))
    //    return *A32::GetNameVFP<A32::TranslatorVisitor>(instruction);
    //else if (auto const asimd_decoder = A32::DecodeASIMD<A32::TranslatorVisitor>(instruction))
    //    return *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction);
    //else if (auto const decoder = A32::DecodeArm<A32::TranslatorVisitor>(instruction))
    //    return *A32::GetNameARM<A32::TranslatorVisitor>(instruction);
    if (auto const vfp_decoder = A32::DecodeVFP<A32::TranslatorVisitor>(instruction))
        return *A32::GetNameVFP<A32::TranslatorVisitor>(instruction);
    else if (auto const asimd_decoder = A32::DecodeASIMD<A32::TranslatorVisitor>(instruction))
        return *A32::GetNameASIMD<A32::TranslatorVisitor>(instruction);
    else if (auto const decoder = A32::DecodeArm<A32::TranslatorVisitor>(instruction))
        return *A32::GetNameARM<A32::TranslatorVisitor>(instruction);
    return "<null>";
}

std::string_view GetNameOfA64Instruction(u32 instruction) {
    //if (auto const decoder = A64::Decode<A64::TranslatorVisitor>(instruction))
    //    return *A64::GetName<A64::TranslatorVisitor>(instruction);
    if (auto const decoder = A64::Decode<A64::TranslatorVisitor>(instruction))
        return *A64::GetName<A64::TranslatorVisitor>(instruction);
    return "<null>";
}
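A hedged usage sketch for the restored helpers (the main function, the encodings, and the fmt calls are illustrative only and are not part of the tool; this must be linked against the translation unit that defines the two functions shown above):

#include <cstdint>
#include <string_view>
#include <fmt/core.h>

using u32 = std::uint32_t;

// Declared here for the sketch; the definitions are the ones in the diff above.
std::string_view GetNameOfA32Instruction(u32 instruction);
std::string_view GetNameOfA64Instruction(u32 instruction);

int main() {
    fmt::print("A32: {}\n", GetNameOfA32Instruction(0xE0810002)); // illustrative A32 ADD encoding
    fmt::print("A64: {}\n", GetNameOfA64Instruction(0xD503201F)); // illustrative A64 NOP encoding
}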