[dynarmic] provide backing fastmem, set verbose mov output to qword (appropriately), safer conversion for reads/writes

This commit is contained in:
lizzie 2025-07-27 06:29:25 +01:00 committed by crueter
parent 6145bf28cd
commit 620ceb9ce9
9 changed files with 114 additions and 75 deletions

View file

@ -127,10 +127,10 @@ protected:
BlockRangeInformation<u64> block_ranges;
std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
ankerl::unordered_dense::map<u64, FastmemPatchInfo> fastmem_patch_info;
std::map<std::tuple<bool, size_t, int, int>, void (*)()> read_fallbacks;
std::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
std::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
std::set<DoNotFastmemMarker> do_not_fastmem;
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> read_fallbacks;
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
ankerl::unordered_dense::set<DoNotFastmemMarker> do_not_fastmem;
const void* terminal_handler_pop_rsb_hint = nullptr;
const void* terminal_handler_fast_dispatch_hint = nullptr;
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;

View file

@ -118,7 +118,7 @@ void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) {
code.movaps(xword[rsp + offsetof(RegisterData, xmms) + 2 * sizeof(u64) * i], Xbyak::Xmm{i});
}
code.lea(rax, ptr[rsp + sizeof(RegisterData) + offsetof(StackLayout, spill)]);
code.mov(xword[rsp + offsetof(RegisterData, spill)], rax);
code.mov(qword[rsp + offsetof(RegisterData, spill)], rax);
reg_alloc.EmitVerboseDebuggingOutput();

View file

@ -28,27 +28,24 @@ std::optional<AxxEmitX64::DoNotFastmemMarker> AxxEmitX64::ShouldFastmem(AxxEmitC
FakeCall AxxEmitX64::FastmemCallback(u64 rip_) {
const auto iter = fastmem_patch_info.find(rip_);
if (iter == fastmem_patch_info.end()) {
if (iter != fastmem_patch_info.end()) {
FakeCall result{
.call_rip = iter->second.callback,
.ret_rip = iter->second.resume_rip,
};
if (iter->second.recompile) {
const auto marker = iter->second.marker;
do_not_fastmem.insert(marker);
InvalidateBasicBlocks({std::get<0>(marker)});
}
return result;
} else {
fmt::print("dynarmic: Segfault happened within JITted code at rip = {:016x}\n", rip_);
fmt::print("Segfault wasn't at a fastmem patch location!\n");
fmt::print("Now dumping code.......\n\n");
Common::DumpDisassembledX64((void*)(rip_ & ~u64(0xFFF)), 0x1000);
ASSERT_FALSE("iter != fastmem_patch_info.end()");
}
FakeCall result{
.call_rip = iter->second.callback,
.ret_rip = iter->second.resume_rip,
};
if (iter->second.recompile) {
const auto marker = iter->second.marker;
do_not_fastmem.insert(marker);
InvalidateBasicBlocks({std::get<0>(marker)});
}
return result;
}
template<std::size_t bitsize, auto callback>
@ -95,7 +92,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
if (fastmem_marker) {
// Use fastmem
bool require_abort_handling;
bool require_abort_handling = false;
const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
const auto location = EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr, ordered);
@ -182,7 +179,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
if (fastmem_marker) {
// Use fastmem
bool require_abort_handling;
bool require_abort_handling = false;
const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
const auto location = EmitWriteMemoryMov<bitsize>(code, dest_ptr, value_idx, ordered);

View file

@ -202,7 +202,7 @@ template<std::size_t bitsize>
const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::RegExp& addr, bool ordered) {
if (ordered) {
if constexpr (bitsize != 128) {
code.xor_(Xbyak::Reg32{value_idx}, Xbyak::Reg32{value_idx});
code.xor_(Xbyak::Reg32(value_idx), Xbyak::Reg32(value_idx));
} else {
code.xor_(eax, eax);
code.xor_(ebx, ebx);
@ -214,30 +214,30 @@ const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::Reg
switch (bitsize) {
case 8:
code.lock();
code.xadd(code.byte[addr], Xbyak::Reg32{value_idx}.cvt8());
code.xadd(code.byte[addr], Xbyak::Reg32(value_idx).cvt8());
break;
case 16:
code.lock();
code.xadd(word[addr], Xbyak::Reg16{value_idx});
code.xadd(word[addr], Xbyak::Reg64(value_idx).cvt16());
break;
case 32:
code.lock();
code.xadd(dword[addr], Xbyak::Reg32{value_idx});
code.xadd(dword[addr], Xbyak::Reg64(value_idx).cvt32());
break;
case 64:
code.lock();
code.xadd(qword[addr], Xbyak::Reg64{value_idx});
code.xadd(qword[addr], Xbyak::Reg64(value_idx));
break;
case 128:
code.lock();
code.cmpxchg16b(xword[addr]);
if (code.HasHostFeature(HostFeature::SSE41)) {
code.movq(Xbyak::Xmm{value_idx}, rax);
code.pinsrq(Xbyak::Xmm{value_idx}, rdx, 1);
code.movq(Xbyak::Xmm(value_idx), rax);
code.pinsrq(Xbyak::Xmm(value_idx), rdx, 1);
} else {
code.movq(Xbyak::Xmm{value_idx}, rax);
code.movq(Xbyak::Xmm(value_idx), rax);
code.movq(xmm0, rdx);
code.punpcklqdq(Xbyak::Xmm{value_idx}, xmm0);
code.punpcklqdq(Xbyak::Xmm(value_idx), xmm0);
}
break;
default:
@ -249,19 +249,19 @@ const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::Reg
const void* fastmem_location = code.getCurr();
switch (bitsize) {
case 8:
code.movzx(Xbyak::Reg32{value_idx}, code.byte[addr]);
code.movzx(Xbyak::Reg64(value_idx).cvt32(), code.byte[addr]);
break;
case 16:
code.movzx(Xbyak::Reg32{value_idx}, word[addr]);
code.movzx(Xbyak::Reg64(value_idx).cvt32(), word[addr]);
break;
case 32:
code.mov(Xbyak::Reg32{value_idx}, dword[addr]);
code.mov(Xbyak::Reg64(value_idx).cvt32(), dword[addr]);
break;
case 64:
code.mov(Xbyak::Reg64{value_idx}, qword[addr]);
code.mov(Xbyak::Reg64(value_idx), qword[addr]);
break;
case 128:
code.movups(Xbyak::Xmm{value_idx}, xword[addr]);
code.movups(Xbyak::Xmm(value_idx), xword[addr]);
break;
default:
ASSERT_FALSE("Invalid bitsize");
@ -276,10 +276,10 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int
code.xor_(eax, eax);
code.xor_(edx, edx);
if (code.HasHostFeature(HostFeature::SSE41)) {
code.movq(rbx, Xbyak::Xmm{value_idx});
code.pextrq(rcx, Xbyak::Xmm{value_idx}, 1);
code.movq(rbx, Xbyak::Xmm(value_idx));
code.pextrq(rcx, Xbyak::Xmm(value_idx), 1);
} else {
code.movaps(xmm0, Xbyak::Xmm{value_idx});
code.movaps(xmm0, Xbyak::Xmm(value_idx));
code.movq(rbx, xmm0);
code.punpckhqdq(xmm0, xmm0);
code.movq(rcx, xmm0);
@ -289,16 +289,16 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int
const void* fastmem_location = code.getCurr();
switch (bitsize) {
case 8:
code.xchg(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8());
code.xchg(code.byte[addr], Xbyak::Reg64(value_idx).cvt8());
break;
case 16:
code.xchg(word[addr], Xbyak::Reg16{value_idx});
code.xchg(word[addr], Xbyak::Reg64(value_idx).cvt16());
break;
case 32:
code.xchg(dword[addr], Xbyak::Reg32{value_idx});
code.xchg(dword[addr], Xbyak::Reg64(value_idx).cvt32());
break;
case 64:
code.xchg(qword[addr], Xbyak::Reg64{value_idx});
code.xchg(qword[addr], Xbyak::Reg64(value_idx));
break;
case 128: {
Xbyak::Label loop;
@ -317,19 +317,19 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int
const void* fastmem_location = code.getCurr();
switch (bitsize) {
case 8:
code.mov(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8());
code.mov(code.byte[addr], Xbyak::Reg64(value_idx).cvt8());
break;
case 16:
code.mov(word[addr], Xbyak::Reg16{value_idx});
code.mov(word[addr], Xbyak::Reg64(value_idx).cvt16());
break;
case 32:
code.mov(dword[addr], Xbyak::Reg32{value_idx});
code.mov(dword[addr], Xbyak::Reg64(value_idx).cvt32());
break;
case 64:
code.mov(qword[addr], Xbyak::Reg64{value_idx});
code.mov(qword[addr], Xbyak::Reg64(value_idx));
break;
case 128:
code.movups(xword[addr], Xbyak::Xmm{value_idx});
code.movups(xword[addr], Xbyak::Xmm(value_idx));
break;
default:
ASSERT_FALSE("Invalid bitsize");

View file

@ -109,6 +109,7 @@ inline size_t HostLocBitWidth(HostLoc loc) {
using HostLocList = std::initializer_list<HostLoc>;
// RSP is preserved for function calls
// R13 contains fastmem pointer if any
// R15 contains the JitState pointer
const HostLocList any_gpr = {
HostLoc::RAX,

View file

@ -109,13 +109,11 @@ bool TranslatorVisitor::arm_LDR_imm(Cond cond, bool P, bool U, bool W, Reg n, Re
if (t == Reg::PC) {
ir.LoadWritePC(data);
if (!P && W && n == Reg::R13) {
ir.SetTerm(IR::Term::PopRSBHint{});
} else {
ir.SetTerm(IR::Term::FastDispatchHint{});
}
return false;
}
@ -145,7 +143,11 @@ bool TranslatorVisitor::arm_LDR_reg(Cond cond, bool P, bool U, bool W, Reg n, Re
if (t == Reg::PC) {
ir.LoadWritePC(data);
ir.SetTerm(IR::Term::FastDispatchHint{});
if (!P && W && n == Reg::R13) {
ir.SetTerm(IR::Term::PopRSBHint{});
} else {
ir.SetTerm(IR::Term::FastDispatchHint{});
}
return false;
}

View file

@ -116,6 +116,8 @@ std::string DumpBlock(const IR::Block& block) noexcept {
return fmt::format("#{:#x}", arg.GetU32());
case Type::U64:
return fmt::format("#{:#x}", arg.GetU64());
case Type::U128:
return fmt::format("#<u128 imm>");
case Type::A32Reg:
return A32::RegToString(arg.GetA32RegRef());
case Type::A32ExtReg:
@ -124,8 +126,18 @@ std::string DumpBlock(const IR::Block& block) noexcept {
return A64::RegToString(arg.GetA64RegRef());
case Type::A64Vec:
return A64::VecToString(arg.GetA64VecRef());
case Type::CoprocInfo:
return fmt::format("#<coproc>");
case Type::NZCVFlags:
return fmt::format("#<NZCV flags>");
case Type::Cond:
return fmt::format("#<cond={}>", A32::CondToString(arg.GetCond()));
case Type::Table:
return fmt::format("#<table>");
case Type::AccType:
return fmt::format("#<acc-type={}>", u32(arg.GetAccType()));
default:
return "<unknown immediate type>";
return fmt::format("<unknown immediate type {}>", arg.GetType());
}
};

File diff suppressed because one or more lines are too long

View file

@ -133,6 +133,7 @@ class A64FastmemTestEnv final : public Dynarmic::A64::UserCallbacks {
public:
u64 ticks_left = 0;
char* backing_memory = nullptr;
bool ignore_invalid_insn = false;
explicit A64FastmemTestEnv(char* addr)
: backing_memory(addr) {}
@ -205,7 +206,7 @@ public:
return true;
}
void InterpreterFallback(u64 pc, size_t num_instructions) override { ASSERT_MSG(false, "InterpreterFallback({:016x}, {})", pc, num_instructions); }
void InterpreterFallback(u64 pc, size_t num_instructions) override { ASSERT_MSG(ignore_invalid_insn, "InterpreterFallback({:016x}, {})", pc, num_instructions); }
void CallSVC(std::uint32_t swi) override { ASSERT_MSG(false, "CallSVC({})", swi); }