[dynarmic] jit fix branch v2 #203

Merged
crueter merged 27 commits from dynarmic-v2 into master 2025-08-27 06:49:51 +02:00
7 changed files with 120 additions and 68 deletions
Showing only changes of commit 040214ecd4 - Show all commits

View file

@ -1165,32 +1165,28 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
code.ReturnFromRunCode();
return;
}
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJg(terminal.next);
}
} else {
code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
patch_information[terminal.next].jz.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJg(terminal.next);
}
} else {
EmitPatchJz(terminal.next);
code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
patch_information[terminal.next].jz.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJz(terminal.next);
}
}
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
PushRSBHelper(rax, rbx, terminal.next);
code.ForceReturnFromRunCode();
}
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
PushRSBHelper(rax, rbx, terminal.next);
code.ForceReturnFromRunCode();
}
void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
@ -1199,14 +1195,13 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::Location
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
code.ReturnFromRunCode();
return;
}
patch_information[terminal.next].jmp.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJmp(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJmp(terminal.next);
patch_information[terminal.next].jmp.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJmp(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJmp(terminal.next);
}
}
}

View file

@ -632,36 +632,33 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescri
}
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) {
// Used for patches and linking
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
return;
}
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJg(terminal.next);
}
} else {
code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
patch_information[terminal.next].jz.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJg(terminal.next);
}
} else {
EmitPatchJz(terminal.next);
code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
patch_information[terminal.next].jz.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJz(terminal.next);
}
}
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode();
}
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode();
}
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
@ -669,14 +666,13 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::Location
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
return;
}
patch_information[terminal.next].jmp.push_back(code.getCurr());
if (auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJmp(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJmp(terminal.next);
patch_information[terminal.next].jmp.push_back(code.getCurr());
if (auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJmp(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJmp(terminal.next);
}
}
}

View file

@ -92,17 +92,16 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, I
: code.GetReturnFromRunCodeAddress();
code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]);
code.mov(loc_desc_reg, target.Value());
patch_information[target].mov_rcx.push_back(code.getCurr());
EmitPatchMovRcx(target_code_ptr);
code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx);
code.add(index_reg.cvt32(), 1);
code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask));
// Byte size hack
DEBUG_ASSERT(code.GetJitStateInfo().rsb_ptr_mask <= 0xFF);
code.add(index_reg.cvt32(), 1); //flags trashed, 1 single byte, haswell doesn't care
code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask)); //trashes flags
// Results ready and sort by least needed: give OOO some break
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
}

View file

@ -909,11 +909,11 @@ static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* ca
}
}
// AL contains flags (after LAHF + SETO sequence)
static Xbyak::Reg64 DoNZCV(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* nzcv_out) {
if (!nzcv_out) {
return Xbyak::Reg64{-1};
}
const Xbyak::Reg64 nzcv = reg_alloc.ScratchGpr(HostLoc::RAX);
code.xor_(nzcv.cvt32(), nzcv.cvt32());
return nzcv;
@ -1168,7 +1168,7 @@ void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) {
code.xor_(eax, eax);
code.test(divisor, divisor);
code.jz(end);
code.jz(end, code.T_NEAR);
code.mov(eax, dividend);
code.xor_(edx, edx);
code.div(divisor);
@ -1189,7 +1189,7 @@ void EmitX64::EmitUnsignedDiv64(EmitContext& ctx, IR::Inst* inst) {
code.xor_(eax, eax);
code.test(divisor, divisor);
code.jz(end);
code.jz(end, code.T_NEAR);
code.mov(rax, dividend);
code.xor_(edx, edx);
code.div(divisor);

View file

@ -445,6 +445,9 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
}
}
// TODO: Why the difference? QEMU what are you doing???
jit.Regs()[15] = uni.GetRegisters()[15];
REQUIRE(uni.GetRegisters() == jit.Regs());
REQUIRE(uni.GetExtRegs() == jit.ExtRegs());
REQUIRE((uni.GetCpsr() & 0xFFFFFDDF) == (jit.Cpsr() & 0xFFFFFDDF));

File diff suppressed because one or more lines are too long

View file

@ -173,7 +173,7 @@ void A64Unicorn::InterruptHook(uc_engine* uc, u32 int_number, void* user_data) {
auto* this_ = static_cast<A64Unicorn*>(user_data);
u32 esr;
CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
//CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR_EL0, &esr));
auto ec = esr >> 26;
auto iss = esr & 0xFFFFFF;