[dynarmic] jit fix branch v2 #203

Merged
crueter merged 27 commits from dynarmic-v2 into master 2025-08-27 06:49:51 +02:00
10 changed files with 77 additions and 78 deletions
Showing only changes of commit f9da343c85 - Show all commits

View file

@ -798,9 +798,9 @@ static u32 GetFpscrImpl(A32JitState* jit_state) {
void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst);
code.mov(code.ABI_PARAM1, code.r15);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.stmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]);
code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
code.CallFunction(&GetFpscrImpl);
}
@ -811,10 +811,10 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) {
void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, args[0]);
code.mov(code.ABI_PARAM2, code.r15);
code.mov(code.ABI_PARAM2, code.ABI_JIT_PTR);
code.CallFunction(&SetFpscrImpl);
code.ldmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]);
code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
}
void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {

View file

@ -380,8 +380,8 @@ static u32 GetFPSRImpl(A64JitState* jit_state) {
void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst);
code.mov(code.ABI_PARAM1, code.r15);
code.stmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
code.CallFunction(GetFPSRImpl);
}
@ -467,9 +467,9 @@ static void SetFPCRImpl(A64JitState* jit_state, u32 value) {
void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
code.mov(code.ABI_PARAM1, code.r15);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.CallFunction(SetFPCRImpl);
code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
}
static void SetFPSRImpl(A64JitState* jit_state, u32 value) {
@ -479,9 +479,9 @@ static void SetFPSRImpl(A64JitState* jit_state, u32 value) {
void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
code.mov(code.ABI_PARAM1, code.r15);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.CallFunction(SetFPSRImpl);
code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
}
void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
@ -633,11 +633,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescri
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) {
// Used for patches and linking
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
} else {
if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.push_back(code.getCurr());
@ -658,31 +654,34 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode();
} else {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
}
}
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
} else {
if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
patch_information[terminal.next].jmp.push_back(code.getCurr());
if (auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJmp(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJmp(terminal.next);
}
} else {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
}
}
void A64EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
if (!conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) || is_single_step) {
if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step) {
code.jmp(terminal_handler_pop_rsb_hint);
} else {
code.ReturnFromRunCode();
return;
}
code.jmp(terminal_handler_pop_rsb_hint);
}
void A64EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor, bool is_single_step) {

View file

@ -712,12 +712,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
#ifdef _WIN32
code.lea(rsp, ptr[rsp - (16 + ABI_SHADOW_SPACE)]);
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
code.CallFunction(fallback_fn);
code.add(rsp, 16 + ABI_SHADOW_SPACE);
#else
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(fallback_fn);
#endif
code.movq(result, code.ABI_RETURN);
@ -821,12 +821,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
#ifdef _WIN32
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
code.CallFunction(fallback_fn);
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
#else
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(fallback_fn);
#endif
}
@ -945,7 +945,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRecipEstimate<FPT>);
}
@ -968,7 +968,7 @@ static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRecipExponent<FPT>);
}
@ -1026,7 +1026,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.movq(code.ABI_PARAM1, operand1);
code.movq(code.ABI_PARAM2, operand2);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRecipStepFused<FPT>);
code.movq(result, code.ABI_RETURN);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
@ -1055,7 +1055,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
ctx.reg_alloc.HostCall(inst, args[0], args[1]);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRecipStepFused<FPT>);
}
@ -1119,7 +1119,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, args[0]);
code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.CallFunction(lut.at(std::make_tuple(fsize, rounding_mode, exact)));
}
@ -1284,7 +1284,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.movq(code.ABI_PARAM1, operand);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
code.movq(result, rax);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
@ -1298,7 +1298,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
}
}
@ -1368,7 +1368,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.movq(code.ABI_PARAM1, operand1);
code.movq(code.ABI_PARAM2, operand2);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
code.movq(result, code.ABI_RETURN);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
@ -1398,7 +1398,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
ctx.reg_alloc.HostCall(inst, args[0], args[1]);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
}
@ -1511,7 +1511,7 @@ void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPConvert<u64, u16>);
}
@ -1535,7 +1535,7 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPConvert<u32, u16>);
}
@ -1556,7 +1556,7 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPConvert<u64, u32>);
}
}
@ -1581,7 +1581,7 @@ void EmitX64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPConvert<u16, u32>);
}
@ -1595,7 +1595,7 @@ void EmitX64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPConvert<u16, u64>);
}
@ -1616,7 +1616,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPConvert<u32, u64>);
}
}
@ -1757,7 +1757,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
mp::cartesian_product<fbits_list, rounding_list>{});
ctx.reg_alloc.HostCall(inst, args[0]);
code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.CallFunction(lut.at(std::make_tuple(fbits, rounding_mode)));
}

View file

@ -69,7 +69,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
}
} else {
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
}
ctx.reg_alloc.DefineValue(inst, result);
@ -98,7 +98,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
code.setb(overflow.cvt8());
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
ctx.reg_alloc.DefineValue(inst, addend);
}
@ -226,7 +226,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx,
code.cmovns(y, tmp);
code.sets(tmp.cvt8());
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
ctx.reg_alloc.DefineValue(inst, y);
}
@ -250,7 +250,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx,
code.cmovns(y.cvt32(), tmp.cvt32());
code.sets(tmp.cvt8());
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
ctx.reg_alloc.DefineValue(inst, y);
}

View file

@ -110,7 +110,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
ctx.reg_alloc.DefineValue(inst, result);
}
@ -138,7 +138,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
ctx.reg_alloc.DefineValue(inst, result);
}
@ -165,7 +165,7 @@ static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code,
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
ctx.reg_alloc.DefineValue(inst, result);
}
@ -4261,7 +4261,7 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo
UNREACHABLE();
}
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, data);
}
@ -4396,7 +4396,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC
const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();
code.pmovmskb(mask, xmm0);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask);
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], mask);
if (code.HasHostFeature(HostFeature::SSE41)) {
code.pblendvb(result, tmp);
@ -4482,7 +4482,7 @@ static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
code.pmovmskb(bit, upper_tmp);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -4533,7 +4533,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext&
code.vpcmpeqd(mask, result, code.Const(xword, 0x8000000080000000, 0x8000000080000000));
code.vpxor(result, result, mask);
code.pmovmskb(bit, mask);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.Release(mask);
ctx.reg_alloc.Release(bit);
@ -4589,7 +4589,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext&
code.pcmpeqd(tmp, result);
code.pxor(result, tmp);
code.pmovmskb(bit, tmp);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -4623,7 +4623,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx,
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
code.pmovmskb(bit, y);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, x);
}
@ -4676,7 +4676,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx,
code.pxor(x, y);
code.pmovmskb(bit, y);
}
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, x);
}
@ -4715,7 +4715,7 @@ static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, Block
code.pcmpeqd(reconstructed, src);
code.movmskps(bit, reconstructed);
code.xor_(bit, 0b1111);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, dest);
}
@ -4770,7 +4770,7 @@ static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, Blo
code.pcmpeqd(reconstructed, src);
code.movmskps(bit, reconstructed);
code.xor_(bit, 0b1111);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, dest);
}
@ -4873,7 +4873,7 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo
// Check if any elements matched the mask prior to performing saturation. If so, set the Q bit.
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
code.pmovmskb(bit, tmp);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, zero);
}

View file

@ -450,7 +450,7 @@ void EmitTwoOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
code.mov(code.ABI_PARAM3.cvt32(), fpcr);
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.movaps(xword[code.ABI_PARAM2], arg1);
code.CallFunction(fn);
@ -487,7 +487,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
code.mov(code.ABI_PARAM4.cvt32(), fpcr);
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax);
#else
constexpr u32 stack_space = 3 * 16;
@ -496,7 +496,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
code.mov(code.ABI_PARAM4.cvt32(), fpcr);
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
#endif
code.movaps(xword[code.ABI_PARAM2], arg1);
@ -545,7 +545,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 4 * 16]);
code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], ctx.FPCR(fpcr_controlled).Value());
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE + 8], rax);
#else
constexpr u32 stack_space = 4 * 16;
@ -555,7 +555,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
code.mov(code.ABI_PARAM5.cvt32(), ctx.FPCR(fpcr_controlled).Value());
code.lea(code.ABI_PARAM6, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.lea(code.ABI_PARAM6, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
#endif
if constexpr (load_previous_result == LoadPreviousResult::Yes) {

View file

@ -62,7 +62,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.test(overflow.cvt32(), overflow.cvt32());
}
code.setnz(overflow);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
ctx.reg_alloc.DefineValue(inst, result);
}
@ -104,7 +104,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.ktestb(k1, k1);
code.setnz(overflow);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
ctx.reg_alloc.DefineValue(inst, result);
return;
@ -160,7 +160,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.test(overflow.cvt32(), overflow.cvt32());
}
code.setnz(overflow);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
if (code.HasHostFeature(HostFeature::SSE41)) {
FCODE(blendvp)(result, tmp);
@ -204,7 +204,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.ktestb(k1, k1);
code.setnz(overflow);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
ctx.reg_alloc.DefineValue(inst, result);
return;
@ -263,7 +263,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
}
code.setnz(overflow);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
if constexpr (op == Op::Add) {
code.por(result, tmp);

View file

@ -429,6 +429,7 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
auto it_empty_candidate = desired_locations.cend();
for (auto it = desired_locations.cbegin(); it != desired_locations.cend(); it++) {
auto const& loc_info = LocInfo(*it);
DEBUG_ASSERT(*it != ABI_JIT_PTR);
// Abstain from using upper registers unless absolutely nescesary
if (loc_info.IsLocked()) {
// skip, not suitable for allocation

View file

@ -22,6 +22,7 @@
#include "dynarmic/backend/x64/hostloc.h"
#include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/backend/x64/oparg.h"
#include "dynarmic/backend/x64/abi.h"
#include "dynarmic/ir/cond.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/value.h"
@ -245,11 +246,11 @@ private:
HostLoc FindFreeSpill() const noexcept;
inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
ASSERT(loc != HostLoc::RSP);
ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
return hostloc_info[static_cast<size_t>(loc)];
}
inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
ASSERT(loc != HostLoc::RSP);
ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
return hostloc_info[static_cast<size_t>(loc)];
}

View file

@ -86,11 +86,9 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept {
}
std::string DumpBlock(const IR::Block& block) noexcept {
std::string ret;
ret += fmt::format("Block: location={}\n", block.Location());
ret += fmt::format("cycles={}", block.CycleCount());
ret += fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
std::string ret = fmt::format("Block: location={}-{}\n", block.Location(), block.EndLocation())
+ fmt::format("cycles={}", block.CycleCount())
+ fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
if (block.GetCondition() != Cond::AL) {
ret += fmt::format(", cond_fail={}", block.ConditionFailedLocation());
}