[dynarmic] jit fix branch v2 #203

Merged
crueter merged 27 commits from dynarmic-v2 into master 2025-08-27 06:49:51 +02:00
10 changed files with 77 additions and 78 deletions
Showing only changes of commit f9da343c85 - Show all commits

View file

@ -798,9 +798,9 @@ static u32 GetFpscrImpl(A32JitState* jit_state) {
void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst); ctx.reg_alloc.HostCall(inst);
code.mov(code.ABI_PARAM1, code.r15); code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.stmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]); code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
code.CallFunction(&GetFpscrImpl); code.CallFunction(&GetFpscrImpl);
} }
@ -811,10 +811,10 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) {
void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, args[0]); ctx.reg_alloc.HostCall(nullptr, args[0]);
code.mov(code.ABI_PARAM2, code.r15); code.mov(code.ABI_PARAM2, code.ABI_JIT_PTR);
code.CallFunction(&SetFpscrImpl); code.CallFunction(&SetFpscrImpl);
code.ldmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]); code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
} }
void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {

View file

@ -380,8 +380,8 @@ static u32 GetFPSRImpl(A64JitState* jit_state) {
void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst); ctx.reg_alloc.HostCall(inst);
code.mov(code.ABI_PARAM1, code.r15); code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.stmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]); code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
code.CallFunction(GetFPSRImpl); code.CallFunction(GetFPSRImpl);
} }
@ -467,9 +467,9 @@ static void SetFPCRImpl(A64JitState* jit_state, u32 value) {
void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]); ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
code.mov(code.ABI_PARAM1, code.r15); code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.CallFunction(SetFPCRImpl); code.CallFunction(SetFPCRImpl);
code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]); code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
} }
static void SetFPSRImpl(A64JitState* jit_state, u32 value) { static void SetFPSRImpl(A64JitState* jit_state, u32 value) {
@ -479,9 +479,9 @@ static void SetFPSRImpl(A64JitState* jit_state, u32 value) {
void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]); ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
code.mov(code.ABI_PARAM1, code.r15); code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.CallFunction(SetFPSRImpl); code.CallFunction(SetFPSRImpl);
code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]); code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
} }
void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
@ -633,11 +633,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescri
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) { void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) {
// Used for patches and linking // Used for patches and linking
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) { if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
} else {
if (conf.enable_cycle_counting) { if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.push_back(code.getCurr()); patch_information[terminal.next].jg.push_back(code.getCurr());
@ -658,31 +654,34 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode(); code.ForceReturnFromRunCode();
} else {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
} }
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) { void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) { if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
} else {
patch_information[terminal.next].jmp.push_back(code.getCurr()); patch_information[terminal.next].jmp.push_back(code.getCurr());
if (auto next_bb = GetBasicBlock(terminal.next)) { if (auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJmp(terminal.next, next_bb->entrypoint); EmitPatchJmp(terminal.next, next_bb->entrypoint);
} else { } else {
EmitPatchJmp(terminal.next); EmitPatchJmp(terminal.next);
} }
} else {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
} }
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) { void A64EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
if (!conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) || is_single_step) { if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step) {
code.jmp(terminal_handler_pop_rsb_hint);
} else {
code.ReturnFromRunCode(); code.ReturnFromRunCode();
return;
} }
code.jmp(terminal_handler_pop_rsb_hint);
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor, bool is_single_step) { void A64EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor, bool is_single_step) {

View file

@ -712,12 +712,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
#ifdef _WIN32 #ifdef _WIN32
code.lea(rsp, ptr[rsp - (16 + ABI_SHADOW_SPACE)]); code.lea(rsp, ptr[rsp - (16 + ABI_SHADOW_SPACE)]);
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE], rax); code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
code.CallFunction(fallback_fn); code.CallFunction(fallback_fn);
code.add(rsp, 16 + ABI_SHADOW_SPACE); code.add(rsp, 16 + ABI_SHADOW_SPACE);
#else #else
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(fallback_fn); code.CallFunction(fallback_fn);
#endif #endif
code.movq(result, code.ABI_RETURN); code.movq(result, code.ABI_RETURN);
@ -821,12 +821,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
#ifdef _WIN32 #ifdef _WIN32
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE], rax); code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
code.CallFunction(fallback_fn); code.CallFunction(fallback_fn);
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
#else #else
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(fallback_fn); code.CallFunction(fallback_fn);
#endif #endif
} }
@ -945,7 +945,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRecipEstimate<FPT>); code.CallFunction(&FP::FPRecipEstimate<FPT>);
} }
@ -968,7 +968,7 @@ static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRecipExponent<FPT>); code.CallFunction(&FP::FPRecipExponent<FPT>);
} }
@ -1026,7 +1026,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.movq(code.ABI_PARAM1, operand1); code.movq(code.ABI_PARAM1, operand1);
code.movq(code.ABI_PARAM2, operand2); code.movq(code.ABI_PARAM2, operand2);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRecipStepFused<FPT>); code.CallFunction(&FP::FPRecipStepFused<FPT>);
code.movq(result, code.ABI_RETURN); code.movq(result, code.ABI_RETURN);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
@ -1055,7 +1055,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
ctx.reg_alloc.HostCall(inst, args[0], args[1]); ctx.reg_alloc.HostCall(inst, args[0], args[1]);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRecipStepFused<FPT>); code.CallFunction(&FP::FPRecipStepFused<FPT>);
} }
@ -1119,7 +1119,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.CallFunction(lut.at(std::make_tuple(fsize, rounding_mode, exact))); code.CallFunction(lut.at(std::make_tuple(fsize, rounding_mode, exact)));
} }
@ -1284,7 +1284,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
code.movq(code.ABI_PARAM1, operand); code.movq(code.ABI_PARAM1, operand);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRSqrtEstimate<FPT>); code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
code.movq(result, rax); code.movq(result, rax);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
@ -1298,7 +1298,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRSqrtEstimate<FPT>); code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
} }
} }
@ -1368,7 +1368,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.movq(code.ABI_PARAM1, operand1); code.movq(code.ABI_PARAM1, operand1);
code.movq(code.ABI_PARAM2, operand2); code.movq(code.ABI_PARAM2, operand2);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRSqrtStepFused<FPT>); code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
code.movq(result, code.ABI_RETURN); code.movq(result, code.ABI_RETURN);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
@ -1398,7 +1398,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
ctx.reg_alloc.HostCall(inst, args[0], args[1]); ctx.reg_alloc.HostCall(inst, args[0], args[1]);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPRSqrtStepFused<FPT>); code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
} }
@ -1511,7 +1511,7 @@ void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode)); code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPConvert<u64, u16>); code.CallFunction(&FP::FPConvert<u64, u16>);
} }
@ -1535,7 +1535,7 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode)); code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPConvert<u32, u16>); code.CallFunction(&FP::FPConvert<u32, u16>);
} }
@ -1556,7 +1556,7 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode)); code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPConvert<u64, u32>); code.CallFunction(&FP::FPConvert<u64, u32>);
} }
} }
@ -1581,7 +1581,7 @@ void EmitX64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode)); code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPConvert<u16, u32>); code.CallFunction(&FP::FPConvert<u16, u32>);
} }
@ -1595,7 +1595,7 @@ void EmitX64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode)); code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPConvert<u16, u64>); code.CallFunction(&FP::FPConvert<u16, u64>);
} }
@ -1616,7 +1616,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode)); code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.CallFunction(&FP::FPConvert<u32, u64>); code.CallFunction(&FP::FPConvert<u32, u64>);
} }
} }
@ -1757,7 +1757,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
mp::cartesian_product<fbits_list, rounding_list>{}); mp::cartesian_product<fbits_list, rounding_list>{});
ctx.reg_alloc.HostCall(inst, args[0]); ctx.reg_alloc.HostCall(inst, args[0]);
code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
code.CallFunction(lut.at(std::make_tuple(fbits, rounding_mode))); code.CallFunction(lut.at(std::make_tuple(fbits, rounding_mode)));
} }

View file

@ -69,7 +69,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
ctx.reg_alloc.DefineValue(overflow_inst, overflow); ctx.reg_alloc.DefineValue(overflow_inst, overflow);
} }
} else { } else {
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
} }
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
@ -98,7 +98,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(); const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
code.setb(overflow.cvt8()); code.setb(overflow.cvt8());
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
ctx.reg_alloc.DefineValue(inst, addend); ctx.reg_alloc.DefineValue(inst, addend);
} }
@ -226,7 +226,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx,
code.cmovns(y, tmp); code.cmovns(y, tmp);
code.sets(tmp.cvt8()); code.sets(tmp.cvt8());
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
ctx.reg_alloc.DefineValue(inst, y); ctx.reg_alloc.DefineValue(inst, y);
} }
@ -250,7 +250,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx,
code.cmovns(y.cvt32(), tmp.cvt32()); code.cmovns(y.cvt32(), tmp.cvt32());
code.sets(tmp.cvt8()); code.sets(tmp.cvt8());
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
ctx.reg_alloc.DefineValue(inst, y); ctx.reg_alloc.DefineValue(inst, y);
} }

View file

@ -110,7 +110,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -138,7 +138,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -165,7 +165,7 @@ static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code,
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -4261,7 +4261,7 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo
UNREACHABLE(); UNREACHABLE();
} }
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, data); ctx.reg_alloc.DefineValue(inst, data);
} }
@ -4396,7 +4396,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC
const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();
code.pmovmskb(mask, xmm0); code.pmovmskb(mask, xmm0);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], mask);
if (code.HasHostFeature(HostFeature::SSE41)) { if (code.HasHostFeature(HostFeature::SSE41)) {
code.pblendvb(result, tmp); code.pblendvb(result, tmp);
@ -4482,7 +4482,7 @@ static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
code.pmovmskb(bit, upper_tmp); code.pmovmskb(bit, upper_tmp);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -4533,7 +4533,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext&
code.vpcmpeqd(mask, result, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); code.vpcmpeqd(mask, result, code.Const(xword, 0x8000000080000000, 0x8000000080000000));
code.vpxor(result, result, mask); code.vpxor(result, result, mask);
code.pmovmskb(bit, mask); code.pmovmskb(bit, mask);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.Release(mask); ctx.reg_alloc.Release(mask);
ctx.reg_alloc.Release(bit); ctx.reg_alloc.Release(bit);
@ -4589,7 +4589,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext&
code.pcmpeqd(tmp, result); code.pcmpeqd(tmp, result);
code.pxor(result, tmp); code.pxor(result, tmp);
code.pmovmskb(bit, tmp); code.pmovmskb(bit, tmp);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -4623,7 +4623,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx,
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
code.pmovmskb(bit, y); code.pmovmskb(bit, y);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, x); ctx.reg_alloc.DefineValue(inst, x);
} }
@ -4676,7 +4676,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx,
code.pxor(x, y); code.pxor(x, y);
code.pmovmskb(bit, y); code.pmovmskb(bit, y);
} }
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, x); ctx.reg_alloc.DefineValue(inst, x);
} }
@ -4715,7 +4715,7 @@ static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, Block
code.pcmpeqd(reconstructed, src); code.pcmpeqd(reconstructed, src);
code.movmskps(bit, reconstructed); code.movmskps(bit, reconstructed);
code.xor_(bit, 0b1111); code.xor_(bit, 0b1111);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, dest); ctx.reg_alloc.DefineValue(inst, dest);
} }
@ -4770,7 +4770,7 @@ static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, Blo
code.pcmpeqd(reconstructed, src); code.pcmpeqd(reconstructed, src);
code.movmskps(bit, reconstructed); code.movmskps(bit, reconstructed);
code.xor_(bit, 0b1111); code.xor_(bit, 0b1111);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, dest); ctx.reg_alloc.DefineValue(inst, dest);
} }
@ -4873,7 +4873,7 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo
// Check if any elements matched the mask prior to performing saturation. If so, set the Q bit. // Check if any elements matched the mask prior to performing saturation. If so, set the Q bit.
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
code.pmovmskb(bit, tmp); code.pmovmskb(bit, tmp);
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
ctx.reg_alloc.DefineValue(inst, zero); ctx.reg_alloc.DefineValue(inst, zero);
} }

View file

@ -450,7 +450,7 @@ void EmitTwoOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
code.mov(code.ABI_PARAM3.cvt32(), fpcr); code.mov(code.ABI_PARAM3.cvt32(), fpcr);
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.movaps(xword[code.ABI_PARAM2], arg1); code.movaps(xword[code.ABI_PARAM2], arg1);
code.CallFunction(fn); code.CallFunction(fn);
@ -487,7 +487,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
code.mov(code.ABI_PARAM4.cvt32(), fpcr); code.mov(code.ABI_PARAM4.cvt32(), fpcr);
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax); code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax);
#else #else
constexpr u32 stack_space = 3 * 16; constexpr u32 stack_space = 3 * 16;
@ -496,7 +496,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
code.mov(code.ABI_PARAM4.cvt32(), fpcr); code.mov(code.ABI_PARAM4.cvt32(), fpcr);
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
#endif #endif
code.movaps(xword[code.ABI_PARAM2], arg1); code.movaps(xword[code.ABI_PARAM2], arg1);
@ -545,7 +545,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 4 * 16]); code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 4 * 16]);
code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], ctx.FPCR(fpcr_controlled).Value()); code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], ctx.FPCR(fpcr_controlled).Value());
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE + 8], rax); code.mov(qword[rsp + ABI_SHADOW_SPACE + 8], rax);
#else #else
constexpr u32 stack_space = 4 * 16; constexpr u32 stack_space = 4 * 16;
@ -555,7 +555,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]); code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
code.mov(code.ABI_PARAM5.cvt32(), ctx.FPCR(fpcr_controlled).Value()); code.mov(code.ABI_PARAM5.cvt32(), ctx.FPCR(fpcr_controlled).Value());
code.lea(code.ABI_PARAM6, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM6, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
#endif #endif
if constexpr (load_previous_result == LoadPreviousResult::Yes) { if constexpr (load_previous_result == LoadPreviousResult::Yes) {

View file

@ -62,7 +62,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.test(overflow.cvt32(), overflow.cvt32()); code.test(overflow.cvt32(), overflow.cvt32());
} }
code.setnz(overflow); code.setnz(overflow);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -104,7 +104,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.ktestb(k1, k1); code.ktestb(k1, k1);
code.setnz(overflow); code.setnz(overflow);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
return; return;
@ -160,7 +160,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.test(overflow.cvt32(), overflow.cvt32()); code.test(overflow.cvt32(), overflow.cvt32());
} }
code.setnz(overflow); code.setnz(overflow);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
if (code.HasHostFeature(HostFeature::SSE41)) { if (code.HasHostFeature(HostFeature::SSE41)) {
FCODE(blendvp)(result, tmp); FCODE(blendvp)(result, tmp);
@ -204,7 +204,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.ktestb(k1, k1); code.ktestb(k1, k1);
code.setnz(overflow); code.setnz(overflow);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
return; return;
@ -263,7 +263,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
} }
code.setnz(overflow); code.setnz(overflow);
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
if constexpr (op == Op::Add) { if constexpr (op == Op::Add) {
code.por(result, tmp); code.por(result, tmp);

View file

@ -429,6 +429,7 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
auto it_empty_candidate = desired_locations.cend(); auto it_empty_candidate = desired_locations.cend();
for (auto it = desired_locations.cbegin(); it != desired_locations.cend(); it++) { for (auto it = desired_locations.cbegin(); it != desired_locations.cend(); it++) {
auto const& loc_info = LocInfo(*it); auto const& loc_info = LocInfo(*it);
DEBUG_ASSERT(*it != ABI_JIT_PTR);
// Abstain from using upper registers unless absolutely nescesary // Abstain from using upper registers unless absolutely nescesary
if (loc_info.IsLocked()) { if (loc_info.IsLocked()) {
// skip, not suitable for allocation // skip, not suitable for allocation

View file

@ -22,6 +22,7 @@
#include "dynarmic/backend/x64/hostloc.h" #include "dynarmic/backend/x64/hostloc.h"
#include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/backend/x64/oparg.h" #include "dynarmic/backend/x64/oparg.h"
#include "dynarmic/backend/x64/abi.h"
#include "dynarmic/ir/cond.h" #include "dynarmic/ir/cond.h"
#include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/value.h" #include "dynarmic/ir/value.h"
@ -245,11 +246,11 @@ private:
HostLoc FindFreeSpill() const noexcept; HostLoc FindFreeSpill() const noexcept;
inline HostLocInfo& LocInfo(const HostLoc loc) noexcept { inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
ASSERT(loc != HostLoc::RSP); ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
return hostloc_info[static_cast<size_t>(loc)]; return hostloc_info[static_cast<size_t>(loc)];
} }
inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept { inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
ASSERT(loc != HostLoc::RSP); ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
return hostloc_info[static_cast<size_t>(loc)]; return hostloc_info[static_cast<size_t>(loc)];
} }

View file

@ -86,11 +86,9 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept {
} }
std::string DumpBlock(const IR::Block& block) noexcept { std::string DumpBlock(const IR::Block& block) noexcept {
std::string ret; std::string ret = fmt::format("Block: location={}-{}\n", block.Location(), block.EndLocation())
+ fmt::format("cycles={}", block.CycleCount())
ret += fmt::format("Block: location={}\n", block.Location()); + fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
ret += fmt::format("cycles={}", block.CycleCount());
ret += fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
if (block.GetCondition() != Cond::AL) { if (block.GetCondition() != Cond::AL) {
ret += fmt::format(", cond_fail={}", block.ConditionFailedLocation()); ret += fmt::format(", cond_fail={}", block.ConditionFailedLocation());
} }