forked from eden-emu/eden
[dynarmic] missing JIT ptr ABI refs
This commit is contained in:
parent
16583284e0
commit
040072e9fe
10 changed files with 77 additions and 78 deletions
|
@ -798,9 +798,9 @@ static u32 GetFpscrImpl(A32JitState* jit_state) {
|
|||
|
||||
void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
ctx.reg_alloc.HostCall(inst);
|
||||
code.mov(code.ABI_PARAM1, code.r15);
|
||||
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
|
||||
|
||||
code.stmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]);
|
||||
code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
|
||||
code.CallFunction(&GetFpscrImpl);
|
||||
}
|
||||
|
||||
|
@ -811,10 +811,10 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) {
|
|||
void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.HostCall(nullptr, args[0]);
|
||||
code.mov(code.ABI_PARAM2, code.r15);
|
||||
code.mov(code.ABI_PARAM2, code.ABI_JIT_PTR);
|
||||
|
||||
code.CallFunction(&SetFpscrImpl);
|
||||
code.ldmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]);
|
||||
code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
|
|
|
@ -380,8 +380,8 @@ static u32 GetFPSRImpl(A64JitState* jit_state) {
|
|||
|
||||
void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
ctx.reg_alloc.HostCall(inst);
|
||||
code.mov(code.ABI_PARAM1, code.r15);
|
||||
code.stmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
|
||||
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
|
||||
code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
|
||||
code.CallFunction(GetFPSRImpl);
|
||||
}
|
||||
|
||||
|
@ -467,9 +467,9 @@ static void SetFPCRImpl(A64JitState* jit_state, u32 value) {
|
|||
void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
|
||||
code.mov(code.ABI_PARAM1, code.r15);
|
||||
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
|
||||
code.CallFunction(SetFPCRImpl);
|
||||
code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
|
||||
code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
|
||||
}
|
||||
|
||||
static void SetFPSRImpl(A64JitState* jit_state, u32 value) {
|
||||
|
@ -479,9 +479,9 @@ static void SetFPSRImpl(A64JitState* jit_state, u32 value) {
|
|||
void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
|
||||
code.mov(code.ABI_PARAM1, code.r15);
|
||||
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
|
||||
code.CallFunction(SetFPSRImpl);
|
||||
code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
|
||||
code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
|
||||
|
@ -633,11 +633,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescri
|
|||
|
||||
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) {
|
||||
// Used for patches and linking
|
||||
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
|
||||
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
|
||||
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
|
||||
code.ReturnFromRunCode();
|
||||
} else {
|
||||
if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
|
||||
patch_information[terminal.next].jg.push_back(code.getCurr());
|
||||
|
@ -658,31 +654,34 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
|
|||
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
|
||||
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
|
||||
code.ForceReturnFromRunCode();
|
||||
} else {
|
||||
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
|
||||
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
|
||||
code.ReturnFromRunCode();
|
||||
}
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
|
||||
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
|
||||
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
|
||||
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
|
||||
code.ReturnFromRunCode();
|
||||
} else {
|
||||
if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
|
||||
patch_information[terminal.next].jmp.push_back(code.getCurr());
|
||||
if (auto next_bb = GetBasicBlock(terminal.next)) {
|
||||
EmitPatchJmp(terminal.next, next_bb->entrypoint);
|
||||
} else {
|
||||
EmitPatchJmp(terminal.next);
|
||||
}
|
||||
} else {
|
||||
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
|
||||
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
|
||||
code.ReturnFromRunCode();
|
||||
}
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
|
||||
if (!conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) || is_single_step) {
|
||||
if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step) {
|
||||
code.jmp(terminal_handler_pop_rsb_hint);
|
||||
} else {
|
||||
code.ReturnFromRunCode();
|
||||
return;
|
||||
}
|
||||
|
||||
code.jmp(terminal_handler_pop_rsb_hint);
|
||||
}
|
||||
|
||||
void A64EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor, bool is_single_step) {
|
||||
|
|
|
@ -712,12 +712,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
|
||||
#ifdef _WIN32
|
||||
code.lea(rsp, ptr[rsp - (16 + ABI_SHADOW_SPACE)]);
|
||||
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
|
||||
code.CallFunction(fallback_fn);
|
||||
code.add(rsp, 16 + ABI_SHADOW_SPACE);
|
||||
#else
|
||||
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(fallback_fn);
|
||||
#endif
|
||||
code.movq(result, code.ABI_RETURN);
|
||||
|
@ -821,12 +821,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
|
||||
#ifdef _WIN32
|
||||
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
|
||||
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
|
||||
code.CallFunction(fallback_fn);
|
||||
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
|
||||
#else
|
||||
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(fallback_fn);
|
||||
#endif
|
||||
}
|
||||
|
@ -945,7 +945,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPRecipEstimate<FPT>);
|
||||
}
|
||||
|
||||
|
@ -968,7 +968,7 @@ static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPRecipExponent<FPT>);
|
||||
}
|
||||
|
||||
|
@ -1026,7 +1026,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
code.movq(code.ABI_PARAM1, operand1);
|
||||
code.movq(code.ABI_PARAM2, operand2);
|
||||
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPRecipStepFused<FPT>);
|
||||
code.movq(result, code.ABI_RETURN);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
|
||||
|
@ -1055,7 +1055,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
|
||||
ctx.reg_alloc.HostCall(inst, args[0], args[1]);
|
||||
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPRecipStepFused<FPT>);
|
||||
}
|
||||
|
||||
|
@ -1119,7 +1119,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz
|
|||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
|
||||
code.CallFunction(lut.at(std::make_tuple(fsize, rounding_mode, exact)));
|
||||
}
|
||||
|
@ -1284,7 +1284,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
|||
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
|
||||
code.movq(code.ABI_PARAM1, operand);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
|
||||
code.movq(result, rax);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
|
||||
|
@ -1298,7 +1298,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
|
||||
}
|
||||
}
|
||||
|
@ -1368,7 +1368,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
code.movq(code.ABI_PARAM1, operand1);
|
||||
code.movq(code.ABI_PARAM2, operand2);
|
||||
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
|
||||
code.movq(result, code.ABI_RETURN);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
|
||||
|
@ -1398,7 +1398,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
|
||||
ctx.reg_alloc.HostCall(inst, args[0], args[1]);
|
||||
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
|
||||
}
|
||||
|
||||
|
@ -1511,7 +1511,7 @@ void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPConvert<u64, u16>);
|
||||
}
|
||||
|
||||
|
@ -1535,7 +1535,7 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPConvert<u32, u16>);
|
||||
}
|
||||
|
||||
|
@ -1556,7 +1556,7 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPConvert<u64, u32>);
|
||||
}
|
||||
}
|
||||
|
@ -1581,7 +1581,7 @@ void EmitX64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPConvert<u16, u32>);
|
||||
}
|
||||
|
||||
|
@ -1595,7 +1595,7 @@ void EmitX64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPConvert<u16, u64>);
|
||||
}
|
||||
|
||||
|
@ -1616,7 +1616,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPConvert<u32, u64>);
|
||||
}
|
||||
}
|
||||
|
@ -1757,7 +1757,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
mp::cartesian_product<fbits_list, rounding_list>{});
|
||||
|
||||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
|
||||
code.CallFunction(lut.at(std::make_tuple(fbits, rounding_mode)));
|
||||
}
|
||||
|
|
|
@ -69,7 +69,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
|
|||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
}
|
||||
} else {
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
|
@ -98,7 +98,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
|
|||
|
||||
const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
|
||||
code.setb(overflow.cvt8());
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, addend);
|
||||
}
|
||||
|
@ -226,7 +226,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx,
|
|||
code.cmovns(y, tmp);
|
||||
|
||||
code.sets(tmp.cvt8());
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, y);
|
||||
}
|
||||
|
@ -250,7 +250,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx,
|
|||
code.cmovns(y.cvt32(), tmp.cvt32());
|
||||
|
||||
code.sets(tmp.cvt8());
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, y);
|
||||
}
|
||||
|
|
|
@ -110,7 +110,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
|
|||
|
||||
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -138,7 +138,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
|
|||
|
||||
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -165,7 +165,7 @@ static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code,
|
|||
|
||||
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -4261,7 +4261,7 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
ctx.reg_alloc.DefineValue(inst, data);
|
||||
}
|
||||
|
||||
|
@ -4396,7 +4396,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC
|
|||
|
||||
const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
code.pmovmskb(mask, xmm0);
|
||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask);
|
||||
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], mask);
|
||||
|
||||
if (code.HasHostFeature(HostFeature::SSE41)) {
|
||||
code.pblendvb(result, tmp);
|
||||
|
@ -4482,7 +4482,7 @@ static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC
|
|||
|
||||
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
code.pmovmskb(bit, upper_tmp);
|
||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -4533,7 +4533,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext&
|
|||
code.vpcmpeqd(mask, result, code.Const(xword, 0x8000000080000000, 0x8000000080000000));
|
||||
code.vpxor(result, result, mask);
|
||||
code.pmovmskb(bit, mask);
|
||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
|
||||
ctx.reg_alloc.Release(mask);
|
||||
ctx.reg_alloc.Release(bit);
|
||||
|
@ -4589,7 +4589,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext&
|
|||
code.pcmpeqd(tmp, result);
|
||||
code.pxor(result, tmp);
|
||||
code.pmovmskb(bit, tmp);
|
||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -4623,7 +4623,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx,
|
|||
|
||||
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
code.pmovmskb(bit, y);
|
||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, x);
|
||||
}
|
||||
|
@ -4676,7 +4676,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx,
|
|||
code.pxor(x, y);
|
||||
code.pmovmskb(bit, y);
|
||||
}
|
||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, x);
|
||||
}
|
||||
|
@ -4715,7 +4715,7 @@ static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, Block
|
|||
code.pcmpeqd(reconstructed, src);
|
||||
code.movmskps(bit, reconstructed);
|
||||
code.xor_(bit, 0b1111);
|
||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, dest);
|
||||
}
|
||||
|
@ -4770,7 +4770,7 @@ static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, Blo
|
|||
code.pcmpeqd(reconstructed, src);
|
||||
code.movmskps(bit, reconstructed);
|
||||
code.xor_(bit, 0b1111);
|
||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, dest);
|
||||
}
|
||||
|
@ -4873,7 +4873,7 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo
|
|||
// Check if any elements matched the mask prior to performing saturation. If so, set the Q bit.
|
||||
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
code.pmovmskb(bit, tmp);
|
||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, zero);
|
||||
}
|
||||
|
|
|
@ -450,7 +450,7 @@ void EmitTwoOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak
|
|||
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
|
||||
code.mov(code.ABI_PARAM3.cvt32(), fpcr);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
|
||||
code.movaps(xword[code.ABI_PARAM2], arg1);
|
||||
code.CallFunction(fn);
|
||||
|
@ -487,7 +487,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
|
|||
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
|
||||
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
|
||||
code.mov(code.ABI_PARAM4.cvt32(), fpcr);
|
||||
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax);
|
||||
#else
|
||||
constexpr u32 stack_space = 3 * 16;
|
||||
|
@ -496,7 +496,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
|
|||
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
|
||||
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
|
||||
code.mov(code.ABI_PARAM4.cvt32(), fpcr);
|
||||
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
#endif
|
||||
|
||||
code.movaps(xword[code.ABI_PARAM2], arg1);
|
||||
|
@ -545,7 +545,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
|
|||
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
|
||||
code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 4 * 16]);
|
||||
code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], ctx.FPCR(fpcr_controlled).Value());
|
||||
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.mov(qword[rsp + ABI_SHADOW_SPACE + 8], rax);
|
||||
#else
|
||||
constexpr u32 stack_space = 4 * 16;
|
||||
|
@ -555,7 +555,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
|
|||
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
|
||||
code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
|
||||
code.mov(code.ABI_PARAM5.cvt32(), ctx.FPCR(fpcr_controlled).Value());
|
||||
code.lea(code.ABI_PARAM6, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM6, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
#endif
|
||||
|
||||
if constexpr (load_previous_result == LoadPreviousResult::Yes) {
|
||||
|
|
|
@ -62,7 +62,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
code.test(overflow.cvt32(), overflow.cvt32());
|
||||
}
|
||||
code.setnz(overflow);
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -104,7 +104,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
|
||||
code.ktestb(k1, k1);
|
||||
code.setnz(overflow);
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
|
@ -160,7 +160,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
code.test(overflow.cvt32(), overflow.cvt32());
|
||||
}
|
||||
code.setnz(overflow);
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
|
||||
if (code.HasHostFeature(HostFeature::SSE41)) {
|
||||
FCODE(blendvp)(result, tmp);
|
||||
|
@ -204,7 +204,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
|
||||
code.ktestb(k1, k1);
|
||||
code.setnz(overflow);
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
|
@ -263,7 +263,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
}
|
||||
|
||||
code.setnz(overflow);
|
||||
code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
|
||||
if constexpr (op == Op::Add) {
|
||||
code.por(result, tmp);
|
||||
|
|
|
@ -431,6 +431,7 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
|
|||
auto it_empty_candidate = desired_locations.cend();
|
||||
for (auto it = desired_locations.cbegin(); it != desired_locations.cend(); it++) {
|
||||
auto const& loc_info = LocInfo(*it);
|
||||
DEBUG_ASSERT(*it != ABI_JIT_PTR);
|
||||
// Abstain from using upper registers unless absolutely necessary
|
||||
if (loc_info.IsLocked()) {
|
||||
// skip, not suitable for allocation
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "dynarmic/backend/x64/hostloc.h"
|
||||
#include "dynarmic/backend/x64/stack_layout.h"
|
||||
#include "dynarmic/backend/x64/oparg.h"
|
||||
#include "dynarmic/backend/x64/abi.h"
|
||||
#include "dynarmic/ir/cond.h"
|
||||
#include "dynarmic/ir/microinstruction.h"
|
||||
#include "dynarmic/ir/value.h"
|
||||
|
@ -245,11 +246,11 @@ private:
|
|||
HostLoc FindFreeSpill() const noexcept;
|
||||
|
||||
inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
|
||||
ASSERT(loc != HostLoc::RSP);
|
||||
ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
|
||||
return hostloc_info[static_cast<size_t>(loc)];
|
||||
}
|
||||
inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
|
||||
ASSERT(loc != HostLoc::RSP);
|
||||
ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
|
||||
return hostloc_info[static_cast<size_t>(loc)];
|
||||
}
|
||||
|
||||
|
|
|
@ -86,11 +86,9 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept {
|
|||
}
|
||||
|
||||
std::string DumpBlock(const IR::Block& block) noexcept {
|
||||
std::string ret;
|
||||
|
||||
ret += fmt::format("Block: location={}\n", block.Location());
|
||||
ret += fmt::format("cycles={}", block.CycleCount());
|
||||
ret += fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
|
||||
std::string ret = fmt::format("Block: location={}-{}\n", block.Location(), block.EndLocation())
|
||||
+ fmt::format("cycles={}", block.CycleCount())
|
||||
+ fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
|
||||
if (block.GetCondition() != Cond::AL) {
|
||||
ret += fmt::format(", cond_fail={}", block.ConditionFailedLocation());
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue