diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index 6d284f3086..3559c4fa87 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -798,9 +798,9 @@ static u32 GetFpscrImpl(A32JitState* jit_state) { void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst); - code.mov(code.ABI_PARAM1, code.r15); + code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR); - code.stmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]); + code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]); code.CallFunction(&GetFpscrImpl); } @@ -811,10 +811,10 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) { void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, args[0]); - code.mov(code.ABI_PARAM2, code.r15); + code.mov(code.ABI_PARAM2, code.ABI_JIT_PTR); code.CallFunction(&SetFpscrImpl); - code.ldmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]); + code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]); } void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index b091e4310c..a25ecd4d23 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -380,8 +380,8 @@ static u32 GetFPSRImpl(A64JitState* jit_state) { void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst); - code.mov(code.ABI_PARAM1, code.r15); - code.stmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]); + code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR); + code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]); code.CallFunction(GetFPSRImpl); } @@ -467,9 +467,9 @@ static void SetFPCRImpl(A64JitState* jit_state, u32 value) { void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0]); - code.mov(code.ABI_PARAM1, code.r15); + code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR); code.CallFunction(SetFPCRImpl); - code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]); + code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]); } static void SetFPSRImpl(A64JitState* jit_state, u32 value) { @@ -479,9 +479,9 @@ static void SetFPSRImpl(A64JitState* jit_state, u32 value) { void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0]); - code.mov(code.ABI_PARAM1, code.r15); + code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR); code.CallFunction(SetFPSRImpl); - code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]); + code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]); } void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) { @@ -633,11 +633,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescri void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) { // Used for patches and linking - if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) { - code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); - code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); - code.ReturnFromRunCode(); - } else { + if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) { if (conf.enable_cycle_counting) { code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0); patch_information[terminal.next].jg.push_back(code.getCurr()); @@ -658,31 +654,34 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); code.ForceReturnFromRunCode(); + } else { + code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); + code.ReturnFromRunCode(); } } void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) { - if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) { - code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); - code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); - code.ReturnFromRunCode(); - } else { + if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) { patch_information[terminal.next].jmp.push_back(code.getCurr()); if (auto next_bb = GetBasicBlock(terminal.next)) { EmitPatchJmp(terminal.next, next_bb->entrypoint); } else { EmitPatchJmp(terminal.next); } + } else { + code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); + code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax); + code.ReturnFromRunCode(); } } void A64EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) { - if (!conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) || is_single_step) { + if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step) { + code.jmp(terminal_handler_pop_rsb_hint); + } else { code.ReturnFromRunCode(); - return; } - - code.jmp(terminal_handler_pop_rsb_hint); } void A64EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor, bool is_single_step) { diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 63b9659618..581faedfb7 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -712,12 +712,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); #ifdef _WIN32 code.lea(rsp, ptr[rsp - (16 + ABI_SHADOW_SPACE)]); - code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE], rax); code.CallFunction(fallback_fn); code.add(rsp, 16 + ABI_SHADOW_SPACE); #else - code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(fallback_fn); #endif code.movq(result, code.ABI_RETURN); @@ -821,12 +821,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); #ifdef _WIN32 ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); - code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE], rax); code.CallFunction(fallback_fn); ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); #else - code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(fallback_fn); #endif } @@ -945,7 +945,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipEstimate); } @@ -968,7 +968,7 @@ static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* i auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipExponent); } @@ -1026,7 +1026,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.movq(code.ABI_PARAM1, operand1); code.movq(code.ABI_PARAM2, operand2); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipStepFused); code.movq(result, code.ABI_RETURN); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); @@ -1055,7 +1055,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* ctx.reg_alloc.HostCall(inst, args[0], args[1]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipStepFused); } @@ -1119,7 +1119,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); - code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.CallFunction(lut.at(std::make_tuple(fsize, rounding_mode, exact))); } @@ -1284,7 +1284,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); code.movq(code.ABI_PARAM1, operand); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtEstimate); code.movq(result, rax); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); @@ -1298,7 +1298,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtEstimate); } } @@ -1368,7 +1368,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.movq(code.ABI_PARAM1, operand1); code.movq(code.ABI_PARAM2, operand2); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtStepFused); code.movq(result, code.ABI_RETURN); ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); @@ -1398,7 +1398,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* ctx.reg_alloc.HostCall(inst, args[0], args[1]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtStepFused); } @@ -1511,7 +1511,7 @@ void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1535,7 +1535,7 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1556,7 +1556,7 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } } @@ -1581,7 +1581,7 @@ void EmitX64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1595,7 +1595,7 @@ void EmitX64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1616,7 +1616,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } } @@ -1757,7 +1757,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { mp::cartesian_product{}); ctx.reg_alloc.HostCall(inst, args[0]); - code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.CallFunction(lut.at(std::make_tuple(fbits, rounding_mode))); } diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp index d36a75426a..e795181872 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp @@ -69,7 +69,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) ctx.reg_alloc.DefineValue(overflow_inst, overflow); } } else { - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); } ctx.reg_alloc.DefineValue(inst, result); @@ -98,7 +98,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(); code.setb(overflow.cvt8()); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); ctx.reg_alloc.DefineValue(inst, addend); } @@ -226,7 +226,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, code.cmovns(y, tmp); code.sets(tmp.cvt8()); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); ctx.reg_alloc.DefineValue(inst, y); } @@ -250,7 +250,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, code.cmovns(y.cvt32(), tmp.cvt32()); code.sets(tmp.cvt8()); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); ctx.reg_alloc.DefineValue(inst, y); } diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index cc344f5249..6bea3c2c95 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -110,7 +110,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } @@ -138,7 +138,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } @@ -165,7 +165,7 @@ static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code, ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } @@ -4261,7 +4261,7 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo UNREACHABLE(); } - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, data); } @@ -4396,7 +4396,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(mask, xmm0); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], mask); if (code.HasHostFeature(HostFeature::SSE41)) { code.pblendvb(result, tmp); @@ -4482,7 +4482,7 @@ static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(bit, upper_tmp); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, result); } @@ -4533,7 +4533,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.vpcmpeqd(mask, result, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); code.vpxor(result, result, mask); code.pmovmskb(bit, mask); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.Release(mask); ctx.reg_alloc.Release(bit); @@ -4589,7 +4589,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.pcmpeqd(tmp, result); code.pxor(result, tmp); code.pmovmskb(bit, tmp); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, result); } @@ -4623,7 +4623,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(bit, y); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, x); } @@ -4676,7 +4676,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, code.pxor(x, y); code.pmovmskb(bit, y); } - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, x); } @@ -4715,7 +4715,7 @@ static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, Block code.pcmpeqd(reconstructed, src); code.movmskps(bit, reconstructed); code.xor_(bit, 0b1111); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, dest); } @@ -4770,7 +4770,7 @@ static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, Blo code.pcmpeqd(reconstructed, src); code.movmskps(bit, reconstructed); code.xor_(bit, 0b1111); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, dest); } @@ -4873,7 +4873,7 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo // Check if any elements matched the mask prior to performing saturation. If so, set the Q bit. const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); code.pmovmskb(bit, tmp); - code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit); + code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); ctx.reg_alloc.DefineValue(inst, zero); } diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index b24120c346..c8f0d9575c 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -450,7 +450,7 @@ void EmitTwoOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.mov(code.ABI_PARAM3.cvt32(), fpcr); - code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.movaps(xword[code.ABI_PARAM2], arg1); code.CallFunction(fn); @@ -487,7 +487,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]); code.mov(code.ABI_PARAM4.cvt32(), fpcr); - code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax); #else constexpr u32 stack_space = 3 * 16; @@ -496,7 +496,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.mov(code.ABI_PARAM4.cvt32(), fpcr); - code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); #endif code.movaps(xword[code.ABI_PARAM2], arg1); @@ -545,7 +545,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]); code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 4 * 16]); code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], ctx.FPCR(fpcr_controlled).Value()); - code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(qword[rsp + ABI_SHADOW_SPACE + 8], rax); #else constexpr u32 stack_space = 4 * 16; @@ -555,7 +555,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]); code.mov(code.ABI_PARAM5.cvt32(), ctx.FPCR(fpcr_controlled).Value()); - code.lea(code.ABI_PARAM6, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + code.lea(code.ABI_PARAM6, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); #endif if constexpr (load_previous_result == LoadPreviousResult::Yes) { diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp index 88bd41a47e..580a32dec8 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp @@ -62,7 +62,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in code.test(overflow.cvt32(), overflow.cvt32()); } code.setnz(overflow); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); ctx.reg_alloc.DefineValue(inst, result); } @@ -104,7 +104,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in code.ktestb(k1, k1); code.setnz(overflow); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); ctx.reg_alloc.DefineValue(inst, result); return; @@ -160,7 +160,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in code.test(overflow.cvt32(), overflow.cvt32()); } code.setnz(overflow); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); if (code.HasHostFeature(HostFeature::SSE41)) { FCODE(blendvp)(result, tmp); @@ -204,7 +204,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.ktestb(k1, k1); code.setnz(overflow); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); ctx.reg_alloc.DefineValue(inst, result); return; @@ -263,7 +263,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* } code.setnz(overflow); - code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); + code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); if constexpr (op == Op::Add) { code.por(result, tmp); diff --git a/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index c5c83d3766..910425f8ac 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -431,6 +431,7 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector(loc)]; } inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept { - ASSERT(loc != HostLoc::RSP); + ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR); return hostloc_info[static_cast(loc)]; } diff --git a/externals/dynarmic/src/dynarmic/ir/basic_block.cpp b/externals/dynarmic/src/dynarmic/ir/basic_block.cpp index dcaf399966..b00ab3cb20 100644 --- a/externals/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/externals/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -86,11 +86,9 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept { } std::string DumpBlock(const IR::Block& block) noexcept { - std::string ret; - - ret += fmt::format("Block: location={}\n", block.Location()); - ret += fmt::format("cycles={}", block.CycleCount()); - ret += fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition())); + std::string ret = fmt::format("Block: location={}-{}\n", block.Location(), block.EndLocation()) + + fmt::format("cycles={}", block.CycleCount()) + + fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition())); if (block.GetCondition() != Cond::AL) { ret += fmt::format(", cond_fail={}", block.ConditionFailedLocation()); }