[dynarmic] configurable JIT state ABI pointer
parent c896897d40
commit b12989c077
15 changed files with 209 additions and 218 deletions
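The change is wide but mechanical: every site where the x64 backend addressed guest JitState through a hard-coded `r15` now goes through a named ABI constant (`ABI_JIT_PTR`, exposed to emitters as `BlockOfCode::ABI_JIT_PTR`), so the JIT-state pointer register is chosen in one place. Below is a minimal, self-contained sketch of that pattern; it is illustrative only and not the actual dynarmic sources (the `JitState` stand-in and the `MJitStateNzcv` helper are invented for the example).

```cpp
#include <cstddef>
#include <cstdint>

#include <xbyak/xbyak.h>

namespace sketch {

// Illustrative stand-in for dynarmic's per-guest JitState struct.
struct JitState {
    std::uint32_t cpsr_nzcv;
    std::uint32_t halt_reason;
};

// Single point of definition for the register that holds the JitState pointer;
// before this commit the register was written as r15 at every emission site.
inline const Xbyak::Reg64 ABI_JIT_PTR = Xbyak::util::r15;

// Emitters build guest-state addresses relative to the named register
// instead of naming r15 directly.
inline Xbyak::Address MJitStateNzcv() {
    using namespace Xbyak::util;
    return dword[ABI_JIT_PTR + offsetof(JitState, cpsr_nzcv)];
}

}  // namespace sketch
```

Changing the JIT-state register later would then only require touching the ABI definition, not every emitter.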
@@ -47,6 +47,7 @@ constexpr std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);

+ constexpr inline Xbyak::Reg ABI_JIT_REG = Xbyak::util::rbx;
#ifdef _WIN32

// Microsoft x64 ABI
@@ -79,7 +79,7 @@ contain a prediction with the same `UniqueHash`.
? u64(unique_hash_to_code_ptr[imm64])
: u64(code->GetReturnFromRunCodeAddress());

- code->mov(index_reg, dword[r15 + offsetof(JitState, rsb_ptr)]);
+ code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
code->add(index_reg, 1);
code->and_(index_reg, u32(JitState::RSBSize - 1));

@@ -91,13 +91,13 @@ contain a prediction with the same `UniqueHash`.

Xbyak::Label label;
for (size_t i = 0; i < JitState::RSBSize; ++i) {
- code->cmp(loc_desc_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+ code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->je(label, code->T_SHORT);
}

- code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg);
- code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
- code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
+ code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
+ code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
+ code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
code->L(label);
}

@@ -122,14 +122,14 @@ To check if a predicition is in the RSB, we linearly scan the RSB.
// This calculation has to match up with IREmitter::PushRSB
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
code->shl(rcx, 32);
- code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
- code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]);
+ code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
+ code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
code->or_(rbx, rcx);

code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
for (size_t i = 0; i < JitState::RSBSize; ++i) {
- code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
- code->cmove(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
+ code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+ code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
}

code->jmp(rax);
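The two documentation hunks above describe dynarmic's return stack buffer (RSB): PushRSB records a (location descriptor, code pointer) pair in a small ring, and the lookup scans the ring linearly, selecting the cached code pointer with cmp/cmove and otherwise falling through to the return-from-run-code address. As a hedged, plain-C++ sketch of that lookup logic (sizes and field names are illustrative, not the real JitState layout):

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

constexpr std::size_t RSBSize = 8;  // illustrative size

struct RSB {
    std::array<std::uint64_t, RSBSize> location_descriptors{};
    std::array<std::uint64_t, RSBSize> codeptrs{};
};

// Mirrors the emitted loop: start with the dispatcher's return address and
// overwrite it with the cached code pointer whenever a descriptor matches.
inline std::uint64_t LookupRSB(const RSB& rsb, std::uint64_t unique_hash,
                               std::uint64_t return_from_run_code) {
    std::uint64_t target = return_from_run_code;
    for (std::size_t i = 0; i < RSBSize; ++i) {
        if (rsb.location_descriptors[i] == unique_hash) {
            target = rsb.codeptrs[i];
        }
    }
    return target;
}
```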
@@ -44,21 +44,21 @@ namespace Dynarmic::Backend::X64 {
using namespace Xbyak::util;

static Xbyak::Address MJitStateReg(A32::Reg reg) {
- return dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
+ return dword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
}

static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) {
if (A32::IsSingleExtReg(reg)) {
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::S0);
- return dword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u32) * index];
+ return dword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + sizeof(u32) * index];
}
if (A32::IsDoubleExtReg(reg)) {
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::D0);
- return qword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u64) * index];
+ return qword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + sizeof(u64) * index];
}
if (A32::IsQuadExtReg(reg)) {
const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::Q0);
- return xword[r15 + offsetof(A32JitState, ExtReg) + 2 * sizeof(u64) * index];
+ return xword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + 2 * sizeof(u64) * index];
}
ASSERT_FALSE("Should never happen.");
}

@@ -220,7 +220,7 @@ void A32EmitX64::GenTerminalHandlers() {
// PC ends up in ebp, location_descriptor ends up in rbx
const auto calculate_location_descriptor = [this] {
// This calculation has to match up with IREmitter::PushRSB
- code.mov(ebx, dword[r15 + offsetof(A32JitState, upper_location_descriptor)]);
+ code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
code.shl(rbx, 32);
code.mov(ecx, MJitStateReg(A32::Reg::PC));
code.mov(ebp, ecx);

@@ -232,17 +232,17 @@ void A32EmitX64::GenTerminalHandlers() {
code.align();
terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
calculate_location_descriptor();
- code.mov(eax, dword[r15 + offsetof(A32JitState, rsb_ptr)]);
+ code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)]);
code.dec(eax);
code.and_(eax, u32(A32JitState::RSBPtrMask));
- code.mov(dword[r15 + offsetof(A32JitState, rsb_ptr)], eax);
- code.cmp(rbx, qword[r15 + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)], eax);
+ code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
if (conf.HasOptimization(OptimizationFlag::FastDispatch)) {
code.jne(rsb_cache_miss);
} else {
code.jne(code.GetReturnFromRunCodeAddress());
}
- code.mov(rax, qword[r15 + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]);
+ code.mov(rax, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]);
code.jmp(rax);
PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a32_terminal_handler_pop_rsb_hint");
@@ -392,17 +392,17 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
// so we load them both at the same time with one 64-bit read. This allows us to
// extract all of their bits together at once with one pext.
static_assert(offsetof(A32JitState, upper_location_descriptor) + 4 == offsetof(A32JitState, cpsr_ge));
- code.mov(result.cvt64(), qword[r15 + offsetof(A32JitState, upper_location_descriptor)]);
+ code.mov(result.cvt64(), qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
code.mov(tmp.cvt64(), 0x80808080'00000003ull);
code.pext(result.cvt64(), result.cvt64(), tmp.cvt64());
code.mov(tmp, 0x000f0220);
code.pdep(result, result, tmp);
} else {
- code.mov(result, dword[r15 + offsetof(A32JitState, upper_location_descriptor)]);
+ code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
code.imul(result, result, 0x120);
code.and_(result, 0x00000220);

- code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_ge)]);
+ code.mov(tmp, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
code.and_(tmp, 0x80808080);
code.imul(tmp, tmp, 0x00204081);
code.shr(tmp, 12);

@@ -410,11 +410,11 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
code.or_(result, tmp);
}

- code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_q)]);
+ code.mov(tmp, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
code.shl(tmp, 27);
code.or_(result, tmp);

- code.mov(tmp2, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
+ code.mov(tmp2, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]);
if (code.HasHostFeature(HostFeature::FastBMI2)) {
code.mov(tmp, NZCV::x64_mask);
code.pext(tmp2, tmp2, tmp);

@@ -426,7 +426,7 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
}
code.or_(result, tmp2);

- code.or_(result, dword[r15 + offsetof(A32JitState, cpsr_jaifm)]);
+ code.or_(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)]);

ctx.reg_alloc.DefineValue(inst, result);
}

@@ -444,7 +444,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {

// cpsr_q
code.bt(cpsr, 27);
- code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
+ code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);

// cpsr_nzcv
code.mov(tmp, cpsr);

@@ -456,12 +456,12 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
code.imul(tmp, tmp, NZCV::to_x64_multiplier);
code.and_(tmp, NZCV::x64_mask);
}
- code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], tmp);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], tmp);

// cpsr_jaifm
code.mov(tmp, cpsr);
code.and_(tmp, 0x010001DF);
- code.mov(dword[r15 + offsetof(A32JitState, cpsr_jaifm)], tmp);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)], tmp);

if (code.HasHostFeature(HostFeature::FastBMI2)) {
// cpsr_et and cpsr_ge

@@ -469,7 +469,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
// This mask is 0x7FFF0000, because we do not want the MSB to be sign extended to the upper dword.
static_assert((A32::LocationDescriptor::FPSCR_MODE_MASK & ~0x7FFF0000) == 0);

- code.and_(qword[r15 + offsetof(A32JitState, upper_location_descriptor)], u32(0x7FFF0000));
+ code.and_(qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], u32(0x7FFF0000));
code.mov(tmp, 0x000f0220);
code.pext(cpsr, cpsr, tmp);
code.mov(tmp.cvt64(), 0x01010101'00000003ull);

@@ -479,14 +479,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(tmp2.cvt64(), tmp.cvt64());
code.sub(tmp.cvt64(), cpsr.cvt64());
code.xor_(tmp.cvt64(), tmp2.cvt64());
- code.or_(qword[r15 + offsetof(A32JitState, upper_location_descriptor)], tmp.cvt64());
+ code.or_(qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], tmp.cvt64());
} else {
- code.and_(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], u32(0xFFFF0000));
+ code.and_(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], u32(0xFFFF0000));
code.mov(tmp, cpsr);
code.and_(tmp, 0x00000220);
code.imul(tmp, tmp, 0x00900000);
code.shr(tmp, 28);
- code.or_(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], tmp);
+ code.or_(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], tmp);

code.and_(cpsr, 0x000f0000);
code.shr(cpsr, 16);

@@ -495,14 +495,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(tmp, 0x80808080);
code.sub(tmp, cpsr);
code.xor_(tmp, 0x80808080);
- code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], tmp);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], tmp);
}
}

void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
- code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], to_store);
}

void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {

@@ -510,7 +510,7 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
if (args[0].IsImmediate()) {
const u32 imm = args[0].GetImmediateU32();

- code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();

@@ -518,14 +518,14 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
code.shr(a, 28);
code.mov(b, NZCV::x64_mask);
code.pdep(a, a, b);
- code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
} else {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

code.shr(a, 28);
code.imul(a, a, NZCV::to_x64_multiplier);
code.and_(a, NZCV::x64_mask);
- code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
}
}
@@ -534,25 +534,25 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
if (args[0].IsImmediate()) {
const u32 imm = args[0].GetImmediateU32();

- code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
- code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
+ code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();

code.shr(a, 28);
- code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
+ code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
code.mov(b, NZCV::x64_mask);
code.pdep(a, a, b);
- code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
} else {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

code.shr(a, 28);
- code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
+ code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
code.imul(a, a, NZCV::to_x64_multiplier);
code.and_(a, NZCV::x64_mask);
- code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
}
}

@@ -562,10 +562,10 @@ void A32EmitX64::EmitA32SetCpsrNZ(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();

- code.movzx(tmp, code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1]);
+ code.movzx(tmp, code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1]);
code.and_(tmp, 1);
code.or_(tmp, nz);
- code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], tmp.cvt8());
+ code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], tmp.cvt8());
}

void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {

@@ -575,11 +575,11 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
if (args[1].IsImmediate()) {
const bool c = args[1].GetImmediateU1();

- code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], c);
+ code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
} else {
const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(args[1]).cvt8();

- code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], c);
+ code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
}
} else {
const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

@@ -588,19 +588,19 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
const bool c = args[1].GetImmediateU1();

code.or_(nz, c);
- code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
+ code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
} else {
const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(args[1]).cvt32();

code.or_(nz, c);
- code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
+ code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
}
}
}

static void EmitGetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
- code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
+ code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]);
if (flag_bit != 0) {
code.shr(result, static_cast<int>(flag_bit));
}

@@ -616,18 +616,18 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
- code.mov(dword[r15 + offsetof(A32JitState, cpsr_q)], 1);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], 1);
}
} else {
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();

- code.or_(code.byte[r15 + offsetof(A32JitState, cpsr_q)], to_store);
+ code.or_(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], to_store);
}
}

void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
- code.movd(result, dword[r15 + offsetof(A32JitState, cpsr_ge)]);
+ code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
ctx.reg_alloc.DefineValue(inst, result);
}

@@ -637,10 +637,10 @@ void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {

if (args[0].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
- code.movd(dword[r15 + offsetof(A32JitState, cpsr_ge)], to_store);
+ code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
} else {
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32();
- code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], to_store);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
}
}

@@ -654,7 +654,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
ge |= mcl::bit::get_bit<17>(imm) ? 0x0000FF00 : 0;
ge |= mcl::bit::get_bit<16>(imm) ? 0x000000FF : 0;

- code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], ge);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], ge);
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();

@@ -663,7 +663,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
code.shr(a, 16);
code.pdep(a, a, b);
code.imul(a, a, 0xFF);
- code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], a);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a);
} else {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

@@ -672,7 +672,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
code.imul(a, a, 0x00204081);
code.and_(a, 0x01010101);
code.imul(a, a, 0xFF);
- code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], a);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a);
}
}

@@ -716,7 +716,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
const u32 new_upper = upper_without_t | (mcl::bit::get_bit<0>(new_pc) ? 1 : 0);

code.mov(MJitStateReg(A32::Reg::PC), new_pc & mask);
- code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
} else {
const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32();
const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();

@@ -728,7 +728,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
code.lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code.and_(new_pc, mask);
code.mov(MJitStateReg(A32::Reg::PC), new_pc);
- code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
}
}

@@ -819,7 +819,7 @@ void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {

void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
- code.mov(result, dword[r15 + offsetof(A32JitState, fpsr_nzcv)]);
+ code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)]);
ctx.reg_alloc.DefineValue(inst, result);
}

@@ -833,7 +833,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(tmp, NZCV::x64_mask);
code.pext(tmp, value, tmp);
code.shl(tmp, 28);
- code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], tmp);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)], tmp);

return;
}

@@ -843,7 +843,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
code.and_(value, NZCV::x64_mask);
code.imul(value, value, NZCV::from_x64_multiplier);
code.and_(value, NZCV::arm_mask);
- code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], value);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)], value);
}

static void EmitCoprocessorException() {

@@ -1155,7 +1155,7 @@ void A32EmitX64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_locat
}();

if (old_upper != new_upper) {
- code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
}
}

@@ -1175,7 +1175,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
EmitPatchJg(terminal.next);
}
} else {
- code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
+ code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0);
patch_information[terminal.next].jz.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);

@@ -1240,7 +1240,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr
}

void A32EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
- code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
+ code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0);
code.jne(code.GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location, is_single_step);
}
@@ -168,7 +168,7 @@ void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
}

void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) {
- code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0));
+ code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, exclusive_state)], u8(0));
}

void A32EmitX64::EmitA32ExclusiveReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {

@@ -244,14 +244,14 @@ void A32EmitX64::EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak

const A32::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};

- code.test(dword[r15 + offsetof(A32JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
+ code.test(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
if (end) {
code.jz(*end, code.T_NEAR);
} else {
code.jz(skip, code.T_NEAR);
}
EmitSetUpperLocationDescriptor(current_location, ctx.Location());
- code.mov(dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC());
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC());
code.ForceReturnFromRunCode();
code.L(skip);
}
@@ -192,10 +192,10 @@ void A64EmitX64::GenTerminalHandlers() {
const auto calculate_location_descriptor = [this] {
// This calculation has to match up with A64::LocationDescriptor::UniqueHash
// TODO: Optimization is available here based on known state of fpcr.
- code.mov(rbp, qword[r15 + offsetof(A64JitState, pc)]);
+ code.mov(rbp, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]);
code.mov(rcx, A64::LocationDescriptor::pc_mask);
code.and_(rcx, rbp);
- code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]);
+ code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
code.and_(ebx, A64::LocationDescriptor::fpcr_mask);
code.shl(rbx, A64::LocationDescriptor::fpcr_shift);
code.or_(rbx, rcx);

@@ -207,17 +207,17 @@ void A64EmitX64::GenTerminalHandlers() {
code.align();
terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
calculate_location_descriptor();
- code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]);
+ code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)]);
code.dec(eax);
code.and_(eax, u32(A64JitState::RSBPtrMask));
- code.mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax);
- code.cmp(rbx, qword[r15 + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)], eax);
+ code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
if (conf.HasOptimization(OptimizationFlag::FastDispatch)) {
code.jne(rsb_cache_miss, code.T_NEAR);
} else {
code.jne(code.GetReturnFromRunCodeAddress());
}
- code.mov(rax, qword[r15 + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]);
+ code.mov(rax, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]);
code.jmp(rax);
PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a64_terminal_handler_pop_rsb_hint");

@@ -272,7 +272,7 @@ void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {

void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
- code.mov(result, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]);
+ code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);
code.shr(result, NZCV::x64_c_flag_bit);
code.and_(result, 1);
ctx.reg_alloc.DefineValue(inst, result);

@@ -281,7 +281,7 @@ void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr().cvt32();

- code.mov(nzcv_raw, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]);
+ code.mov(nzcv_raw, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);

if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();

@@ -310,20 +310,20 @@ void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
code.imul(nzcv_raw, nzcv_raw, NZCV::to_x64_multiplier);
code.and_(nzcv_raw, NZCV::x64_mask);
}
- code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw);
}

void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
- code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], to_store);
+ code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], to_store);
}

void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();

- code.mov(result, dword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
+ code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
ctx.reg_alloc.DefineValue(inst, result);
}

@@ -331,13 +331,13 @@ void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();

- code.mov(result, qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
+ code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
ctx.reg_alloc.DefineValue(inst, result);
}

void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
- const auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
+ const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.movd(result, addr);

@@ -346,7 +346,7 @@ void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {

void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
- const auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
+ const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.movq(result, addr);

@@ -355,7 +355,7 @@ void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {

void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
- const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
+ const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.movaps(result, addr);

@@ -364,13 +364,13 @@ void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {

void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
- code.mov(result, qword[r15 + offsetof(A64JitState, sp)]);
+ code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]);
ctx.reg_alloc.DefineValue(inst, result);
}

void A64EmitX64::EmitA64GetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
- code.mov(result, dword[r15 + offsetof(A64JitState, fpcr)]);
+ code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
ctx.reg_alloc.DefineValue(inst, result);
}
@@ -388,7 +388,7 @@ void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
- const auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
+ const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
if (args[1].FitsInImmediateS32()) {
code.mov(addr, args[1].GetImmediateS32());
} else {

@@ -402,7 +402,7 @@ void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
- const auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
+ const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
if (args[1].FitsInImmediateS32()) {
code.mov(addr, args[1].GetImmediateS32());
} else if (args[1].IsInXmm()) {

@@ -417,7 +417,7 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
- const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
+ const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

@@ -430,7 +430,7 @@ void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
- const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
+ const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]);
code.movq(to_store, to_store); // TODO: Remove when able

@@ -440,7 +440,7 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
- const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
+ const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
code.movaps(addr, to_store);

@@ -448,7 +448,7 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {

void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const auto addr = qword[r15 + offsetof(A64JitState, sp)];
+ const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)];
if (args[0].FitsInImmediateS32()) {
code.mov(addr, args[0].GetImmediateS32());
} else if (args[0].IsInXmm()) {

@@ -486,7 +486,7 @@ void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {

void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const auto addr = qword[r15 + offsetof(A64JitState, pc)];
+ const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)];
if (args[0].FitsInImmediateS32()) {
code.mov(addr, args[0].GetImmediateS32());
} else if (args[0].IsInXmm()) {

@@ -507,7 +507,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
code.mov(param[0], imm);
});
// The kernel would have to execute ERET to get here, which would clear exclusive state.
- code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
+ code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A64JitState, exclusive_state)], u8(0));
}

void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {

@@ -621,7 +621,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDesc
code.SwitchMxcsrOnExit();
Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, [&](RegList param) {
code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC());
- code.mov(qword[r15 + offsetof(A64JitState, pc)], param[0]);
+ code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], param[0]);
code.mov(param[1].cvt32(), terminal.num_instructions);
});
code.ReturnFromRunCode(true); // TODO: Check cycles

@@ -635,7 +635,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
// Used for patches and linking
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
- code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
+ code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
} else {
if (conf.enable_cycle_counting) {

@@ -647,7 +647,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
EmitPatchJg(terminal.next);
}
} else {
- code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
+ code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0);
patch_information[terminal.next].jz.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);

@@ -656,7 +656,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
}
}
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
- code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
+ code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode();
}
}

@@ -664,7 +664,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
- code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
+ code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
} else {
patch_information[terminal.next].jmp.push_back(code.getCurr());

@@ -719,7 +719,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr
}

void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
- code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
+ code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0);
code.jne(code.GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location, is_single_step);
}

@@ -730,7 +730,7 @@ void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr
code.jg(target_code_ptr);
} else {
code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
- code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
+ code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.jg(code.GetReturnFromRunCodeAddress());
}
code.EnsurePatchLocationSize(patch_location, 23);

@@ -742,7 +742,7 @@ void A64EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr
code.jz(target_code_ptr);
} else {
code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
- code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
+ code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.jz(code.GetReturnFromRunCodeAddress());
}
code.EnsurePatchLocationSize(patch_location, 23);

@@ -754,7 +754,7 @@ void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr
code.jmp(target_code_ptr);
} else {
code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
- code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
+ code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.jmp(code.GetReturnFromRunCodeAddress());
}
code.EnsurePatchLocationSize(patch_location, 22);
@@ -324,7 +324,7 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
}

void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) {
- code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
+ code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A64JitState, exclusive_state)], u8(0));
}

void A64EmitX64::EmitA64ExclusiveReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {

@@ -416,14 +416,14 @@ void A64EmitX64::EmitCheckMemoryAbort(A64EmitContext&, IR::Inst* inst, Xbyak::La

const A64::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};

- code.test(dword[r15 + offsetof(A64JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
+ code.test(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
if (end) {
code.jz(*end, code.T_NEAR);
} else {
code.jz(skip, code.T_NEAR);
}
code.mov(rax, current_location.PC());
- code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
+ code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode();
code.L(skip);
}
@@ -17,6 +17,7 @@ namespace Dynarmic::Backend::X64 {

class BlockOfCode;

+ constexpr HostLoc ABI_JIT_PTR = HostLoc::R15;
#ifdef _WIN32

constexpr HostLoc ABI_RETURN = HostLoc::RAX;
@@ -36,6 +36,7 @@

namespace Dynarmic::Backend::X64 {

+ const Xbyak::Reg64 BlockOfCode::ABI_JIT_PTR = HostLocToReg64(Dynarmic::Backend::X64::ABI_JIT_PTR);
#ifdef _WIN32
const Xbyak::Reg64 BlockOfCode::ABI_RETURN = HostLocToReg64(Dynarmic::Backend::X64::ABI_RETURN);
const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM1);

@@ -322,7 +323,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
// that the stack is appropriately aligned for CALLs.
ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));

- mov(r15, ABI_PARAM1);
+ mov(ABI_JIT_PTR, ABI_PARAM1);
mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register

if (cb.enable_cycle_counting) {

@@ -335,7 +336,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
// r13 = fastmem pointer
rcp(*this);

- cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
+ cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited, T_NEAR);

SwitchMxcsrOnEntry();

@@ -346,7 +347,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {

ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));

- mov(r15, ABI_PARAM1);
+ mov(ABI_JIT_PTR, ABI_PARAM1);

if (cb.enable_cycle_counting) {
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1);

@@ -355,10 +356,10 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {

rcp(*this);

- cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
+ cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
lock();
- or_(dword[r15 + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
+ or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));

SwitchMxcsrOnEntry();
jmp(ABI_PARAM2);

@@ -368,7 +369,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
align();
return_from_run_code[0] = getCurr<const void*>();

- cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
+ cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
jne(return_to_caller);
if (cb.enable_cycle_counting) {
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);

@@ -380,7 +381,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
align();
return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr<const void*>();

- cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
+ cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited);
if (cb.enable_cycle_counting) {
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);

@@ -409,7 +410,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {

xor_(eax, eax);
lock();
- xchg(dword[r15 + jsi.offsetof_halt_reason], eax);
+ xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax);

ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
ret();

@@ -419,22 +420,22 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {

void BlockOfCode::SwitchMxcsrOnEntry() {
stmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]);
- ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
+ ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]);
}

void BlockOfCode::SwitchMxcsrOnExit() {
- stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
+ stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]);
ldmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]);
}

void BlockOfCode::EnterStandardASIMD() {
- stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
- ldmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]);
+ stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]);
+ ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_asimd_MXCSR]);
}

void BlockOfCode::LeaveStandardASIMD() {
- stmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]);
- ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
+ stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_asimd_MXCSR]);
+ ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]);
}

void BlockOfCode::UpdateTicks() {
@@ -155,6 +155,7 @@ public:
void SetCodePtr(CodePtr code_ptr);
void EnsurePatchLocationSize(CodePtr begin, size_t size);

+ static const Xbyak::Reg64 ABI_JIT_PTR;
// ABI registers
#ifdef _WIN32
static const Xbyak::Reg64 ABI_RETURN;
@@ -91,18 +91,18 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, I
? iter->second.entrypoint
: code.GetReturnFromRunCodeAddress();

- code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]);
+ code.mov(index_reg.cvt32(), dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_rsb_ptr]);
code.mov(loc_desc_reg, target.Value());
patch_information[target].mov_rcx.push_back(code.getCurr());
EmitPatchMovRcx(target_code_ptr);
- code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
- code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx);
+ code.mov(qword[code.ABI_JIT_PTR + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
+ code.mov(qword[code.ABI_JIT_PTR + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx);
// Byte size hack
DEBUG_ASSERT(code.GetJitStateInfo().rsb_ptr_mask <= 0xFF);
code.add(index_reg.cvt32(), 1); //flags trashed, 1 single byte, haswell doesn't care
code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask)); //trashes flags
// Results ready and sort by least needed: give OOO some break
- code.mov(dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
+ code.mov(dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
}

void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) {

@@ -284,7 +284,7 @@ void EmitX64::EmitAddCycles(size_t cycles) {
Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
Xbyak::Label pass;

- code.mov(eax, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
+ code.mov(eax, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]);

code.LoadRequiredFlagsForCondFromRax(cond);
|
|||
|
||||
static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (code.HasHostFeature(HostFeature::SSE42)) {
|
||||
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size);
|
||||
|
||||
if (data_size != 64) {
|
||||
code.crc32(crc, value);
|
||||
} else {
|
||||
code.crc32(crc.cvt64(), value);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, crc);
|
||||
return;
|
||||
} else {
|
||||
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
|
||||
code.mov(code.ABI_PARAM3.cvt32(), data_size / CHAR_BIT); //zext
|
||||
code.CallFunction(&CRC32::ComputeCRC32Castagnoli);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
|
||||
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
|
||||
code.CallFunction(&CRC32::ComputeCRC32Castagnoli);
|
||||
}
|
||||
|
||||
static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
|
||||
|
@ -69,10 +65,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
|
|||
code.pextrd(crc, xmm_value, 2);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, crc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
|
||||
} else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
|
||||
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32();
|
||||
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
|
||||
|
@ -90,10 +83,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
|
|||
code.pextrd(crc, xmm_value, 2);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, crc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
|
||||
} else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
|
||||
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
|
||||
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
|
||||
|
@ -111,12 +101,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
|
|||
code.pextrd(crc, xmm_value, 2);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, crc);
|
||||
return;
|
||||
} else {
|
||||
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
|
||||
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
|
||||
code.CallFunction(&CRC32::ComputeCRC32ISO);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
|
||||
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
|
||||
code.CallFunction(&CRC32::ComputeCRC32ISO);
|
||||
}
|
||||
|
||||
void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {
|
||||
|
|
|
@@ -143,7 +143,7 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst*
const Xbyak::Reg then_ = ctx.reg_alloc.UseGpr(args[1]).changeBit(bitsize);
const Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(args[2]).changeBit(bitsize);

- code.mov(nzcv, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
+ code.mov(nzcv, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]);

code.LoadRequiredFlagsForCondFromRax(args[0].GetImmediateCond());
@ -227,7 +227,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
ctx.reg_alloc.HostCall(inst, {}, args[1]);
|
||||
|
||||
code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1));
|
||||
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
|
||||
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
|
||||
if (ordered) {
|
||||
code.mfence();
|
||||
|
@ -245,7 +245,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.EndOfAllocScope();
|
||||
ctx.reg_alloc.HostCall(nullptr);
|
||||
|
||||
code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1));
|
||||
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
|
||||
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
|
||||
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
|
||||
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
|
||||
|
@ -285,9 +285,9 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
Xbyak::Label end;

code.mov(code.ABI_RETURN, u32(1));
code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.je(end);
code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
if constexpr (bitsize != 128) {
using T = mcl::unsigned_integer_of_size<bitsize>;
@ -355,7 +355,7 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in

EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());

code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.mov(qword[tmp], vaddr);
@ -439,14 +439,14 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i

code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.mov(status, u32(1));
code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.je(*end, code.T_NEAR);
code.cmp(qword[tmp], vaddr);
code.jne(*end, code.T_NEAR);

EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax);

code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id)));

if constexpr (bitsize == 128) {
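Taken together, the exclusive-access hunks show the monitor protocol kept in JitState, now addressed through ABI_JIT_PTR: an exclusive read sets exclusive_state and records the address, and an exclusive write only proceeds if exclusive_state is still set and the recorded address matches, clearing the flag afterwards. A simplified model of that protocol in plain C++; field names follow the diff, but this is an illustration, not the emitted code:

#include <cstdint>

// Simplified model of the exclusive monitor state machine that the JIT keeps in JitState.
struct ExclusiveStateModel {
    bool exclusive_state = false;
    std::uint64_t exclusive_address = 0;

    void ExclusiveRead(std::uint64_t vaddr) {
        exclusive_state = true;        // mov byte[ABI_JIT_PTR + offsetof(JitState, exclusive_state)], 1
        exclusive_address = vaddr;     // mov qword[monitor address pointer], vaddr
    }

    // Returns 0 when the store may proceed, 1 when it must fail,
    // matching the status value produced in the hunks above.
    std::uint32_t ExclusiveWrite(std::uint64_t vaddr) {
        if (!exclusive_state || exclusive_address != vaddr)
            return 1;
        exclusive_state = false;       // mov byte[ABI_JIT_PTR + offsetof(JitState, exclusive_state)], 0
        // ... perform the actual store under the monitor lock ...
        return 0;
    }
};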
@ -501,7 +501,6 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
}

code.setnz(status.cvt8());

ctx.deferred_emits.emplace_back([=, this] {
code.L(*abort);
code.call(wrapped_fn);
@ -46,26 +46,25 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi

code.test(vaddr, align_mask);

if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask;

SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel();

code.jnz(*detect_boundary, code.T_NEAR);
code.L(*resume);

ctx.deferred_emits.emplace_back([=, &code] {
code.L(*detect_boundary);
code.mov(tmp, vaddr);
code.and_(tmp, page_align_mask);
code.cmp(tmp, page_align_mask);
code.jne(*resume, code.T_NEAR);
// NOTE: We expect to fallthrough into abort code here.
});
} else {
code.jnz(abort, code.T_NEAR);
return;
}

const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask;

SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel();

code.jnz(*detect_boundary, code.T_NEAR);
code.L(*resume);

ctx.deferred_emits.emplace_back([=, &code] {
code.L(*detect_boundary);
code.mov(tmp, vaddr);
code.and_(tmp, page_align_mask);
code.cmp(tmp, page_align_mask);
code.jne(*resume, code.T_NEAR);
// NOTE: We expect to fallthrough into abort code here.
});
}

template<typename EmitContext>
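The condition flip above is a pure restructure: the negated early-return branch becomes the else branch, so the page-boundary handling reads first. The deferred check itself relies on (vaddr & page_align_mask) == page_align_mask holding exactly when the access starts in the last aligned slot of its page, which is the only case where a misaligned access can also cross a page boundary. A standalone illustration of that arithmetic, with page size and access width chosen for the example:

// Standalone illustration of the page-boundary test emitted above.
// Assumptions for the example: 4 KiB pages and an 8-byte (64-bit) access.
#include <cstdint>
#include <cstdio>

int main() {
    const std::uint64_t page_size = 4096;
    const std::uint32_t align_mask = 8 - 1;                                  // access size in bytes, minus one
    const std::uint32_t page_align_mask =
        static_cast<std::uint32_t>(page_size - 1) & ~align_mask;             // 0xFF8 for these parameters

    for (std::uint64_t vaddr : {0x1000ull, 0x1FF0ull, 0x1FFDull}) {
        const bool misaligned = (vaddr & align_mask) != 0;
        const bool last_slot_of_page = (vaddr & page_align_mask) == page_align_mask;
        // The emitted code only falls through to the abort path when both are true.
        std::printf("%#llx: misaligned=%d last_slot_of_page=%d\n",
                    static_cast<unsigned long long>(vaddr),
                    misaligned ? 1 : 0, last_slot_of_page ? 1 : 0);
    }
    return 0;
}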
@ -244,29 +243,29 @@ const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::Reg
ASSERT_FALSE("Invalid bitsize");
}
return fastmem_location;
} else {
const void* fastmem_location = code.getCurr();
switch (bitsize) {
case 8:
code.movzx(Xbyak::Reg64(value_idx).cvt32(), code.byte[addr]);
break;
case 16:
code.movzx(Xbyak::Reg64(value_idx).cvt32(), word[addr]);
break;
case 32:
code.mov(Xbyak::Reg64(value_idx).cvt32(), dword[addr]);
break;
case 64:
code.mov(Xbyak::Reg64(value_idx), qword[addr]);
break;
case 128:
code.movups(Xbyak::Xmm(value_idx), xword[addr]);
break;
default:
ASSERT_FALSE("Invalid bitsize");
}
return fastmem_location;
}

const void* fastmem_location = code.getCurr();
switch (bitsize) {
case 8:
code.movzx(Xbyak::Reg64(value_idx).cvt32(), code.byte[addr]);
break;
case 16:
code.movzx(Xbyak::Reg64(value_idx).cvt32(), word[addr]);
break;
case 32:
code.mov(Xbyak::Reg64(value_idx).cvt32(), dword[addr]);
break;
case 64:
code.mov(Xbyak::Reg64(value_idx), qword[addr]);
break;
case 128:
code.movups(Xbyak::Xmm(value_idx), xword[addr]);
break;
default:
ASSERT_FALSE("Invalid bitsize");
}
return fastmem_location;
}

template<std::size_t bitsize>
@ -312,29 +311,29 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int
ASSERT_FALSE("Invalid bitsize");
}
return fastmem_location;
} else {
const void* fastmem_location = code.getCurr();
switch (bitsize) {
case 8:
code.mov(code.byte[addr], Xbyak::Reg64(value_idx).cvt8());
break;
case 16:
code.mov(word[addr], Xbyak::Reg64(value_idx).cvt16());
break;
case 32:
code.mov(dword[addr], Xbyak::Reg64(value_idx).cvt32());
break;
case 64:
code.mov(qword[addr], Xbyak::Reg64(value_idx));
break;
case 128:
code.movups(xword[addr], Xbyak::Xmm(value_idx));
break;
default:
ASSERT_FALSE("Invalid bitsize");
}
return fastmem_location;
}

const void* fastmem_location = code.getCurr();
switch (bitsize) {
case 8:
code.mov(code.byte[addr], Xbyak::Reg64(value_idx).cvt8());
break;
case 16:
code.mov(word[addr], Xbyak::Reg64(value_idx).cvt16());
break;
case 32:
code.mov(dword[addr], Xbyak::Reg64(value_idx).cvt32());
break;
case 64:
code.mov(qword[addr], Xbyak::Reg64(value_idx));
break;
case 128:
code.movups(xword[addr], Xbyak::Xmm(value_idx));
break;
default:
ASSERT_FALSE("Invalid bitsize");
}
return fastmem_location;
}

template<typename UserConfig>
@ -245,11 +245,11 @@ private:
HostLoc FindFreeSpill() const noexcept;

inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
ASSERT(loc != HostLoc::RSP);
return hostloc_info[static_cast<size_t>(loc)];
}
inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
ASSERT(loc != HostLoc::RSP);
return hostloc_info[static_cast<size_t>(loc)];
}