[dynarmic] spill to XMM if possible, use sub/add instead of inc/dec as per recommendation
This commit is contained in:
parent
3fd586db42
commit
0e574c397f
8 changed files with 47 additions and 47 deletions
|
@ -233,7 +233,7 @@ void A32EmitX64::GenTerminalHandlers() {
|
|||
terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
|
||||
calculate_location_descriptor();
|
||||
code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)]);
|
||||
code.dec(eax);
|
||||
code.sub(eax, 1);
|
||||
code.and_(eax, u32(A32JitState::RSBPtrMask));
|
||||
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)], eax);
|
||||
code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
|
||||
|
|
|
@ -208,7 +208,7 @@ void A64EmitX64::GenTerminalHandlers() {
|
|||
terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
|
||||
calculate_location_descriptor();
|
||||
code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)]);
|
||||
code.dec(eax);
|
||||
code.sub(eax, 1);
|
||||
code.and_(eax, u32(A64JitState::RSBPtrMask));
|
||||
code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)], eax);
|
||||
code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
|
||||
|
|
|
@ -1206,7 +1206,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
|||
}
|
||||
|
||||
// a > 0 && a < 0x00800000;
|
||||
code.dec(tmp);
|
||||
code.sub(tmp, 1);
|
||||
code.cmp(tmp, 0x007FFFFF);
|
||||
code.jb(fallback, code.T_NEAR); //within -127,128
|
||||
needs_fallback = true;
|
||||
|
|
|
@ -3326,7 +3326,7 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) {
|
|||
code.paddb(mask, mask);
|
||||
code.paddb(xmm_a, xmm_a);
|
||||
code.pblendvb(result, alternate);
|
||||
code.dec(counter);
|
||||
code.sub(counter, 1);
|
||||
code.jnz(loop);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
|
@ -3370,7 +3370,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst
|
|||
code.paddw(mask, mask);
|
||||
code.paddw(xmm_a, xmm_a);
|
||||
code.pblendvb(result, alternate);
|
||||
code.dec(counter);
|
||||
code.sub(counter, 1);
|
||||
code.jnz(loop);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
|
|
|
@ -78,16 +78,16 @@ inline bool HostLocIsFlag(HostLoc reg) {
|
|||
|
||||
inline HostLoc HostLocRegIdx(int idx) {
|
||||
ASSERT(idx >= 0 && idx <= 15);
|
||||
return static_cast<HostLoc>(idx);
|
||||
return HostLoc(idx);
|
||||
}
|
||||
|
||||
inline HostLoc HostLocXmmIdx(int idx) {
|
||||
ASSERT(idx >= 0 && idx <= 15);
|
||||
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::XMM0) + idx);
|
||||
return HostLoc(size_t(HostLoc::XMM0) + idx);
|
||||
}
|
||||
|
||||
inline HostLoc HostLocSpill(size_t i) {
|
||||
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::FirstSpill) + i);
|
||||
return HostLoc(size_t(HostLoc::FirstSpill) + i);
|
||||
}
|
||||
|
||||
inline bool HostLocIsSpill(HostLoc reg) {
|
||||
|
|
|
@ -440,10 +440,13 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
|
|||
// all over the place - it also fixes bugs with high reg pressure
|
||||
} else if (*it >= HostLoc::R13 && *it <= HostLoc::R15) {
|
||||
// skip, do not touch
|
||||
// Intel recommends reusing registers as soon as they can be overwritten (DO NOT SPILL)
|
||||
} else if (loc_info.IsEmpty()) {
|
||||
it_empty_candidate = it;
|
||||
break;
|
||||
// No empty registers for some reason (very evil) - just do normal LRU
|
||||
} else {
|
||||
if (loc_info.lru_counter < min_lru_counter) {
|
||||
if (loc_info.IsEmpty())
|
||||
it_empty_candidate = it;
|
||||
// Otherwise a "quasi"-LRU
|
||||
min_lru_counter = loc_info.lru_counter;
|
||||
if (*it >= HostLoc::R8 && *it <= HostLoc::R15) {
|
||||
|
@ -454,9 +457,6 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
|
|||
if (min_lru_counter == 0)
|
||||
break; //early exit
|
||||
}
|
||||
// only if not assigned (i.e for failcase of all LRU=0)
|
||||
if (it_empty_candidate == desired_locations.cend() && loc_info.IsEmpty())
|
||||
it_empty_candidate = it;
|
||||
}
|
||||
}
|
||||
// Final resolution goes as follows:
|
||||
|
@ -527,11 +527,10 @@ void RegAlloc::Move(HostLoc to, HostLoc from) noexcept {
|
|||
|
||||
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
|
||||
ASSERT(bit_width <= HostLocBitWidth(to));
|
||||
ASSERT_MSG(!LocInfo(from).IsEmpty(), "Mov eliminated");
|
||||
|
||||
if (!LocInfo(from).IsEmpty()) {
|
||||
EmitMove(bit_width, to, from);
|
||||
LocInfo(to) = std::exchange(LocInfo(from), {});
|
||||
}
|
||||
EmitMove(bit_width, to, from);
|
||||
LocInfo(to) = std::exchange(LocInfo(from), {});
|
||||
}
|
||||
|
||||
void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept {
|
||||
|
@ -565,30 +564,36 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept {
|
|||
ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
|
||||
ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
|
||||
ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
|
||||
|
||||
const HostLoc new_loc = FindFreeSpill();
|
||||
auto const new_loc = FindFreeSpill(HostLocIsXMM(loc));
|
||||
Move(new_loc, loc);
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::FindFreeSpill() const noexcept {
|
||||
for (size_t i = static_cast<size_t>(HostLoc::FirstSpill); i < hostloc_info.size(); i++) {
|
||||
const auto loc = static_cast<HostLoc>(i);
|
||||
if (LocInfo(loc).IsEmpty()) {
|
||||
return loc;
|
||||
}
|
||||
HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
|
||||
// An XMM register is never spilled into another XMM register
|
||||
if (!is_xmm) {
|
||||
// TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY
|
||||
// Intel recommends spilling GPRs into XMM registers whenever possible
|
||||
for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM0); --i)
|
||||
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
|
||||
return loc;
|
||||
}
|
||||
|
||||
// Otherwise go to stack spilling
|
||||
for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i)
|
||||
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
|
||||
return loc;
|
||||
ASSERT_FALSE("All spill locations are full");
|
||||
}
|
||||
|
||||
inline static Xbyak::RegExp SpillToOpArg_Helper1(HostLoc loc, size_t reserved_stack_space) noexcept {
|
||||
ASSERT(HostLocIsSpill(loc));
|
||||
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
|
||||
ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");
|
||||
return Xbyak::util::rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]);
|
||||
}
|
||||
};
|
||||
|
||||
void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
|
||||
auto const spill_to_op_arg_helper = [&](HostLoc loc, size_t reserved_stack_space) {
|
||||
ASSERT(HostLocIsSpill(loc));
|
||||
size_t i = size_t(loc) - size_t(HostLoc::FirstSpill);
|
||||
ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");
|
||||
return Xbyak::util::rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]);
|
||||
};
|
||||
auto const spill_xmm_to_op = [&](const HostLoc loc) {
|
||||
return Xbyak::util::xword[spill_to_op_arg_helper(loc, reserved_stack_space)];
|
||||
};
|
||||
if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
|
||||
MAYBE_AVX(movaps, HostLocToXmm(to), HostLocToXmm(from));
|
||||
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
|
||||
|
@ -613,7 +618,7 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
|
|||
MAYBE_AVX(movd, HostLocToReg64(to).cvt32(), HostLocToXmm(from));
|
||||
}
|
||||
} else if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
|
||||
const Xbyak::Address spill_addr = SpillToOpArg(from);
|
||||
const Xbyak::Address spill_addr = spill_xmm_to_op(from);
|
||||
ASSERT(spill_addr.getBit() >= bit_width);
|
||||
switch (bit_width) {
|
||||
case 128:
|
||||
|
@ -631,7 +636,7 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
|
|||
UNREACHABLE();
|
||||
}
|
||||
} else if (HostLocIsSpill(to) && HostLocIsXMM(from)) {
|
||||
const Xbyak::Address spill_addr = SpillToOpArg(to);
|
||||
const Xbyak::Address spill_addr = spill_xmm_to_op(to);
|
||||
ASSERT(spill_addr.getBit() >= bit_width);
|
||||
switch (bit_width) {
|
||||
case 128:
|
||||
|
@ -651,16 +656,16 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
|
|||
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code->mov(HostLocToReg64(to), Xbyak::util::qword[SpillToOpArg_Helper1(from, reserved_stack_space)]);
|
||||
code->mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]);
|
||||
} else {
|
||||
code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[SpillToOpArg_Helper1(from, reserved_stack_space)]);
|
||||
code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]);
|
||||
}
|
||||
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code->mov(Xbyak::util::qword[SpillToOpArg_Helper1(to, reserved_stack_space)], HostLocToReg64(from));
|
||||
code->mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from));
|
||||
} else {
|
||||
code->mov(Xbyak::util::dword[SpillToOpArg_Helper1(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
|
||||
code->mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
|
||||
}
|
||||
} else {
|
||||
ASSERT_FALSE("Invalid RegAlloc::EmitMove");
|
||||
|
@ -677,8 +682,4 @@ void RegAlloc::EmitExchange(const HostLoc a, const HostLoc b) noexcept {
|
|||
}
|
||||
}
|
||||
|
||||
Xbyak::Address RegAlloc::SpillToOpArg(const HostLoc loc) noexcept {
|
||||
return Xbyak::util::xword[SpillToOpArg_Helper1(loc, reserved_stack_space)];
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
|
|
@ -243,7 +243,7 @@ private:
|
|||
void MoveOutOfTheWay(HostLoc reg) noexcept;
|
||||
|
||||
void SpillRegister(HostLoc loc) noexcept;
|
||||
HostLoc FindFreeSpill() const noexcept;
|
||||
HostLoc FindFreeSpill(bool is_xmm) const noexcept;
|
||||
|
||||
inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
|
||||
ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
|
||||
|
@ -256,7 +256,6 @@ private:
|
|||
|
||||
void EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
|
||||
void EmitExchange(const HostLoc a, const HostLoc b) noexcept;
|
||||
Xbyak::Address SpillToOpArg(const HostLoc loc) noexcept;
|
||||
|
||||
//data
|
||||
alignas(64) boost::container::static_vector<HostLoc, 28> gpr_order;
|
||||
|
|
|
@ -22,7 +22,7 @@ void PrintVerboseDebuggingOutputLine(RegisterData& reg_data, HostLoc hostloc, si
|
|||
} else if (HostLocIsXMM(hostloc)) {
|
||||
return reg_data.xmms[HostLocToXmm(hostloc).getIdx()];
|
||||
} else if (HostLocIsSpill(hostloc)) {
|
||||
return (*reg_data.spill)[static_cast<size_t>(hostloc) - static_cast<size_t>(HostLoc::FirstSpill)];
|
||||
return (*reg_data.spill)[size_t(hostloc) - size_t(HostLoc::FirstSpill)];
|
||||
} else {
|
||||
fmt::print("invalid hostloc! ");
|
||||
return {0, 0};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue