forked from eden-emu/eden
[dynarmic] spill to XMM if possible, use sub/add instead of inc/dec as per recommendation
This commit is contained in:
parent a01fb901e5
commit d3fccce260
8 changed files with 47 additions and 47 deletions
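Both changes follow Intel optimization-manual guidance. inc/dec write every arithmetic flag except CF, so a later flag-consuming instruction may have to merge the stale CF with the freshly written flags (a partial-flag stall); sub reg, 1 writes all flags at once and avoids the merge, at the cost of a slightly longer encoding. A minimal xbyak sketch showing the two forms side by side (the DecVsSub generator is our illustration, not dynarmic code):

    #include <xbyak/xbyak.h>

    // Illustrative only: emits both forms so the flag behaviour can be compared.
    struct DecVsSub : Xbyak::CodeGenerator {
        DecVsSub() {
            dec(eax);     // leaves CF untouched -> potential partial-flag merge
            sub(eax, 1);  // writes all arithmetic flags, no merge needed
            ret();
        }
    };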
@@ -233,7 +233,7 @@ void A32EmitX64::GenTerminalHandlers() {
     terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
     calculate_location_descriptor();
     code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)]);
-    code.dec(eax);
+    code.sub(eax, 1);
     code.and_(eax, u32(A32JitState::RSBPtrMask));
     code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)], eax);
     code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);

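This sequence decrements the return stack buffer pointer and wraps it with a power-of-two mask, a standard ring-buffer step; only the decrement itself is rewritten, the masking is unchanged. A plain C++ equivalent, assuming an illustrative mask value (dynarmic derives RSBPtrMask from its actual RSB size):

    #include <cstdint>

    constexpr uint32_t RSBPtrMask = 7;  // illustrative: an RSB of size 8 assumed

    uint32_t PopRsbPtr(uint32_t rsb_ptr) {
        return (rsb_ptr - 1) & RSBPtrMask;  // sub + and_, wraps 0 to the top slot
    }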
@@ -208,7 +208,7 @@ void A64EmitX64::GenTerminalHandlers() {
     terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
     calculate_location_descriptor();
     code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)]);
-    code.dec(eax);
+    code.sub(eax, 1);
     code.and_(eax, u32(A64JitState::RSBPtrMask));
     code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)], eax);
     code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);

@@ -1206,7 +1206,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
     }

     // a > 0 && a < 0x00800000;
-    code.dec(tmp);
+    code.sub(tmp, 1);
     code.cmp(tmp, 0x007FFFFF);
     code.jb(fallback, code.T_NEAR); //within -127,128
     needs_fallback = true;

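The sub/cmp/jb triple folds the commented two-sided range check into a single unsigned comparison: subtracting 1 wraps a == 0 around to UINT32_MAX, so one below-test covers both bounds. A standalone C++ equivalent (the function name is ours, not dynarmic's):

    #include <cstdint>

    // Same predicate the emitted code tests: a > 0 && a < 0x00800000
    bool InFallbackRange(uint32_t a) {
        return (a - 1u) < 0x007FFFFFu;  // a == 0 wraps to UINT32_MAX and fails
    }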
@@ -3326,7 +3326,7 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) {
     code.paddb(mask, mask);
     code.paddb(xmm_a, xmm_a);
     code.pblendvb(result, alternate);
-    code.dec(counter);
+    code.sub(counter, 1);
     code.jnz(loop);

     ctx.reg_alloc.DefineValue(inst, result);

@@ -3370,7 +3370,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst
     code.paddw(mask, mask);
     code.paddw(xmm_a, xmm_a);
     code.pblendvb(result, alternate);
-    code.dec(counter);
+    code.sub(counter, 1);
     code.jnz(loop);

     ctx.reg_alloc.DefineValue(inst, result);

@@ -78,16 +78,16 @@ inline bool HostLocIsFlag(HostLoc reg) {

 inline HostLoc HostLocRegIdx(int idx) {
     ASSERT(idx >= 0 && idx <= 15);
-    return static_cast<HostLoc>(idx);
+    return HostLoc(idx);
 }

 inline HostLoc HostLocXmmIdx(int idx) {
     ASSERT(idx >= 0 && idx <= 15);
-    return static_cast<HostLoc>(static_cast<size_t>(HostLoc::XMM0) + idx);
+    return HostLoc(size_t(HostLoc::XMM0) + idx);
 }

 inline HostLoc HostLocSpill(size_t i) {
-    return static_cast<HostLoc>(static_cast<size_t>(HostLoc::FirstSpill) + i);
+    return HostLoc(size_t(HostLoc::FirstSpill) + i);
 }

 inline bool HostLocIsSpill(HostLoc reg) {

@@ -440,10 +440,13 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
     // all over the place - it also fixes bugs with high reg pressure
     } else if (*it >= HostLoc::R13 && *it <= HostLoc::R15) {
         // skip, do not touch
+    // Intel recommends to reuse registers as soon as they're overwritable (DO NOT SPILL)
+    } else if (loc_info.IsEmpty()) {
+        it_empty_candidate = it;
+        break;
+    // No empty registers for some reason (very evil) - just do normal LRU
     } else {
         if (loc_info.lru_counter < min_lru_counter) {
-            if (loc_info.IsEmpty())
-                it_empty_candidate = it;
             // Otherwise a "quasi"-LRU
             min_lru_counter = loc_info.lru_counter;
             if (*it >= HostLoc::R8 && *it <= HostLoc::R15) {

@@ -454,9 +457,6 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
             if (min_lru_counter == 0)
                 break; //early exit
         }
-        // only if not assigned (i.e for failcase of all LRU=0)
-        if (it_empty_candidate == desired_locations.cend() && loc_info.IsEmpty())
-            it_empty_candidate = it;
     }
 }
 // Final resolution goes as follows:

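Taken together, these two hunks reorder the victim search: an empty (immediately overwritable) register now wins outright via an early break, instead of being recorded as a side candidate inside the LRU branch. A simplified standalone sketch of the resulting policy, using our own stand-in types rather than dynarmic's (the R13-R15 pinning is omitted):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct LocInfo {
        uint64_t lru_counter = 0;
        bool empty = true;
    };

    // Prefer an empty register (Intel: reuse as soon as overwritable);
    // otherwise evict the quasi-least-recently-used one.
    size_t SelectVictim(const std::vector<LocInfo>& locs) {
        size_t victim = 0;
        uint64_t min_lru = UINT64_MAX;
        for (size_t i = 0; i < locs.size(); ++i) {
            if (locs[i].empty)
                return i;            // empty: take it immediately
            if (locs[i].lru_counter < min_lru) {
                min_lru = locs[i].lru_counter;
                victim = i;
                if (min_lru == 0)
                    break;           // early exit, nothing can be older
            }
        }
        return victim;               // no empty register: quasi-LRU
    }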
@@ -527,11 +527,10 @@ void RegAlloc::Move(HostLoc to, HostLoc from) noexcept {

     ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
     ASSERT(bit_width <= HostLocBitWidth(to));
+    ASSERT_MSG(!LocInfo(from).IsEmpty(), "Mov eliminated");

-    if (!LocInfo(from).IsEmpty()) {
-        EmitMove(bit_width, to, from);
-        LocInfo(to) = std::exchange(LocInfo(from), {});
-    }
+    EmitMove(bit_width, to, from);
+    LocInfo(to) = std::exchange(LocInfo(from), {});
 }

 void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept {

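The rewritten Move relies on std::exchange to hand the source slot's tracking info to the destination and reset the source to empty in a single expression, while the new ASSERT_MSG turns a silently skipped move into a hard failure. A tiny illustration of the std::exchange idiom:

    #include <cassert>
    #include <utility>

    int main() {
        int from = 42;
        int to = std::exchange(from, 0);  // to receives 42, from is reset to 0
        assert(to == 42 && from == 0);
    }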
@@ -565,30 +564,36 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept {
     ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
     ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
     ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
-    const HostLoc new_loc = FindFreeSpill();
+    auto const new_loc = FindFreeSpill(HostLocIsXMM(loc));
     Move(new_loc, loc);
 }

-HostLoc RegAlloc::FindFreeSpill() const noexcept {
-    for (size_t i = static_cast<size_t>(HostLoc::FirstSpill); i < hostloc_info.size(); i++) {
-        const auto loc = static_cast<HostLoc>(i);
-        if (LocInfo(loc).IsEmpty()) {
-            return loc;
-        }
+HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
+    // Do not spill XMM into other XMM silly
+    if (!is_xmm) {
+        // TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY
+        // Intel recommends to spill GPR onto XMM registers IF POSSIBLE
+        for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM0); --i)
+            if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
+                return loc;
     }
+    // Otherwise go to stack spilling
+    for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i)
+        if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
+            return loc;
     ASSERT_FALSE("All spill locations are full");
-}
+};

-inline static Xbyak::RegExp SpillToOpArg_Helper1(HostLoc loc, size_t reserved_stack_space) noexcept {
-    ASSERT(HostLocIsSpill(loc));
-    size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
-    ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");
-    return Xbyak::util::rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]);
-}
-
 void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
+    auto const spill_to_op_arg_helper = [&](HostLoc loc, size_t reserved_stack_space) {
+        ASSERT(HostLocIsSpill(loc));
+        size_t i = size_t(loc) - size_t(HostLoc::FirstSpill);
+        ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");
+        return Xbyak::util::rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]);
+    };
+    auto const spill_xmm_to_op = [&](const HostLoc loc) {
+        return Xbyak::util::xword[spill_to_op_arg_helper(loc, reserved_stack_space)];
+    };
     if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
         MAYBE_AVX(movaps, HostLocToXmm(to), HostLocToXmm(from));
     } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {

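The reworked FindFreeSpill first looks for a free XMM register when a GPR is being spilled, so the value can be parked register-to-register instead of going through memory; XMM values still spill straight to stack slots, so an XMM is never spilled into another XMM. A hedged xbyak sketch of the register-to-register path (generator and register choice are illustrative, not dynarmic's actual emission):

    #include <xbyak/xbyak.h>

    struct SpillSketch : Xbyak::CodeGenerator {
        SpillSketch() {
            movq(xmm15, rax);  // spill: GPR -> free XMM, no memory traffic
            // ... rax is free for reuse here ...
            movq(rax, xmm15);  // reload: XMM -> GPR
            ret();
        }
    };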
@@ -613,7 +618,7 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
             MAYBE_AVX(movd, HostLocToReg64(to).cvt32(), HostLocToXmm(from));
         }
     } else if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
-        const Xbyak::Address spill_addr = SpillToOpArg(from);
+        const Xbyak::Address spill_addr = spill_xmm_to_op(from);
         ASSERT(spill_addr.getBit() >= bit_width);
         switch (bit_width) {
         case 128:

@@ -631,7 +636,7 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
             UNREACHABLE();
         }
     } else if (HostLocIsSpill(to) && HostLocIsXMM(from)) {
-        const Xbyak::Address spill_addr = SpillToOpArg(to);
+        const Xbyak::Address spill_addr = spill_xmm_to_op(to);
         ASSERT(spill_addr.getBit() >= bit_width);
         switch (bit_width) {
         case 128:

@@ -651,16 +656,16 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
     } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->mov(HostLocToReg64(to), Xbyak::util::qword[SpillToOpArg_Helper1(from, reserved_stack_space)]);
+            code->mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]);
         } else {
-            code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[SpillToOpArg_Helper1(from, reserved_stack_space)]);
+            code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]);
         }
     } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->mov(Xbyak::util::qword[SpillToOpArg_Helper1(to, reserved_stack_space)], HostLocToReg64(from));
+            code->mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from));
         } else {
-            code->mov(Xbyak::util::dword[SpillToOpArg_Helper1(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
+            code->mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
         }
     } else {
         ASSERT_FALSE("Invalid RegAlloc::EmitMove");

@@ -677,8 +682,4 @@ void RegAlloc::EmitExchange(const HostLoc a, const HostLoc b) noexcept {
     }
 }

-Xbyak::Address RegAlloc::SpillToOpArg(const HostLoc loc) noexcept {
-    return Xbyak::util::xword[SpillToOpArg_Helper1(loc, reserved_stack_space)];
-}
-
 } // namespace Dynarmic::Backend::X64

@@ -243,7 +243,7 @@ private:
     void MoveOutOfTheWay(HostLoc reg) noexcept;

     void SpillRegister(HostLoc loc) noexcept;
-    HostLoc FindFreeSpill() const noexcept;
+    HostLoc FindFreeSpill(bool is_xmm) const noexcept;

     inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
         ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);

@@ -256,7 +256,6 @@ private:

     void EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
     void EmitExchange(const HostLoc a, const HostLoc b) noexcept;
-    Xbyak::Address SpillToOpArg(const HostLoc loc) noexcept;

     //data
     alignas(64) boost::container::static_vector<HostLoc, 28> gpr_order;

@@ -22,7 +22,7 @@ void PrintVerboseDebuggingOutputLine(RegisterData& reg_data, HostLoc hostloc, si
     } else if (HostLocIsXMM(hostloc)) {
         return reg_data.xmms[HostLocToXmm(hostloc).getIdx()];
     } else if (HostLocIsSpill(hostloc)) {
-        return (*reg_data.spill)[static_cast<size_t>(hostloc) - static_cast<size_t>(HostLoc::FirstSpill)];
+        return (*reg_data.spill)[size_t(hostloc) - size_t(HostLoc::FirstSpill)];
     } else {
         fmt::print("invalid hostloc! ");
         return {0, 0};