[dynarmic] regalloc use scratchimpl that uses all instead of iterating
All checks were successful
eden-license / license-header (pull_request) Successful in 34s
All checks were successful
eden-license / license-header (pull_request) Successful in 34s
Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
615f2935aa
commit
7b7588e9fb
2 changed files with 18 additions and 16 deletions
|
@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
|
||||||
auto const opcode = inst.GetOpcode();
|
auto const opcode = inst.GetOpcode();
|
||||||
// Call the relevant Emit* member function.
|
// Call the relevant Emit* member function.
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch;
|
#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch;
|
||||||
#define A32OPC(name, type, ...)
|
#define A32OPC(name, type, ...)
|
||||||
#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch;
|
#define A64OPC(name, type, ...) case IR::Opcode::A64##name: goto a64_branch;
|
||||||
#include "dynarmic/ir/opcodes.inc"
|
#include "dynarmic/ir/opcodes.inc"
|
||||||
#undef OPCODE
|
#undef OPCODE
|
||||||
#undef A32OPC
|
#undef A32OPC
|
||||||
|
|
|
@ -367,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def,
|
||||||
if (result_def) {
|
if (result_def) {
|
||||||
DefineValueImpl(result_def, ABI_RETURN);
|
DefineValueImpl(result_def, ABI_RETURN);
|
||||||
}
|
}
|
||||||
|
for (size_t i = 0; i < args.size(); i++) {
|
||||||
|
if (args[i]) {
|
||||||
|
UseScratch(*args[i], args_hostloc[i]);
|
||||||
|
} else {
|
||||||
|
ScratchGpr(args_hostloc[i]); // TODO: Force spill
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Must match with ScratchImpl
|
||||||
|
for (auto const gpr : other_caller_save) {
|
||||||
|
MoveOutOfTheWay(gpr);
|
||||||
|
LocInfo(gpr).WriteLock();
|
||||||
|
}
|
||||||
for (size_t i = 0; i < args.size(); i++) {
|
for (size_t i = 0; i < args.size(); i++) {
|
||||||
if (args[i] && !args[i]->get().IsVoid()) {
|
if (args[i] && !args[i]->get().IsVoid()) {
|
||||||
UseScratch(*args[i], args_hostloc[i]);
|
|
||||||
// LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
|
// LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
|
||||||
const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
|
const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
|
||||||
switch (args[i]->get().GetType()) {
|
switch (args[i]->get().GetType()) {
|
||||||
|
@ -390,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < args.size(); i++)
|
|
||||||
if (!args[i]) {
|
|
||||||
// TODO: Force spill
|
|
||||||
ScratchGpr(args_hostloc[i]);
|
|
||||||
}
|
|
||||||
for (auto const caller_saved : other_caller_save)
|
|
||||||
ScratchImpl({caller_saved});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
|
void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
|
||||||
|
@ -560,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept {
|
||||||
}
|
}
|
||||||
|
|
||||||
HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
|
HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
|
||||||
#if 0
|
|
||||||
// TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows
|
// TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows
|
||||||
// but it's fine anyways. We can find other ways to cheat it later - but which?!?!
|
// but it's fine anyways. We can find other ways to cheat it later - but which?!?!
|
||||||
// we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end?
|
// we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end?
|
||||||
// TODO(lizzie): This needs to be investigated further later.
|
// TODO(lizzie): This needs to be investigated further later.
|
||||||
// Do not spill XMM into other XMM silly
|
// Do not spill XMM into other XMM silly
|
||||||
if (!is_xmm) {
|
/*if (!is_xmm) {
|
||||||
// TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY
|
// TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY
|
||||||
// Intel recommends to spill GPR onto XMM registers IF POSSIBLE
|
// Intel recommends to spill GPR onto XMM registers IF POSSIBLE
|
||||||
// TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call?
|
// TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call?
|
||||||
|
@ -574,8 +575,9 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
|
||||||
for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i)
|
for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i)
|
||||||
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
|
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
|
||||||
return loc;
|
return loc;
|
||||||
}
|
}*/
|
||||||
#endif
|
// TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits
|
||||||
|
// of spilling on XMM versus the potential cost of using XMM registers.....
|
||||||
// Otherwise go to stack spilling
|
// Otherwise go to stack spilling
|
||||||
for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i)
|
for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i)
|
||||||
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
|
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue