[dynarmic] new regalloc scheme #81
10 changed files with 365 additions and 202 deletions
|
@ -415,21 +415,54 @@ void RegAlloc::ReleaseStackSpace(const size_t stack_space) noexcept {
|
|||
}
|
||||
|
||||
// Picks one host location out of `desired_locations` to allocate into.
// Locked locations are never chosen; among the rest, an empty location is preferred.
//
// NOTE(review): this span appears to interleave two versions of the body (the text
// looks like a rendered diff): a partition/find_if version that ends at the first
// `return`, followed by a loop-based "quasi-LRU" version. Read literally, everything
// after that first `return` would be unreachable — confirm against the real file
// which body is current before relying on either half.
HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept {
|
||||
// Working copy of the caller's list so it can be reordered and trimmed below.
boost::container::static_vector<HostLoc, 28> candidates = desired_locations; //Who let someone copy an ENTIRE VECTOR here?
|
||||
|
||||
// Move every unlocked location to the front; `allocated_locs` marks where the locked ones begin.
|
||||
const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc) noexcept {
|
||||
return !this->LocInfo(loc).IsLocked();
|
||||
});
|
||||
// Drop the locked locations, leaving only usable candidates.
candidates.erase(allocated_locs, candidates.end());
|
||||
ASSERT_MSG(!candidates.empty(), "All candidate registers have already been allocated");
|
||||
// TODO(lizzie): Overspill causes issues (reads of 0 and such) on some games; a testbench is needed
|
||||
// to track this down later. In the meantime the LRU algorithm was changed to prefer empty registers first.
|
||||
// High register pressure (and spills, maybe 32 regs?) still needs to be tested.
|
||||
|
||||
// Selects the best location out of the available locations.
|
||||
// NOTE: Picking the last registers is BAD because every instruction using them needs an extra REX prefix.
|
||||
// TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
|
||||
auto const it = std::find_if(candidates.begin(), candidates.end(), [this](auto const loc) noexcept {
|
||||
return this->LocInfo(loc).IsEmpty();
|
||||
});
|
||||
// First empty candidate if there is one, otherwise the first unlocked candidate.
return it != candidates.end() ? *it : candidates.front();
|
||||
// Starts at the largest size_t so the first unlocked location always compares lower.
auto min_lru_counter = size_t(-1);
|
||||
// For the three iterators below, cend() means "nothing found yet".
auto it_candidate = desired_locations.cend(); //default fallback if everything fails
|
||||
auto it_rex_candidate = desired_locations.cend();
|
||||
auto it_empty_candidate = desired_locations.cend();
|
||||
for (auto it = desired_locations.cbegin(); it != desired_locations.cend(); it++) {
|
||||
auto const& loc_info = LocInfo(*it);
|
||||
// Abstain from using upper registers unless absolutely necessary
|
||||
if (loc_info.IsLocked()) {
|
||||
// skip, not suitable for allocation
|
||||
} else {
|
||||
// New lowest counter seen so far among unlocked locations.
if (loc_info.lru_counter < min_lru_counter) {
|
||||
if (loc_info.IsEmpty())
|
||||
it_empty_candidate = it;
|
||||
// Otherwise a "quasi"-LRU
|
||||
min_lru_counter = loc_info.lru_counter;
|
||||
// R8..R15 are tracked separately so they are only used as a last resort (see final resolution).
if (*it >= HostLoc::R8 && *it <= HostLoc::R15) {
|
||||
it_rex_candidate = it;
|
||||
} else {
|
||||
it_candidate = it;
|
||||
}
|
||||
// No counter can compare lower than 0, so stop scanning; later locations are not examined.
if (min_lru_counter == 0)
|
||||
break; //early exit
|
||||
}
|
||||
// only if not assigned yet (i.e. for the fail case where every LRU counter is 0)
|
||||
if (it_empty_candidate == desired_locations.cend() && loc_info.IsEmpty())
|
||||
it_empty_candidate = it;
|
||||
}
|
||||
}
|
||||
// Final resolution goes as follows:
|
||||
// 1 => Try an empty candidate
|
||||
// 2 => Try normal candidate (no REX prefix)
|
||||
// 3 => Try using a REX prefixed one
|
||||
// We avoid using REX-addressable registers because each use adds a REX prefix,
|
||||
// which we may not want to pay for. The trade-off may not be worth it.
|
||||
auto const it_final = it_empty_candidate != desired_locations.cend()
|
||||
? it_empty_candidate : it_candidate != desired_locations.cend()
|
||||
? it_candidate : it_rex_candidate;
|
||||
ASSERT_MSG(it_final != desired_locations.cend(), "All candidate registers have already been allocated");
|
||||
// Evil magic - increment LRU counter (will wrap at 256)
// NOTE(review): this writes through const_cast inside a const member function; that is
// undefined behaviour if the RegAlloc object itself is const — confirm, or consider `mutable`.
|
||||
const_cast<RegAlloc*>(this)->LocInfo(*it_final).lru_counter++;
|
||||
return *it_final;
|
||||
}
|
||||
|
||||
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept {
|
||||
|
|
|
@ -92,8 +92,8 @@ private:
|
|||
uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128
|
||||
bool is_scratch : 1 = false; //1
|
||||
bool is_set_last_use : 1 = false; //1
|
||||
|
||||
alignas(16) char padding;
|
||||
alignas(16) uint8_t lru_counter = 0; //1
|
||||
friend class RegAlloc;
|
||||
};
|
||||
static_assert(sizeof(HostLocInfo) == 64);
|
||||
|
||||
|
|
|
@ -539,7 +539,8 @@ TEST_CASE("arm: Memory access (fastmem)", "[arm][A32]") {
|
|||
char* backing_memory = reinterpret_cast<char*>(std::align(page_size, memory_size, buffer_ptr, buffer_size_nconst));
|
||||
|
||||
A32FastmemTestEnv env{backing_memory};
|
||||
Dynarmic::A32::UserConfig config{&env};
|
||||
Dynarmic::A32::UserConfig config{};
|
||||
config.callbacks = &env;
|
||||
config.fastmem_pointer = reinterpret_cast<uintptr_t>(backing_memory);
|
||||
config.recompile_on_fastmem_failure = false;
|
||||
config.processor_id = 0;
|
||||
|
|
250
externals/dynarmic/tests/A64/a64.cpp
vendored
250
externals/dynarmic/tests/A64/a64.cpp
vendored
File diff suppressed because one or more lines are too long
4
externals/dynarmic/tests/A64/fp_min_max.cpp
vendored
4
externals/dynarmic/tests/A64/fp_min_max.cpp
vendored
|
@ -64,7 +64,9 @@ u32 force_default_nan(u32 value) {
|
|||
template<typename Fn>
|
||||
void run_test(u32 instruction, Fn fn) {
|
||||
A64TestEnv env;
|
||||
A64::Jit jit{A64::UserConfig{&env}};
|
||||
A64::UserConfig jit_user_config{};
|
||||
jit_user_config.callbacks = &env;
|
||||
A64::Jit jit{jit_user_config};
|
||||
|
||||
env.code_mem.emplace_back(instruction); // FMAX S0, S1, S2
|
||||
env.code_mem.emplace_back(0x14000000); // B .
|
||||
|
|
|
@ -154,7 +154,8 @@ static u32 GenFloatInst(u64 pc, bool is_last_inst) {
|
|||
}
|
||||
|
||||
static Dynarmic::A64::UserConfig GetUserConfig(A64TestEnv& jit_env) {
|
||||
Dynarmic::A64::UserConfig jit_user_config{&jit_env};
|
||||
Dynarmic::A64::UserConfig jit_user_config{};
|
||||
jit_user_config.callbacks = &jit_env;
|
||||
jit_user_config.optimizations &= ~OptimizationFlag::FastDispatch;
|
||||
// The below corresponds to the settings for qemu's aarch64_max_initfn
|
||||
jit_user_config.dczid_el0 = 7;
|
||||
|
|
|
@ -10,7 +10,8 @@
|
|||
|
||||
TEST_CASE("misaligned load/store do not use page_table when detect_misaligned_access_via_page_table is set", "[a64]") {
|
||||
A64TestEnv env;
|
||||
Dynarmic::A64::UserConfig conf{&env};
|
||||
Dynarmic::A64::UserConfig conf{};
|
||||
conf.callbacks = &env;
|
||||
conf.page_table = nullptr;
|
||||
conf.detect_misaligned_access_via_page_table = 128;
|
||||
conf.only_detect_misalignment_via_page_table_on_page_boundary = true;
|
||||
|
|
|
@ -12,8 +12,8 @@ using namespace Dynarmic;
|
|||
|
||||
TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have any patching requirements", "[a64]") {
|
||||
A64TestEnv env;
|
||||
|
||||
A64::UserConfig conf{&env};
|
||||
A64::UserConfig conf{};
|
||||
conf.callbacks = &env;
|
||||
A64::Jit jit{conf};
|
||||
|
||||
REQUIRE(conf.HasOptimization(OptimizationFlag::FastDispatch));
|
||||
|
@ -64,8 +64,8 @@ TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have
|
|||
|
||||
TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have any patching requirements 2", "[a64]") {
|
||||
A64TestEnv env;
|
||||
|
||||
A64::UserConfig conf{&env};
|
||||
A64::UserConfig conf{};
|
||||
conf.callbacks = &env;
|
||||
A64::Jit jit{conf};
|
||||
|
||||
REQUIRE(conf.HasOptimization(OptimizationFlag::FastDispatch));
|
||||
|
|
86
externals/dynarmic/tests/test_generator.cpp
vendored
86
externals/dynarmic/tests/test_generator.cpp
vendored
|
@ -23,6 +23,7 @@
|
|||
#include "./rand_int.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/common/fp/fpsr.h"
|
||||
#include "dynarmic/common/llvm_disassemble.h"
|
||||
#include "dynarmic/frontend/A32/ITState.h"
|
||||
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
|
||||
#include "dynarmic/frontend/A32/a32_types.h"
|
||||
|
@ -402,33 +403,35 @@ void RunTestInstance(Dynarmic::A32::Jit& jit,
|
|||
const std::vector<typename TestEnv::InstructionType>& instructions,
|
||||
const u32 cpsr,
|
||||
const u32 fpscr,
|
||||
const size_t ticks_left) {
|
||||
const size_t ticks_left,
|
||||
const bool show_disas) {
|
||||
const u32 initial_pc = regs[15];
|
||||
const u32 num_words = initial_pc / sizeof(typename TestEnv::InstructionType);
|
||||
const u32 code_mem_size = num_words + static_cast<u32>(instructions.size());
|
||||
|
||||
fmt::print("instructions:");
|
||||
if (show_disas) {
|
||||
fmt::print("instructions:\n");
|
||||
auto current_pc = initial_pc;
|
||||
for (auto instruction : instructions) {
|
||||
if constexpr (sizeof(decltype(instruction)) == 2) {
|
||||
fmt::print(" {:04x}", instruction);
|
||||
fmt::print("{:04x} ?\n", instruction);
|
||||
} else {
|
||||
fmt::print(" {:08x}", instruction);
|
||||
fmt::print("{}", Dynarmic::Common::DisassembleAArch64(instruction, current_pc));
|
||||
}
|
||||
current_pc += sizeof(decltype(instruction));
|
||||
}
|
||||
fmt::print("\n");
|
||||
|
||||
fmt::print("initial_regs:");
|
||||
for (u32 i : regs) {
|
||||
for (u32 i : regs)
|
||||
fmt::print(" {:08x}", i);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("initial_vecs:");
|
||||
for (u32 i : vecs) {
|
||||
for (u32 i : vecs)
|
||||
fmt::print(" {:08x}", i);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("initial_cpsr: {:08x}\n", cpsr);
|
||||
fmt::print("initial_fpcr: {:08x}\n", fpscr);
|
||||
}
|
||||
|
||||
jit.ClearCache();
|
||||
|
||||
|
@ -450,6 +453,7 @@ void RunTestInstance(Dynarmic::A32::Jit& jit,
|
|||
jit.Run();
|
||||
}
|
||||
|
||||
if (show_disas) {
|
||||
fmt::print("final_regs:");
|
||||
for (u32 i : jit.Regs()) {
|
||||
fmt::print(" {:08x}", i);
|
||||
|
@ -462,24 +466,24 @@ void RunTestInstance(Dynarmic::A32::Jit& jit,
|
|||
fmt::print("\n");
|
||||
fmt::print("final_cpsr: {:08x}\n", jit.Cpsr());
|
||||
fmt::print("final_fpsr: {:08x}\n", mask_fpsr_cum_bits ? jit.Fpscr() & 0xffffff00 : jit.Fpscr());
|
||||
|
||||
fmt::print("mod_mem: ");
|
||||
for (auto [addr, value] : jit_env.modified_memory) {
|
||||
fmt::print("{:08x}:{:02x} ", addr, value);
|
||||
}
|
||||
fmt::print("\n");
|
||||
|
||||
fmt::print("interrupts:\n");
|
||||
for (const auto& i : jit_env.interrupts) {
|
||||
std::puts(i.c_str());
|
||||
}
|
||||
|
||||
fmt::print("===\n");
|
||||
jit.DumpDisassembly();
|
||||
}
|
||||
}
|
||||
|
||||
Dynarmic::A64::UserConfig GetA64UserConfig(A64TestEnv& jit_env, bool noopt) {
|
||||
Dynarmic::A64::UserConfig jit_user_config{&jit_env};
|
||||
jit_user_config.optimizations &= ~OptimizationFlag::FastDispatch;
|
||||
Dynarmic::A64::UserConfig jit_user_config{};
|
||||
jit_user_config.callbacks = &jit_env;
|
||||
jit_user_config.optimizations = all_safe_optimizations;
|
||||
// The below corresponds to the settings for qemu's aarch64_max_initfn
|
||||
jit_user_config.dczid_el0 = 7;
|
||||
jit_user_config.ctr_el0 = 0x80038003;
|
||||
|
@ -499,7 +503,8 @@ void RunTestInstance(Dynarmic::A64::Jit& jit,
|
|||
const u32 fpcr,
|
||||
const u64 initial_sp,
|
||||
const u64 start_address,
|
||||
const size_t ticks_left) {
|
||||
const size_t ticks_left,
|
||||
const bool show_disas) {
|
||||
jit.ClearCache();
|
||||
|
||||
for (size_t jit_rerun_count = 0; jit_rerun_count < num_jit_reruns; ++jit_rerun_count) {
|
||||
|
@ -522,59 +527,53 @@ void RunTestInstance(Dynarmic::A64::Jit& jit,
|
|||
jit.Run();
|
||||
}
|
||||
|
||||
fmt::print("instructions:");
|
||||
if (show_disas) {
|
||||
fmt::print("instructions:\n");
|
||||
auto current_pc = start_address;
|
||||
for (u32 instruction : instructions) {
|
||||
fmt::print(" {:08x}", instruction);
|
||||
fmt::print("{}", Dynarmic::Common::DisassembleAArch64(instruction, current_pc));
|
||||
current_pc += 4;
|
||||
}
|
||||
fmt::print("\n");
|
||||
|
||||
fmt::print("initial_regs:");
|
||||
for (u64 i : regs) {
|
||||
for (u64 i : regs)
|
||||
fmt::print(" {:016x}", i);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("initial_vecs:");
|
||||
for (auto i : vecs) {
|
||||
for (auto i : vecs)
|
||||
fmt::print(" {:016x}:{:016x}", i[0], i[1]);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("initial_sp: {:016x}\n", initial_sp);
|
||||
fmt::print("initial_pstate: {:08x}\n", pstate);
|
||||
fmt::print("initial_fpcr: {:08x}\n", fpcr);
|
||||
|
||||
fmt::print("final_regs:");
|
||||
for (u64 i : jit.GetRegisters()) {
|
||||
for (u64 i : jit.GetRegisters())
|
||||
fmt::print(" {:016x}", i);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("final_vecs:");
|
||||
for (auto i : jit.GetVectors()) {
|
||||
for (auto i : jit.GetVectors())
|
||||
fmt::print(" {:016x}:{:016x}", i[0], i[1]);
|
||||
}
|
||||
fmt::print("\n");
|
||||
fmt::print("final_sp: {:016x}\n", jit.GetSP());
|
||||
fmt::print("final_pc: {:016x}\n", jit.GetPC());
|
||||
fmt::print("final_pstate: {:08x}\n", jit.GetPstate());
|
||||
fmt::print("final_fpcr: {:08x}\n", jit.GetFpcr());
|
||||
fmt::print("final_qc : {}\n", FP::FPSR{jit.GetFpsr()}.QC());
|
||||
|
||||
fmt::print("mod_mem:");
|
||||
for (auto [addr, value] : jit_env.modified_memory) {
|
||||
for (auto [addr, value] : jit_env.modified_memory)
|
||||
fmt::print(" {:08x}:{:02x}", addr, value);
|
||||
}
|
||||
fmt::print("\n");
|
||||
|
||||
fmt::print("interrupts:\n");
|
||||
for (const auto& i : jit_env.interrupts) {
|
||||
for (const auto& i : jit_env.interrupts)
|
||||
std::puts(i.c_str());
|
||||
}
|
||||
|
||||
fmt::print("===\n");
|
||||
jit.DumpDisassembly();
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
void TestThumb(size_t num_instructions, size_t num_iterations, bool noopt) {
|
||||
void TestThumb(size_t num_instructions, size_t num_iterations, bool noopt, bool show_disas) {
|
||||
ThumbTestEnv jit_env{};
|
||||
Dynarmic::A32::Jit jit{GetA32UserConfig(jit_env, noopt)};
|
||||
|
||||
|
@ -597,11 +596,11 @@ void TestThumb(size_t num_instructions, size_t num_iterations, bool noopt) {
|
|||
}
|
||||
|
||||
regs[15] = start_address;
|
||||
RunTestInstance(jit, jit_env, regs, ext_reg, instructions, cpsr, fpcr, num_instructions);
|
||||
RunTestInstance(jit, jit_env, regs, ext_reg, instructions, cpsr, fpcr, num_instructions, show_disas);
|
||||
}
|
||||
}
|
||||
|
||||
void TestArm(size_t num_instructions, size_t num_iterations, bool noopt) {
|
||||
void TestArm(size_t num_instructions, size_t num_iterations, bool noopt, bool show_disas) {
|
||||
ArmTestEnv jit_env{};
|
||||
Dynarmic::A32::Jit jit{GetA32UserConfig(jit_env, noopt)};
|
||||
|
||||
|
@ -623,11 +622,11 @@ void TestArm(size_t num_instructions, size_t num_iterations, bool noopt) {
|
|||
}
|
||||
|
||||
regs[15] = start_address;
|
||||
RunTestInstance(jit, jit_env, regs, ext_reg, instructions, cpsr, fpcr, num_instructions);
|
||||
RunTestInstance(jit, jit_env, regs, ext_reg, instructions, cpsr, fpcr, num_instructions, show_disas);
|
||||
}
|
||||
}
|
||||
|
||||
void TestA64(size_t num_instructions, size_t num_iterations, bool noopt) {
|
||||
void TestA64(size_t num_instructions, size_t num_iterations, bool noopt, bool show_disas) {
|
||||
A64TestEnv jit_env{};
|
||||
Dynarmic::A64::Jit jit{GetA64UserConfig(jit_env, noopt)};
|
||||
|
||||
|
@ -649,7 +648,7 @@ void TestA64(size_t num_instructions, size_t num_iterations, bool noopt) {
|
|||
instructions.emplace_back(GenRandomA64Inst(static_cast<u32>(start_address + 4 * instructions.size()), i == num_instructions - 1));
|
||||
}
|
||||
|
||||
RunTestInstance(jit, jit_env, regs, vecs, instructions, pstate, fpcr, initial_sp, start_address, num_instructions);
|
||||
RunTestInstance(jit, jit_env, regs, vecs, instructions, pstate, fpcr, initial_sp, start_address, num_instructions, show_disas);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -677,6 +676,7 @@ int main(int argc, char* argv[]) {
|
|||
const auto instruction_count = str2sz(argv[3]);
|
||||
const auto iterator_count = str2sz(argv[4]);
|
||||
const bool noopt = argc == 6 && (strcmp(argv[5], "noopt") == 0);
|
||||
const bool show_disas = argc == 6 && (strcmp(argv[5], "disas") == 0);
|
||||
|
||||
if (!seed || !instruction_count || !iterator_count) {
|
||||
fmt::print("invalid numeric arguments\n");
|
||||
|
@ -686,11 +686,11 @@ int main(int argc, char* argv[]) {
|
|||
detail::g_rand_int_generator.seed(static_cast<std::mt19937::result_type>(*seed));
|
||||
|
||||
if (strcmp(argv[1], "thumb") == 0) {
|
||||
TestThumb(*instruction_count, *iterator_count, noopt);
|
||||
TestThumb(*instruction_count, *iterator_count, noopt, show_disas);
|
||||
} else if (strcmp(argv[1], "arm") == 0) {
|
||||
TestArm(*instruction_count, *iterator_count, noopt);
|
||||
TestArm(*instruction_count, *iterator_count, noopt, show_disas);
|
||||
} else if (strcmp(argv[1], "a64") == 0) {
|
||||
TestA64(*instruction_count, *iterator_count, noopt);
|
||||
TestA64(*instruction_count, *iterator_count, noopt, show_disas);
|
||||
} else {
|
||||
fmt::print("unrecognized instruction class\n");
|
||||
return 1;
|
||||
|
|
3
externals/dynarmic/tests/test_reader.cpp
vendored
3
externals/dynarmic/tests/test_reader.cpp
vendored
|
@ -158,7 +158,8 @@ void RunTestInstance(Dynarmic::A32::Jit& jit,
|
|||
}
|
||||
|
||||
A64::UserConfig GetA64UserConfig(A64TestEnv& jit_env, bool noopt) {
|
||||
A64::UserConfig jit_user_config{&jit_env};
|
||||
A64::UserConfig jit_user_config{};
|
||||
jit_user_config.callbacks = &jit_env;
|
||||
jit_user_config.optimizations &= ~OptimizationFlag::FastDispatch;
|
||||
// The below corresponds to the settings for qemu's aarch64_max_initfn
|
||||
jit_user_config.dczid_el0 = 7;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue