[dynarmic] Refactoring to reduce latency hit from recompilation #358

Open
Lizzie wants to merge 22 commits from liz-dynarmic-latency-improvments into master
6 changed files with 38 additions and 63 deletions
Showing only changes of commit 34c4b22f4e - Show all commits

View file

@ -9,12 +9,9 @@
#pragma once
#include <string>
#include <utility>
#include <fmt/format.h>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"
#include "dynarmic/interface/A32/coprocessor_util.h"
#include "dynarmic/ir/cond.h"
@ -89,24 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) {
/// @brief Converts a core register enumerator to its numeric index.
/// @param reg Register to convert; asserted not to be Reg::INVALID_REG.
/// @return The enumerator's value converted to size_t.
inline size_t RegNumber(Reg reg) {
    ASSERT(reg != Reg::INVALID_REG);
    // Single return: a second return statement after this one could never execute.
    return static_cast<size_t>(reg);
}
/// @brief Converts an extended register enumerator to its index within its own class.
/// @param reg Extended register to convert.
/// @return The offset of reg from ExtReg::S0, ExtReg::D0, or ExtReg::Q0, depending on
///         which of IsSingleExtReg / IsDoubleExtReg / IsQuadExtReg accepts it.
///         If none does, the assertion fires; 0 is the fallback value should the
///         assertion not terminate execution.
inline size_t RegNumber(ExtReg reg) {
    const auto value = static_cast<size_t>(reg);
    if (IsSingleExtReg(reg)) {
        return value - static_cast<size_t>(ExtReg::S0);
    }
    if (IsDoubleExtReg(reg)) {
        return value - static_cast<size_t>(ExtReg::D0);
    }
    if (IsQuadExtReg(reg)) {
        return value - static_cast<size_t>(ExtReg::Q0);
    }
    ASSERT_MSG(false, "Invalid extended register");
    return 0;
}
inline Reg operator+(Reg reg, size_t number) {

View file

@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher<Visitor, u32>;
template<typename V>
std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
std::vector<ASIMDMatcher<V>> table = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
std::vector<std::pair<const char*, ASIMDMatcher<V>>> table = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./asimd.inc"
#undef INST
};
// Exceptions to the rule of thumb.
const std::set<std::string> comes_first{
"VBIC, VMOV, VMVN, VORR (immediate)",
@ -53,19 +50,21 @@ std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
"VQDMULH (scalar)",
"VQRDMULH (scalar)",
};
const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
return comes_first.count(matcher.GetName()) > 0;
const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
return comes_first.count(e.first) > 0;
});
const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
return comes_last.count(matcher.GetName()) == 0;
const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
return comes_last.count(e.first) == 0;
});
// If a matcher has more bits in its mask it is more specific, so it should come first.
std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) {
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) {
return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
});
return table;
std::vector<ASIMDMatcher<V>> final_table;
std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) {
return e.second;
});
return final_table;
}
template<typename V>

View file

@ -30,22 +30,18 @@ std::optional<std::reference_wrapper<const VFPMatcher<V>>> DecodeVFP(u32 instruc
static const struct Tables {
Table unconditional;
Table conditional;
} tables = [] {
} tables = []() {
Table list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./vfp.inc"
#undef INST
};
const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
return (matcher.GetMask() & 0xF0000000) == 0xF0000000;
});
return Tables{
Table{list.begin(), division},
Table{division, list.end()},
Table{list.begin(), it},
Table{it, list.end()},
};
}();

View file

@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) {
template<typename V>
constexpr DecodeTable<V> GetDecodeTable() {
std::vector<Matcher<V>> list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
std::vector<std::pair<const char*, Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./a64.inc"
#undef INST
};
// If a matcher has more bits in its mask it is more specific, so it should come first.
std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) {
std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) {
// If a matcher has more bits in its mask it is more specific, so it should come first.
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
});
// Exceptions to the above rule of thumb.
std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
std::stable_partition(list.begin(), list.end(), [&](const auto& e) {
return std::set<std::string>{
"MOVI, MVNI, ORR, BIC (vector, immediate)",
"FMOV (vector, immediate)",
"Unallocated SIMD modified immediate",
}.count(matcher.GetName()) > 0;
}.count(e.first) > 0;
});
DecodeTable<V> table{};
for (size_t i = 0; i < table.size(); ++i) {
for (auto matcher : list) {
const auto expect = detail::ToFastLookupIndex(matcher.GetExpected());
const auto mask = detail::ToFastLookupIndex(matcher.GetMask());
for (auto const& e : list) {
const auto expect = detail::ToFastLookupIndex(e.second.GetExpected());
const auto mask = detail::ToFastLookupIndex(e.second.GetMask());
if ((i & mask) == expect) {
table[i].push_back(matcher);
table[i].push_back(e.second);
}
}
}

View file

@ -163,18 +163,18 @@ struct detail {
/// @brief Creates a matcher that can match and parse instructions based on bitstring.
/// See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
template<auto bitstring, typename F>
static constexpr auto GetMatcher(F fn, const char* const name) {
static constexpr auto GetMatcher(F fn) {
constexpr size_t args_count = mcl::parameter_count_v<F>;
constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring));
constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring));
constexpr auto arg_masks = std::get<0>(GetArgInfo<args_count>(bitstring));
constexpr auto arg_shifts = std::get<1>(GetArgInfo<args_count>(bitstring));
const auto proxy_fn = VisitorCaller<F>::Make(std::make_index_sequence<args_count>(), fn, arg_masks, arg_shifts);
return MatcherT(name, mask, expect, proxy_fn);
return MatcherT(mask, expect, proxy_fn);
}
};
#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn, name)
#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn)
} // namespace detail
} // namespace Dynarmic::Decoder

View file

@ -31,14 +31,8 @@ public:
using visitor_type = Visitor;
using handler_return_type = typename Visitor::instruction_return_type;
using handler_function = std::function<handler_return_type(Visitor&, opcode_type)>;
Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func)
: name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {}
/// Gets the name of this type of instruction.
const char* GetName() const {
return name;
}
Matcher(opcode_type mask, opcode_type expected, handler_function func)
: mask{mask}, expected{expected}, fn{std::move(func)} {}
/// Gets the mask for this instruction.
opcode_type GetMask() const {
@ -70,7 +64,6 @@ public:
}
private:
const char* name;
opcode_type mask;
opcode_type expected;
handler_function fn;