[dynarmic] Refactoring to reduce latency hit from recompilation #358

Open
Lizzie wants to merge 22 commits from liz-dynarmic-latency-improvments into master
3 changed files with 28 additions and 49 deletions
Showing only changes of commit 1e15fe1e52 - Show all commits

View file

@@ -210,12 +210,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ArmDynarmic32::MakeJit(Common::PageTable* pa
config.wall_clock_cntpct = m_uses_wall_clock; config.wall_clock_cntpct = m_uses_wall_clock;
config.enable_cycle_counting = !m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock;
// Code cache size // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
#ifdef ARCHITECTURE_arm64
config.code_cache_size = std::uint32_t(128_MiB); config.code_cache_size = std::uint32_t(128_MiB);
#else
config.code_cache_size = std::uint32_t(512_MiB);
#endif
// Allow memory fault handling to work // Allow memory fault handling to work
if (m_system.DebuggerEnabled()) { if (m_system.DebuggerEnabled()) {

View file

@@ -269,12 +269,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
config.wall_clock_cntpct = m_uses_wall_clock; config.wall_clock_cntpct = m_uses_wall_clock;
config.enable_cycle_counting = !m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock;
// Code cache size // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
#ifdef ARCHITECTURE_arm64
config.code_cache_size = std::uint32_t(128_MiB); config.code_cache_size = std::uint32_t(128_MiB);
#else
config.code_cache_size = std::uint32_t(512_MiB);
#endif
// Allow memory fault handling to work // Allow memory fault handling to work
if (m_system.DebuggerEnabled()) { if (m_system.DebuggerEnabled()) {

View file

@@ -70,11 +70,9 @@ struct detail {
return std::make_tuple(mask, expect); return std::make_tuple(mask, expect);
} }
/** /// @brief Generates the masks and shifts for each argument.
* Generates the masks and shifts for each argument. /// A '-' in a bitstring indicates that we don't care about that value.
* A '-' in a bitstring indicates that we don't care about that value. /// An argument is specified by a continuous string of the same character.
* An argument is specified by a continuous string of the same character.
*/
template<size_t N> template<size_t N>
static consteval auto GetArgInfo(std::array<char, opcode_bitsize> bitstring) { static consteval auto GetArgInfo(std::array<char, opcode_bitsize> bitstring) {
std::array<opcode_type, N> masks = {}; std::array<opcode_type, N> masks = {};
@@ -98,7 +96,6 @@ struct detail {
if constexpr (N > 0) { if constexpr (N > 0) {
const size_t bit_position = opcode_bitsize - i - 1; const size_t bit_position = opcode_bitsize - i - 1;
if (arg_index >= N) if (arg_index >= N)
throw std::out_of_range("Unexpected field"); throw std::out_of_range("Unexpected field");
@@ -109,20 +106,16 @@ struct detail {
} }
} }
} }
#if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__)
// Avoids a MSVC ICE, and avoids Android NDK issue. // Avoids a MSVC ICE, and avoids Android NDK issue.
ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; })); ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; }));
#endif #endif
return std::make_tuple(masks, shifts); return std::make_tuple(masks, shifts);
} }
/** /// @brief This struct's Make member function generates a lambda which decodes an instruction
* This struct's Make member function generates a lambda which decodes an instruction based on /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is
* the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a /// provided as a template argument.
* template argument.
*/
template<typename FnT> template<typename FnT>
struct VisitorCaller; struct VisitorCaller;
@@ -130,36 +123,36 @@ struct detail {
# pragma warning(push) # pragma warning(push)
# pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning) # pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning)
#endif #endif
template<typename Visitor, typename... Args, typename CallRetT> template<typename V, typename... Args, typename ReturnType>
struct VisitorCaller<CallRetT (Visitor::*)(Args...)> { struct VisitorCaller<ReturnType (V::*)(Args...)> {
template<size_t... iota> template<size_t... iota>
static auto Make(std::integer_sequence<size_t, iota...>, static constexpr auto Make(std::integer_sequence<size_t, iota...>,
CallRetT (Visitor::*const fn)(Args...), ReturnType (V::*const fn)(Args...),
const std::array<opcode_type, sizeof...(iota)> arg_masks, const std::array<opcode_type, sizeof...(iota)> arg_masks,
const std::array<size_t, sizeof...(iota)> arg_shifts) { const std::array<size_t, sizeof...(iota)> arg_shifts) {
static_assert(std::is_same_v<visitor_type, Visitor>, "Member function is not from Matcher's Visitor"); static_assert(std::is_same_v<visitor_type, V>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) { return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) {
(void)instruction; (void)instruction;
(void)arg_masks; (void)arg_masks;
(void)arg_shifts; (void)arg_shifts;
return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...); return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
}; };
} }
}; };
template<typename Visitor, typename... Args, typename CallRetT> template<typename V, typename... Args, typename ReturnType>
struct VisitorCaller<CallRetT (Visitor::*)(Args...) const> { struct VisitorCaller<ReturnType (V::*)(Args...) const> {
template<size_t... iota> template<size_t... iota>
static auto Make(std::integer_sequence<size_t, iota...>, static constexpr auto Make(std::integer_sequence<size_t, iota...>,
CallRetT (Visitor::*const fn)(Args...) const, ReturnType (V::*const fn)(Args...) const,
const std::array<opcode_type, sizeof...(iota)> arg_masks, const std::array<opcode_type, sizeof...(iota)> arg_masks,
const std::array<size_t, sizeof...(iota)> arg_shifts) { const std::array<size_t, sizeof...(iota)> arg_shifts) {
static_assert(std::is_same_v<visitor_type, const Visitor>, "Member function is not from Matcher's Visitor"); static_assert(std::is_same_v<visitor_type, const V>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) { return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) {
(void)instruction; (void)instruction;
(void)arg_masks; (void)arg_masks;
(void)arg_shifts; (void)arg_shifts;
return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...); return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
}; };
} }
}; };
@@ -167,22 +160,16 @@ struct detail {
# pragma warning(pop) # pragma warning(pop)
#endif #endif
/** /// @brief Creates a matcher that can match and parse instructions based on bitstring.
* Creates a matcher that can match and parse instructions based on bitstring. /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
* See also: GetMaskAndExpect and GetArgInfo for format of bitstring. template<auto bitstring, typename F>
*/ static constexpr auto GetMatcher(F fn, const char* const name) {
template<auto bitstring, typename FnT> constexpr size_t args_count = mcl::parameter_count_v<F>;
static auto GetMatcher(FnT fn, const char* const name) {
constexpr size_t args_count = mcl::parameter_count_v<FnT>;
constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring));
constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring));
constexpr auto arg_masks = std::get<0>(GetArgInfo<args_count>(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo<args_count>(bitstring));
constexpr auto arg_shifts = std::get<1>(GetArgInfo<args_count>(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo<args_count>(bitstring));
const auto proxy_fn = VisitorCaller<F>::Make(std::make_index_sequence<args_count>(), fn, arg_masks, arg_shifts);
using Iota = std::make_index_sequence<args_count>;
const auto proxy_fn = VisitorCaller<FnT>::Make(Iota(), fn, arg_masks, arg_shifts);
return MatcherT(name, mask, expect, proxy_fn); return MatcherT(name, mask, expect, proxy_fn);
} }
}; };