Compare commits

...

9 commits

Author SHA1 Message Date
8a79bba0d4 [dynarmic] reduce matcher table noise and cache misses
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-04 18:26:53 +02:00
c21eea1304 [dynarmic] (prolly makes MSVC crash) - use 128MiB code cache
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-04 18:26:53 +02:00
75c9ba2adb [docs] fastmem draft
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-04 18:26:53 +02:00
7243704623 [dynarmic] fix tests_reader and tests_generator
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-04 18:26:53 +02:00
da67f58a51 [dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-04 18:26:53 +02:00
f8040d3b52 [dynarmic] use small vector experiment
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-04 18:26:53 +02:00
c30eadcb0d Fix license headers 2025-09-04 18:26:53 +02:00
2d9d7fafcb [dynarmic] reduce opt pass latency
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-09-04 18:26:53 +02:00
bbcd8aded6
Revert "[heap_tracker] Use ankerl map instead of rb tree (#249)" (#382)
This reverts commit c9a3baab5d.

this commit caused issues in ender magnolia or something, need to make
sure I didn't mess up the revert

Reviewed-on: #382
Reviewed-by: Shinmegumi <shinmegumi@eden-emu.dev>
Reviewed-by: Lizzie <lizzie@eden-emu.dev>
Reviewed-by: MaranBr <maranbr@outlook.com>
2025-09-04 16:04:42 +02:00
57 changed files with 2226 additions and 2511 deletions

View file

@ -262,23 +262,18 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
endif()
if (BOOST_NO_HEADERS)
target_link_libraries(common PUBLIC Boost::algorithm Boost::icl Boost::pool)
else()
target_link_libraries(common PUBLIC Boost::headers)
endif()
if (lz4_ADDED)
target_include_directories(common PRIVATE ${lz4_SOURCE_DIR}/lib)
endif()
target_link_libraries(common PUBLIC fmt::fmt stb::headers Threads::Threads)
target_link_libraries(common PRIVATE lz4::lz4 LLVM::Demangle zstd::zstd)
if (TARGET unordered_dense::unordered_dense)
# weird quirk of system installs
target_link_libraries(common PUBLIC unordered_dense::unordered_dense)
endif()
if(ANDROID)
# For ASharedMemory_create
target_link_libraries(common PRIVATE android)

View file

@ -1,5 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -35,60 +33,68 @@ HeapTracker::~HeapTracker() = default;
void HeapTracker::Map(size_t virtual_offset, size_t host_offset, size_t length,
MemoryPermission perm, bool is_separate_heap) {
bool rebuild_required = false;
// When mapping other memory, map pages immediately.
if (!is_separate_heap) {
m_buffer.Map(virtual_offset, host_offset, length, perm, false);
return;
}
{
// We are mapping part of a separate heap and insert into mappings.
// We are mapping part of a separate heap.
std::scoped_lock lk{m_lock};
m_map_count++;
const auto it = m_mappings.insert_or_assign(virtual_offset, SeparateHeapMap{
auto* const map = new SeparateHeapMap{
.vaddr = virtual_offset,
.paddr = host_offset,
.size = length,
.tick = m_tick++,
.perm = perm,
.is_resident = false,
});
// Update tick before possible rebuild.
it.first->second.tick = m_tick++;
// Check if we need to rebuild.
if (m_resident_map_count >= m_max_resident_map_count)
rebuild_required = true;
// Map the area.
m_buffer.Map(it.first->first, it.first->second.paddr, it.first->second.size, it.first->second.perm, false);
// This map is now resident.
it.first->second.is_resident = true;
m_resident_map_count++;
m_resident_mappings.insert(*it.first);
};
// Insert into mappings.
m_map_count++;
m_mappings.insert(*map);
}
// A rebuild was required, so perform it now.
if (rebuild_required)
this->RebuildSeparateHeapAddressSpace();
// Finally, map.
this->DeferredMapSeparateHeap(virtual_offset);
}
void HeapTracker::Unmap(size_t virtual_offset, size_t size, bool is_separate_heap) {
// If this is a separate heap...
if (is_separate_heap) {
std::scoped_lock lk{m_lock};
const SeparateHeapMap key{
.vaddr = virtual_offset,
};
// Split at the boundaries of the region we are removing.
this->SplitHeapMapLocked(virtual_offset);
this->SplitHeapMapLocked(virtual_offset + size);
// Erase all mappings in range.
auto it = m_mappings.find(virtual_offset);
while (it != m_mappings.end() && it->first < virtual_offset + size) {
auto it = m_mappings.find(key);
while (it != m_mappings.end() && it->vaddr < virtual_offset + size) {
// Get underlying item.
auto* const item = std::addressof(*it);
// If resident, erase from resident map.
if (it->second.is_resident) {
if (item->is_resident) {
ASSERT(--m_resident_map_count >= 0);
m_resident_mappings.erase(m_resident_mappings.find(it->first));
m_resident_mappings.erase(m_resident_mappings.iterator_to(*item));
}
// Erase from map.
ASSERT(--m_map_count >= 0);
it = m_mappings.erase(it);
// Free the item.
delete item;
}
}
// Unmap pages.
m_buffer.Unmap(virtual_offset, size, false);
}
@ -110,51 +116,110 @@ void HeapTracker::Protect(size_t virtual_offset, size_t size, MemoryPermission p
{
std::scoped_lock lk2{m_lock};
const SeparateHeapMap key{
.vaddr = next,
};
// Try to get the next mapping corresponding to this address.
const auto it = m_mappings.find(next);
const auto it = m_mappings.nfind(key);
if (it == m_mappings.end()) {
// There are no separate heap mappings remaining.
next = end;
should_protect = true;
} else if (it->first == cur) {
} else if (it->vaddr == cur) {
// We are in range.
// Update permission bits.
it->second.perm = perm;
it->perm = perm;
// Determine next address and whether we should protect.
next = cur + it->second.size;
should_protect = it->second.is_resident;
next = cur + it->size;
should_protect = it->is_resident;
} else /* if (it->vaddr > cur) */ {
// We weren't in range, but there is a block coming up that will be.
next = it->first;
next = it->vaddr;
should_protect = true;
}
}
// Clamp to end.
next = std::min(next, end);
// Reprotect, if we need to.
if (should_protect)
if (should_protect) {
m_buffer.Protect(cur, next - cur, perm);
}
// Advance.
cur = next;
}
}
bool HeapTracker::DeferredMapSeparateHeap(u8* fault_address) {
if (m_buffer.IsInVirtualRange(fault_address)) {
return this->DeferredMapSeparateHeap(fault_address - m_buffer.VirtualBasePointer());
}
return false;
}
bool HeapTracker::DeferredMapSeparateHeap(size_t virtual_offset) {
bool rebuild_required = false;
{
std::scoped_lock lk{m_lock};
// Check to ensure this was a non-resident separate heap mapping.
const auto it = this->GetNearestHeapMapLocked(virtual_offset);
if (it == m_mappings.end() || it->is_resident) {
return false;
}
// Update tick before possible rebuild.
it->tick = m_tick++;
// Check if we need to rebuild.
if (m_resident_map_count > m_max_resident_map_count) {
rebuild_required = true;
}
// Map the area.
m_buffer.Map(it->vaddr, it->paddr, it->size, it->perm, false);
// This map is now resident.
it->is_resident = true;
m_resident_map_count++;
m_resident_mappings.insert(*it);
}
if (rebuild_required) {
// A rebuild was required, so perform it now.
this->RebuildSeparateHeapAddressSpace();
}
return true;
}
void HeapTracker::RebuildSeparateHeapAddressSpace() {
std::scoped_lock lk{m_rebuild_lock, m_lock};
ASSERT(!m_resident_mappings.empty());
// Dump half of the mappings.
//
// Despite being worse in theory, this has proven to be better in practice than more
// regularly dumping a smaller amount, because it significantly reduces average case
// lock contention.
std::size_t const desired_count = std::min(m_resident_map_count, m_max_resident_map_count) / 2;
std::size_t const evict_count = m_resident_map_count - desired_count;
const size_t desired_count = std::min(m_resident_map_count, m_max_resident_map_count) / 2;
const size_t evict_count = m_resident_map_count - desired_count;
auto it = m_resident_mappings.begin();
for (std::size_t i = 0; i < evict_count && it != m_resident_mappings.end(); i++) {
for (size_t i = 0; i < evict_count && it != m_resident_mappings.end(); i++) {
// Unmark and unmap.
it->second.is_resident = false;
m_buffer.Unmap(it->first, it->second.size, false);
it->is_resident = false;
m_buffer.Unmap(it->vaddr, it->size, false);
// Advance.
ASSERT(--m_resident_map_count >= 0);
it = m_resident_mappings.erase(it);
@ -163,32 +228,53 @@ void HeapTracker::RebuildSeparateHeapAddressSpace() {
void HeapTracker::SplitHeapMap(VAddr offset, size_t size) {
std::scoped_lock lk{m_lock};
this->SplitHeapMapLocked(offset);
this->SplitHeapMapLocked(offset + size);
}
void HeapTracker::SplitHeapMapLocked(VAddr offset) {
auto it = this->GetNearestHeapMapLocked(offset);
if (it != m_mappings.end() && it->first != offset) {
// Adjust left iterator
auto const orig_size = it->second.size;
auto const left_size = offset - it->first;
it->second.size = left_size;
// Insert the new right map.
auto const right = SeparateHeapMap{
.paddr = it->second.paddr + left_size,
.size = orig_size - left_size,
.tick = it->second.tick,
.perm = it->second.perm,
.is_resident = it->second.is_resident,
};
m_map_count++;
auto rit = m_mappings.insert_or_assign(it->first + left_size, right);
if (rit.first->second.is_resident) {
m_resident_map_count++;
m_resident_mappings.insert(*rit.first);
}
const auto it = this->GetNearestHeapMapLocked(offset);
if (it == m_mappings.end() || it->vaddr == offset) {
// Not contained or no split required.
return;
}
// Cache the original values.
auto* const left = std::addressof(*it);
const size_t orig_size = left->size;
// Adjust the left map.
const size_t left_size = offset - left->vaddr;
left->size = left_size;
// Create the new right map.
auto* const right = new SeparateHeapMap{
.vaddr = left->vaddr + left_size,
.paddr = left->paddr + left_size,
.size = orig_size - left_size,
.tick = left->tick,
.perm = left->perm,
.is_resident = left->is_resident,
};
// Insert the new right map.
m_map_count++;
m_mappings.insert(*right);
// If resident, also insert into resident map.
if (right->is_resident) {
m_resident_map_count++;
m_resident_mappings.insert(*right);
}
}
HeapTracker::AddrTree::iterator HeapTracker::GetNearestHeapMapLocked(VAddr offset) {
const SeparateHeapMap key{
.vaddr = offset,
};
return m_mappings.find(key);
}
} // namespace Common

View file

@ -1,55 +1,93 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <mutex>
#include <set>
#include <shared_mutex>
#include <ankerl/unordered_dense.h>
#include "common/host_memory.h"
#include "common/intrusive_red_black_tree.h"
namespace Common {
struct SeparateHeapMap {
PAddr paddr{}; //8
std::size_t size{}; //8 (16)
std::size_t tick{}; //8 (24)
// 4 bits needed, sync with host_memory.h if needed
MemoryPermission perm : 4 = MemoryPermission::Read;
bool is_resident : 1 = false;
Common::IntrusiveRedBlackTreeNode addr_node{};
Common::IntrusiveRedBlackTreeNode tick_node{};
VAddr vaddr{};
PAddr paddr{};
size_t size{};
size_t tick{};
MemoryPermission perm{};
bool is_resident{};
};
struct SeparateHeapMapAddrComparator {
static constexpr int Compare(const SeparateHeapMap& lhs, const SeparateHeapMap& rhs) {
if (lhs.vaddr < rhs.vaddr) {
return -1;
} else if (lhs.vaddr <= (rhs.vaddr + rhs.size - 1)) {
return 0;
} else {
return 1;
}
}
};
struct SeparateHeapMapTickComparator {
static constexpr int Compare(const SeparateHeapMap& lhs, const SeparateHeapMap& rhs) {
if (lhs.tick < rhs.tick) {
return -1;
} else if (lhs.tick > rhs.tick) {
return 1;
} else {
return SeparateHeapMapAddrComparator::Compare(lhs, rhs);
}
}
};
static_assert(sizeof(SeparateHeapMap) == 32); //half a cache line! good for coherency
class HeapTracker {
public:
explicit HeapTracker(Common::HostMemory& buffer);
~HeapTracker();
void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm, bool is_separate_heap);
void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm,
bool is_separate_heap);
void Unmap(size_t virtual_offset, size_t size, bool is_separate_heap);
void Protect(size_t virtual_offset, size_t length, MemoryPermission perm);
inline u8* VirtualBasePointer() noexcept {
u8* VirtualBasePointer() {
return m_buffer.VirtualBasePointer();
}
bool DeferredMapSeparateHeap(u8* fault_address);
bool DeferredMapSeparateHeap(size_t virtual_offset);
private:
// TODO: You may want to "fake-map" the first 2GB of 64-bit address space
// and dedicate it entirely to a recursive PTE mapping :)
// However Ankerl is way better than using an RB tree, in all senses
using AddrTree = ankerl::unordered_dense::map<VAddr, SeparateHeapMap>;
AddrTree m_mappings;
using TicksTree = ankerl::unordered_dense::map<VAddr, SeparateHeapMap>;
TicksTree m_resident_mappings;
using AddrTreeTraits =
Common::IntrusiveRedBlackTreeMemberTraitsDeferredAssert<&SeparateHeapMap::addr_node>;
using AddrTree = AddrTreeTraits::TreeType<SeparateHeapMapAddrComparator>;
using TickTreeTraits =
Common::IntrusiveRedBlackTreeMemberTraitsDeferredAssert<&SeparateHeapMap::tick_node>;
using TickTree = TickTreeTraits::TreeType<SeparateHeapMapTickComparator>;
AddrTree m_mappings{};
TickTree m_resident_mappings{};
private:
void SplitHeapMap(VAddr offset, size_t size);
void SplitHeapMapLocked(VAddr offset);
AddrTree::iterator GetNearestHeapMapLocked(VAddr offset);
void RebuildSeparateHeapAddressSpace();
inline HeapTracker::AddrTree::iterator GetNearestHeapMapLocked(VAddr offset) noexcept {
return m_mappings.find(offset);
}
private:
Common::HostMemory& m_buffer;
const s64 m_max_resident_map_count;
std::shared_mutex m_rebuild_lock{};
std::mutex m_lock{};
s64 m_map_count{};

View file

@ -3,9 +3,47 @@
#ifdef __linux__
//#include "common/signal_chain.h"
#include "common/signal_chain.h"
#include "core/arm/dynarmic/arm_dynarmic.h"
//#include "core/hle/kernel/k_process.h"
//#include "core/memory.h"
#include "core/hle/kernel/k_process.h"
#include "core/memory.h"
namespace Core {
namespace {
thread_local Core::Memory::Memory* g_current_memory{};
std::once_flag g_registered{};
struct sigaction g_old_segv {};
void HandleSigSegv(int sig, siginfo_t* info, void* ctx) {
if (g_current_memory && g_current_memory->InvalidateSeparateHeap(info->si_addr)) {
return;
}
return g_old_segv.sa_sigaction(sig, info, ctx);
}
} // namespace
ScopedJitExecution::ScopedJitExecution(Kernel::KProcess* process) {
g_current_memory = std::addressof(process->GetMemory());
}
ScopedJitExecution::~ScopedJitExecution() {
g_current_memory = nullptr;
}
void ScopedJitExecution::RegisterHandler() {
std::call_once(g_registered, [] {
struct sigaction sa {};
sa.sa_sigaction = &HandleSigSegv;
sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
Common::SigAction(SIGSEGV, std::addressof(sa), std::addressof(g_old_segv));
});
}
} // namespace Core
#endif

View file

@ -26,4 +26,24 @@ constexpr HaltReason TranslateHaltReason(Dynarmic::HaltReason hr) {
return static_cast<HaltReason>(hr);
}
#ifdef __linux__
class ScopedJitExecution {
public:
explicit ScopedJitExecution(Kernel::KProcess* process);
~ScopedJitExecution();
static void RegisterHandler();
};
#else
class ScopedJitExecution {
public:
explicit ScopedJitExecution(Kernel::KProcess* process) {}
~ScopedJitExecution() {}
static void RegisterHandler() {}
};
#endif
} // namespace Core

View file

@ -210,12 +210,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ArmDynarmic32::MakeJit(Common::PageTable* pa
config.wall_clock_cntpct = m_uses_wall_clock;
config.enable_cycle_counting = !m_uses_wall_clock;
// Code cache size
#ifdef ARCHITECTURE_arm64
// Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
config.code_cache_size = std::uint32_t(128_MiB);
#else
config.code_cache_size = std::uint32_t(512_MiB);
#endif
// Allow memory fault handling to work
if (m_system.DebuggerEnabled()) {
@ -343,11 +339,15 @@ bool ArmDynarmic32::IsInThumbMode() const {
}
HaltReason ArmDynarmic32::RunThread(Kernel::KThread* thread) {
ScopedJitExecution sj(thread->GetOwnerProcess());
m_jit->ClearExclusiveState();
return TranslateHaltReason(m_jit->Run());
}
HaltReason ArmDynarmic32::StepThread(Kernel::KThread* thread) {
ScopedJitExecution sj(thread->GetOwnerProcess());
m_jit->ClearExclusiveState();
return TranslateHaltReason(m_jit->Step());
}
@ -389,6 +389,7 @@ ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProc
m_cp15(std::make_shared<DynarmicCP15>(*this)), m_core_index{core_index} {
auto& page_table_impl = process->GetPageTable().GetBasePageTable().GetImpl();
m_jit = MakeJit(&page_table_impl);
ScopedJitExecution::RegisterHandler();
}
ArmDynarmic32::~ArmDynarmic32() = default;

View file

@ -269,12 +269,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
config.wall_clock_cntpct = m_uses_wall_clock;
config.enable_cycle_counting = !m_uses_wall_clock;
// Code cache size
#ifdef ARCHITECTURE_arm64
// Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
config.code_cache_size = std::uint32_t(128_MiB);
#else
config.code_cache_size = std::uint32_t(512_MiB);
#endif
// Allow memory fault handling to work
if (m_system.DebuggerEnabled()) {
@ -374,11 +370,15 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
}
HaltReason ArmDynarmic64::RunThread(Kernel::KThread* thread) {
ScopedJitExecution sj(thread->GetOwnerProcess());
m_jit->ClearExclusiveState();
return TranslateHaltReason(m_jit->Run());
}
HaltReason ArmDynarmic64::StepThread(Kernel::KThread* thread) {
ScopedJitExecution sj(thread->GetOwnerProcess());
m_jit->ClearExclusiveState();
return TranslateHaltReason(m_jit->Step());
}
@ -418,6 +418,7 @@ ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProc
auto& page_table = process->GetPageTable().GetBasePageTable();
auto& page_table_impl = page_table.GetImpl();
m_jit = MakeJit(&page_table_impl, page_table.GetAddressSpaceWidth());
ScopedJitExecution::RegisterHandler();
}
ArmDynarmic64::~ArmDynarmic64() = default;

View file

@ -1266,6 +1266,10 @@ void KProcess::InitializeInterfaces() {
#ifdef HAS_NCE
if (this->IsApplication() && Settings::IsNceEnabled()) {
// Register the scoped JIT handler before creating any NCE instances
// so that its signal handler will appear first in the signal chain.
Core::ScopedJitExecution::RegisterHandler();
for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
m_arm_interfaces[i] = std::make_unique<Core::ArmNce>(m_kernel.System(), true, i);
}

View file

@ -61,7 +61,8 @@ struct Memory::Impl {
}
#ifdef __linux__
buffer.emplace(system.DeviceMemory().buffer);
heap_tracker.emplace(system.DeviceMemory().buffer);
buffer = std::addressof(*heap_tracker);
#else
buffer = std::addressof(system.DeviceMemory().buffer);
#endif
@ -1023,8 +1024,9 @@ struct Memory::Impl {
std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
std::mutex sys_core_guard;
std::optional<Common::HeapTracker> heap_tracker;
#ifdef __linux__
std::optional<Common::HeapTracker> buffer;
Common::HeapTracker* buffer{};
#else
Common::HostMemory* buffer{};
#endif
@ -1228,7 +1230,22 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
if (rasterizer) {
impl->InvalidateGPUMemory(ptr, size);
}
#ifdef __linux__
if (!rasterizer && mapped) {
impl->buffer->DeferredMapSeparateHeap(GetInteger(vaddr));
}
#endif
return mapped && ptr != nullptr;
}
bool Memory::InvalidateSeparateHeap(void* fault_address) {
#ifdef __linux__
return impl->buffer->DeferredMapSeparateHeap(static_cast<u8*>(fault_address));
#else
return false;
#endif
}
} // namespace Core::Memory

View file

@ -487,8 +487,13 @@ public:
* marked as debug or non-debug.
*/
void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
bool InvalidateSeparateHeap(void* fault_address);
private:
Core::System& system;

View file

@ -23,9 +23,6 @@ option(DYNARMIC_USE_PRECOMPILED_HEADERS "Use precompiled headers" ON)
option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF)
option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. cross-compiling)" OFF)
option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT})
if (NOT DEFINED DYNARMIC_FRONTENDS)
set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable")
endif()
# Default to a Release build
if (NOT CMAKE_BUILD_TYPE)

View file

@ -0,0 +1,5 @@
# Fast memory (Fastmem)
![Host to guest translation](./HostToGuest.svg)
![Fastmem translation](./Fastmem.svg)
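The draft currently stops at the diagrams. As a rough sketch of the idea they illustrate: the host reserves one large virtual-memory arena mirroring the guest address space, the JIT emits plain `base + guest_vaddr` loads and stores into it, and accesses to pages that are not yet backed fault into a SIGSEGV handler that materializes the page (compare `HeapTracker::DeferredMapSeparateHeap` elsewhere in this change set). The snippet below is illustrative only; `FastmemArena`, `MapGuestPage`, and the handler wiring are hypothetical names, not the emulator's actual API.

```c++
#include <signal.h>
#include <cstddef>
#include <cstdint>

// Hypothetical arena descriptor; the real state lives in HostMemory/HeapTracker.
struct FastmemArena {
    uint8_t* base;                           // host pointer backing guest address 0
    size_t size;                             // span of the reserved, mostly PROT_NONE region
    bool MapGuestPage(size_t guest_offset);  // back the faulting page; true on success
};

static FastmemArena* g_arena = nullptr;      // installed once at JIT startup

static void FastmemSegvHandler(int sig, siginfo_t* info, void* ctx) {
    (void)sig;
    (void)ctx;
    auto* const addr = static_cast<uint8_t*>(info->si_addr);
    if (g_arena && addr >= g_arena->base && addr < g_arena->base + g_arena->size) {
        // The fault landed inside the arena: materialize the page so the
        // faulting guest load/store can simply retry.
        if (g_arena->MapGuestPage(static_cast<size_t>(addr - g_arena->base)))
            return;
    }
    // Not our fault (or mapping failed): fall back to the default action
    // instead of spinning on the same address forever.
    signal(SIGSEGV, SIG_DFL);
}
```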

Two SVG diagrams (the figures referenced above) are added with this change; their diffs are suppressed as too long. Sizes: 128 KiB and 98 KiB.

View file

@ -16,19 +16,31 @@ Note that `Use`ing a value decrements its `use_count` by one. When the `use_coun
The member functions on `RegAlloc` are just a combination of the above concepts.
The following registers are reserved for internal use and should NOT participate in register allocation:
- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access.
- `%rsp`: Stack pointer.
By convention, `%r15` holds the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, dedicating an entire register (out of the 12 available) is preferable to inlining a directly computed immediate.
Never modify `%r15`: this register is "immutable" for the entire duration of a JIT block.
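As a concrete illustration (hedged: this assumes, as in the RSB notes, that `code.ABI_JIT_PTR` is the Xbyak alias for `%r15`), every piece of JIT state is reached as a fixed offset off that one pinned register, so no save/restore traffic is ever emitted for it:

```c++
// Illustrative only: bump JitState::rsb_ptr through the pinned state register.
// ABI_JIT_PTR (%r15) is never clobbered, so nothing needs to be preserved here.
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
code->add(index_reg, 1);
code->and_(index_reg, u32(JitState::RSBSize - 1));
code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
```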
### `Scratch`
```c++
Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr);
Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm);
```
At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope.
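A hedged usage sketch (the surrounding emitter, `EmitContext`, and `EmitExample` are illustrative names, not the actual dynarmic signatures):

```c++
void EmitX64::EmitExample(EmitContext& ctx, IR::Inst* inst) {
    // Any free GPR; ours to clobber until the end of the allocation scope.
    Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
    code.mov(tmp, u64(0xDEADBEEFCAFEBABE));
    // ... use tmp freely; it is discarded automatically, never written back ...
}
```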
### Pure `Use`
```c++
Xbyak::Reg64 UseGpr(Argument& arg);
Xbyak::Xmm UseXmm(Argument& arg);
OpArg UseOpArg(Argument& arg);
void Use(Argument& arg, HostLoc host_loc);
```
At runtime, the value corresponding to `arg` will be placed in a register. The actual register is determined by
which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it
@ -39,9 +51,11 @@ This register **must not** have its value changed.
### `UseScratch`
```c++
Xbyak::Reg64 UseScratchGpr(Argument& arg);
Xbyak::Xmm UseScratchXmm(Argument& arg);
void UseScratch(Argument& arg, HostLoc host_loc);
```
At runtime, the value corresponding to `arg` will be placed in a register. The actual register is determined by
which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it
@ -55,7 +69,9 @@ You are free to modify the value in the register. The register is discarded at the end of the allocation scope.
A `Define` is the definition of a value. This is the only time when a value may be set.
```c++
void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
```
By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the
value to the specified register `reg`.
@ -64,7 +80,9 @@ value to the specified register `reg`.
Adding a `Define` to an existing value.
```c++
void DefineValue(IR::Inst* inst, Argument& arg);
```
You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are
emitted.
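Putting the pieces together, a hedged sketch of a typical two-operand emit (`EmitAdd32` and the `GetArgumentInfo` plumbing are illustrative):

```c++
void EmitX64::EmitAdd32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    // UseScratch: operand 0 is both input and output, so we need clobber
    // rights; a plain Use would forbid modifying the register.
    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    Xbyak::Reg32 rhs = ctx.reg_alloc.UseGpr(args[1]).cvt32();
    code.add(result, rhs);
    // Declare that inst's value now lives in `result`.
    ctx.reg_alloc.DefineValue(inst, result);
}
```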

View file

@ -23,15 +23,17 @@ One complication dynarmic has is that a compiled block is not uniquely identifiable by
the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by
computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.
```c++
u64 LocationDescriptor::UniqueHash() const {
// This value MUST BE UNIQUE.
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
u64 pc_u64 = u64(arm_pc) << 32;
u64 fpscr_u64 = u64(fpscr.Value());
u64 t_u64 = cpsr.T() ? 1 : 0;
u64 e_u64 = cpsr.E() ? 2 : 0;
return pc_u64 | fpscr_u64 | t_u64 | e_u64;
}
```
## Our implementation isn't actually a stack
@ -49,97 +51,107 @@ host addresses for the corresponding compiled blocks.
size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8
showed degraded performance.
```c++
struct JitState {
    // ...

    static constexpr size_t RSBSize = 8; // MUST be a power of 2.
    u32 rsb_ptr = 0;
    std::array<u64, RSBSize> rsb_location_descriptors;
    std::array<u64, RSBSize> rsb_codeptrs;
    void ResetRSB();

    // ...
};
```
### RSB Push
We insert our prediction at the insertion point iff the RSB doesn't already
contain a prediction with the same `UniqueHash`.
```c++
void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    ASSERT(inst->GetArg(0).IsImmediate());
    u64 imm64 = inst->GetArg(0).GetU64();

    Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
    Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
    Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
    u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
                       ? u64(unique_hash_to_code_ptr[imm64])
                       : u64(code->GetReturnFromRunCodeAddress());

    code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
    code->add(index_reg, 1);
    code->and_(index_reg, u32(JitState::RSBSize - 1));

    code->mov(loc_desc_reg, u64(imm64));
    CodePtr patch_location = code->getCurr<CodePtr>();
    patch_unique_hash_locations[imm64].emplace_back(patch_location);
    code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
    code->EnsurePatchLocationSize(patch_location, 10);

    Xbyak::Label label;
    for (size_t i = 0; i < JitState::RSBSize; ++i) {
        code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
        code->je(label, code->T_SHORT);
    }

    code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
    code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
    code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
    code->L(label);
}
```
In pseudocode:
```c++
for (i := 0 .. RSBSize-1)
    if (rsb_location_descriptors[i] == imm64)
        goto label;
rsb_ptr++;
rsb_ptr %= RSBSize;
rsb_location_descriptors[rsb_ptr] = imm64; //< The UniqueHash
rsb_codeptrs[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
label:
```
## RSB Pop
To check if a prediction is in the RSB, we linearly scan the RSB.
```c++
void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
    using namespace Xbyak::util;

    // This calculation has to match up with IREmitter::PushRSB
    code->mov(ecx, MJitStateReg(Arm::Reg::PC));
    code->shl(rcx, 32);
    code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
    code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
    code->or_(rbx, rcx);

    code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
    for (size_t i = 0; i < JitState::RSBSize; ++i) {
        code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
        code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
    }
    code->jmp(rax);
}
```
In pseudocode:
```c++
rbx := ComputeUniqueHash()
rax := ReturnToDispatch
for (i := 0 .. RSBSize-1)
if (rbx == rsb_location_descriptors[i])
rax = rsb_codeptrs[i]
goto rax
```

View file

@ -60,7 +60,7 @@ AddJsonPackage(
# endif()
# endif()
# unordered_dense - already in root
# unordered_dense
# AddJsonPackage(
# NAME unordered-dense

View file

@ -56,8 +56,6 @@ add_library(dynarmic
common/lut_from_list.h
common/math_util.cpp
common/math_util.h
common/memory_pool.cpp
common/memory_pool.h
common/safe_ops.h
common/spin_lock.h
common/string_util.h
@ -78,7 +76,6 @@ add_library(dynarmic
ir/basic_block.cpp
ir/basic_block.h
ir/cond.h
ir/ir_emitter.cpp
ir/ir_emitter.h
ir/location_descriptor.cpp
ir/location_descriptor.h
@ -87,78 +84,59 @@ add_library(dynarmic
ir/opcodes.cpp
ir/opcodes.h
ir/opcodes.inc
ir/opt/constant_propagation_pass.cpp
ir/opt/dead_code_elimination_pass.cpp
ir/opt/identity_removal_pass.cpp
ir/opt/ir_matcher.h
ir/opt/naming_pass.cpp
ir/opt/passes.h
ir/opt/polyfill_pass.cpp
ir/opt/verification_pass.cpp
ir/opt_passes.cpp
ir/opt_passes.h
ir/terminal.h
ir/type.cpp
ir/type.h
ir/value.cpp
ir/value.h
# A32
frontend/A32/a32_ir_emitter.cpp
frontend/A32/a32_ir_emitter.h
frontend/A32/a32_location_descriptor.cpp
frontend/A32/a32_location_descriptor.h
frontend/A32/decoder/arm.h
frontend/A32/decoder/arm.inc
frontend/A32/decoder/asimd.h
frontend/A32/decoder/asimd.inc
frontend/A32/decoder/thumb16.h
frontend/A32/decoder/thumb16.inc
frontend/A32/decoder/thumb32.h
frontend/A32/decoder/thumb32.inc
frontend/A32/decoder/vfp.h
frontend/A32/decoder/vfp.inc
frontend/A32/disassembler/disassembler.h
frontend/A32/disassembler/disassembler_arm.cpp
frontend/A32/disassembler/disassembler_thumb.cpp
frontend/A32/FPSCR.h
frontend/A32/ITState.h
frontend/A32/PSR.h
frontend/A32/translate/a32_translate.cpp
frontend/A32/translate/a32_translate.h
frontend/A32/translate/conditional_state.cpp
frontend/A32/translate/conditional_state.h
frontend/A32/translate/translate_arm.cpp
frontend/A32/translate/translate_thumb.cpp
interface/A32/a32.h
interface/A32/arch_version.h
interface/A32/config.h
interface/A32/coprocessor.h
interface/A32/coprocessor_util.h
interface/A32/disassembler.h
# A64
frontend/A64/a64_ir_emitter.cpp
frontend/A64/a64_ir_emitter.h
frontend/A64/a64_location_descriptor.cpp
frontend/A64/a64_location_descriptor.h
frontend/A64/decoder/a64.h
frontend/A64/decoder/a64.inc
frontend/A64/translate/a64_translate.cpp
frontend/A64/translate/a64_translate.h
interface/A64/a64.h
interface/A64/config.h
)
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic PRIVATE
frontend/A32/a32_ir_emitter.cpp
frontend/A32/a32_ir_emitter.h
frontend/A32/a32_location_descriptor.cpp
frontend/A32/a32_location_descriptor.h
frontend/A32/decoder/arm.h
frontend/A32/decoder/arm.inc
frontend/A32/decoder/asimd.h
frontend/A32/decoder/asimd.inc
frontend/A32/decoder/thumb16.h
frontend/A32/decoder/thumb16.inc
frontend/A32/decoder/thumb32.h
frontend/A32/decoder/thumb32.inc
frontend/A32/decoder/vfp.h
frontend/A32/decoder/vfp.inc
frontend/A32/disassembler/disassembler.h
frontend/A32/disassembler/disassembler_arm.cpp
frontend/A32/disassembler/disassembler_thumb.cpp
frontend/A32/FPSCR.h
frontend/A32/ITState.h
frontend/A32/PSR.h
frontend/A32/translate/a32_translate.cpp
frontend/A32/translate/a32_translate.h
frontend/A32/translate/conditional_state.cpp
frontend/A32/translate/conditional_state.h
frontend/A32/translate/translate_arm.cpp
frontend/A32/translate/translate_thumb.cpp
interface/A32/a32.h
interface/A32/arch_version.h
interface/A32/config.h
interface/A32/coprocessor.h
interface/A32/coprocessor_util.h
interface/A32/disassembler.h
ir/opt/a32_constant_memory_reads_pass.cpp
ir/opt/a32_get_set_elimination_pass.cpp
)
endif()
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic PRIVATE
frontend/A64/a64_ir_emitter.cpp
frontend/A64/a64_ir_emitter.h
frontend/A64/a64_location_descriptor.cpp
frontend/A64/a64_location_descriptor.h
frontend/A64/decoder/a64.h
frontend/A64/decoder/a64.inc
frontend/A64/translate/a64_translate.cpp
frontend/A64/translate/a64_translate.h
interface/A64/a64.h
interface/A64/config.h
ir/opt/a64_callback_config_pass.cpp
ir/opt/a64_get_set_elimination_pass.cpp
ir/opt/a64_merge_interpret_blocks.cpp
)
endif()
if ("x86_64" IN_LIST ARCHITECTURE)
target_compile_definitions(dynarmic PRIVATE XBYAK_OLD_DISP_CHECK=1)
target_link_libraries(dynarmic
@ -211,29 +189,21 @@ if ("x86_64" IN_LIST ARCHITECTURE)
common/spin_lock_x64.h
common/x64_disassemble.cpp
common/x64_disassemble.h
# A32
backend/x64/a32_emit_x64.cpp
backend/x64/a32_emit_x64.h
backend/x64/a32_emit_x64_memory.cpp
backend/x64/a32_interface.cpp
backend/x64/a32_jitstate.cpp
backend/x64/a32_jitstate.h
# A64
backend/x64/a64_emit_x64.cpp
backend/x64/a64_emit_x64.h
backend/x64/a64_emit_x64_memory.cpp
backend/x64/a64_interface.cpp
backend/x64/a64_jitstate.cpp
backend/x64/a64_jitstate.h
)
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
target_architecture_specific_sources(dynarmic "x86_64"
backend/x64/a32_emit_x64.cpp
backend/x64/a32_emit_x64.h
backend/x64/a32_emit_x64_memory.cpp
backend/x64/a32_interface.cpp
backend/x64/a32_jitstate.cpp
backend/x64/a32_jitstate.h
)
endif()
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
target_architecture_specific_sources(dynarmic "x86_64"
backend/x64/a64_emit_x64.cpp
backend/x64/a64_emit_x64.h
backend/x64/a64_emit_x64_memory.cpp
backend/x64/a64_interface.cpp
backend/x64/a64_jitstate.cpp
backend/x64/a64_jitstate.h
)
endif()
endif()
if ("arm64" IN_LIST ARCHITECTURE)
@ -277,25 +247,17 @@ if ("arm64" IN_LIST ARCHITECTURE)
backend/arm64/verbose_debugging_output.h
common/spin_lock_arm64.cpp
common/spin_lock_arm64.h
# A32
backend/arm64/a32_address_space.cpp
backend/arm64/a32_address_space.h
backend/arm64/a32_core.h
backend/arm64/a32_interface.cpp
# A64
backend/arm64/a64_address_space.cpp
backend/arm64/a64_address_space.h
backend/arm64/a64_core.h
backend/arm64/a64_interface.cpp
)
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
target_architecture_specific_sources(dynarmic "arm64"
backend/arm64/a32_address_space.cpp
backend/arm64/a32_address_space.h
backend/arm64/a32_core.h
backend/arm64/a32_interface.cpp
)
endif()
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
target_architecture_specific_sources(dynarmic "arm64"
backend/arm64/a64_address_space.cpp
backend/arm64/a64_address_space.h
backend/arm64/a64_core.h
backend/arm64/a64_interface.cpp
)
endif()
endif()
if ("riscv" IN_LIST ARCHITECTURE)
@ -324,21 +286,14 @@ if ("riscv" IN_LIST ARCHITECTURE)
backend/riscv64/reg_alloc.cpp
backend/riscv64/reg_alloc.h
backend/riscv64/stack_layout.h
# A32
backend/riscv64/a32_address_space.cpp
backend/riscv64/a32_address_space.h
backend/riscv64/a32_core.h
backend/riscv64/a32_interface.cpp
backend/riscv64/code_block.h
)
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic PRIVATE
backend/riscv64/a32_address_space.cpp
backend/riscv64/a32_address_space.h
backend/riscv64/a32_core.h
backend/riscv64/a32_interface.cpp
backend/riscv64/code_block.h
)
endif()
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
endif()
message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture")
endif()
if (WIN32)
@ -416,7 +371,7 @@ target_link_libraries(dynarmic
)
if (BOOST_NO_HEADERS)
target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool)
else()
target_link_libraries(dynarmic PRIVATE Boost::headers)
endif()

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
@ -16,7 +19,7 @@
#include "dynarmic/frontend/A32/translate/a32_translate.h"
#include "dynarmic/interface/A32/config.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/opt_passes.h"
namespace Dynarmic::Backend::Arm64 {
@ -163,21 +166,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
Optimization::PolyfillPass(ir_block, {});
Optimization::NamingPass(ir_block);
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
Optimization::IdentityRemovalPass(ir_block);
Optimization::VerificationPass(ir_block);
Optimization::Optimize(ir_block, conf, {});
return ir_block;
}

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
@ -15,7 +18,7 @@
#include "dynarmic/frontend/A64/translate/a64_translate.h"
#include "dynarmic/interface/A64/config.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/opt_passes.h"
namespace Dynarmic::Backend::Arm64 {
@ -331,22 +334,7 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code,
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
Optimization::A64CallbackConfigPass(ir_block, conf);
Optimization::NamingPass(ir_block);
if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
Optimization::A64GetSetElimination(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
}
Optimization::VerificationPass(ir_block);
Optimization::Optimize(ir_block, conf, {});
return ir_block;
}

View file

@ -15,7 +15,7 @@
#include "dynarmic/backend/riscv64/stack_layout.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/frontend/A32/translate/a32_translate.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/opt_passes.h"
namespace Dynarmic::Backend::RV64 {
@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
Optimization::PolyfillPass(ir_block, {});
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
Optimization::VerificationPass(ir_block);
Optimization::Optimize(ir_block, conf, {});
return ir_block;
}

View file

@ -29,7 +29,7 @@
#include "dynarmic/interface/A32/a32.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/location_descriptor.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/opt_passes.h"
namespace Dynarmic::A32 {
@ -217,19 +217,7 @@ private:
block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE);
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
Optimization::PolyfillPass(ir_block, polyfill_options);
Optimization::NamingPass(ir_block);
if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true});
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
Optimization::IdentityRemovalPass(ir_block);
Optimization::VerificationPass(ir_block);
Optimization::Optimize(ir_block, conf, polyfill_options);
return emitter.Emit(ir_block);
}

View file

@ -25,7 +25,7 @@
#include "dynarmic/frontend/A64/translate/a64_translate.h"
#include "dynarmic/interface/A64/a64.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/opt_passes.h"
namespace Dynarmic::A64 {
@ -275,21 +275,7 @@ private:
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
Optimization::PolyfillPass(ir_block, polyfill_options);
Optimization::A64CallbackConfigPass(ir_block, conf);
Optimization::NamingPass(ir_block);
if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) {
Optimization::A64GetSetElimination(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
}
if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) {
Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
}
Optimization::VerificationPass(ir_block);
Optimization::Optimize(ir_block, conf, polyfill_options);
return emitter.Emit(ir_block).entrypoint;
}

View file

@ -118,7 +118,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
values.push_back(inst);
ASSERT(size_t(total_uses) + inst->UseCount() < std::numeric_limits<uint16_t>::max());
total_uses += inst->UseCount();
max_bit_width = std::max<uint8_t>(max_bit_width, GetBitWidth(inst->GetType()));
max_bit_width = std::max<uint8_t>(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType())));
}
void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {

View file

@ -12,6 +12,7 @@
#include <functional>
#include <optional>
#include "boost/container/small_vector.hpp"
#include "dynarmic/common/common_types.h"
#include <xbyak/xbyak.h>
#include <boost/container/static_vector.hpp>
@ -77,13 +78,13 @@ public:
return std::find(values.begin(), values.end(), inst) != values.end();
}
inline size_t GetMaxBitWidth() const noexcept {
return max_bit_width;
return 1 << max_bit_width;
}
void AddValue(IR::Inst* inst) noexcept;
void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
private:
//non trivial
std::vector<IR::Inst*> values; //24
boost::container::small_vector<IR::Inst*, 3> values; //24
// Block state
uint16_t total_uses = 0; //8
//sometimes zeroed
@ -93,10 +94,10 @@ private:
uint16_t is_being_used_count = 0; //8
uint16_t current_references = 0; //8
// Value state
uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128
uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 7)
uint8_t lru_counter : 2 = 0; //1
bool is_scratch : 1 = false; //1
bool is_set_last_use : 1 = false; //1
alignas(16) uint8_t lru_counter = 0; //1
friend class RegAlloc;
};
static_assert(sizeof(HostLocInfo) == 64);

View file

@ -1,13 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/common/memory_pool.h"
#include <cstdlib>
namespace Dynarmic::Common {
} // namespace Dynarmic::Common

View file

@ -1,61 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <cstddef>
#include <vector>
namespace Dynarmic::Common {
/// @tparam object_size Byte-size of objects to construct
/// @tparam slab_size Number of objects to have per slab
template<size_t object_size, size_t slab_size>
class Pool {
public:
inline Pool() noexcept {
AllocateNewSlab();
}
inline ~Pool() noexcept {
std::free(current_slab);
for (char* slab : slabs) {
std::free(slab);
}
}
Pool(const Pool&) = delete;
Pool(Pool&&) = delete;
Pool& operator=(const Pool&) = delete;
Pool& operator=(Pool&&) = delete;
/// @brief Returns a pointer to an `object_size`-bytes block of memory.
[[nodiscard]] void* Alloc() noexcept {
if (remaining == 0) {
slabs.push_back(current_slab);
AllocateNewSlab();
}
void* ret = static_cast<void*>(current_ptr);
current_ptr += object_size;
remaining--;
return ret;
}
private:
/// @brief Allocates a completely new memory slab.
/// Used when an entirely new slab is needed
/// due to the current one running out of usable space.
void AllocateNewSlab() noexcept {
current_slab = static_cast<char*>(std::malloc(object_size * slab_size));
current_ptr = current_slab;
remaining = slab_size;
}
std::vector<char*> slabs;
char* current_slab = nullptr;
char* current_ptr = nullptr;
size_t remaining = 0;
};
} // namespace Dynarmic::Common

View file

@ -9,12 +9,9 @@
#pragma once
#include <string>
#include <utility>
#include <fmt/format.h>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"
#include "dynarmic/interface/A32/coprocessor_util.h"
#include "dynarmic/ir/cond.h"
@ -89,23 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) {
inline size_t RegNumber(Reg reg) {
ASSERT(reg != Reg::INVALID_REG);
return static_cast<size_t>(reg);
return size_t(reg);
}
inline size_t RegNumber(ExtReg reg) {
if (IsSingleExtReg(reg)) {
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::S0);
return size_t(reg) - size_t(ExtReg::S0);
} else if (IsDoubleExtReg(reg)) {
return size_t(reg) - size_t(ExtReg::D0);
}
if (IsDoubleExtReg(reg)) {
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::D0);
}
if (IsQuadExtReg(reg)) {
return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::Q0);
}
ASSERT_MSG(false, "Invalid extended register");
ASSERT(IsQuadExtReg(reg));
return size_t(reg) - size_t(ExtReg::Q0);
}
inline Reg operator+(Reg reg, size_t number) {

View file

@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher<Visitor, u32>;
template<typename V>
std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
std::vector<ASIMDMatcher<V>> table = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
std::vector<std::pair<const char*, ASIMDMatcher<V>>> table = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./asimd.inc"
#undef INST
};
// Exceptions to the rule of thumb.
const std::set<std::string> comes_first{
"VBIC, VMOV, VMVN, VORR (immediate)",
@ -53,19 +50,21 @@ std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
"VQDMULH (scalar)",
"VQRDMULH (scalar)",
};
const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
return comes_first.count(matcher.GetName()) > 0;
const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
return comes_first.count(e.first) > 0;
});
const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
return comes_last.count(matcher.GetName()) == 0;
const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
return comes_last.count(e.first) == 0;
});
// If a matcher has more bits in its mask it is more specific, so it should come first.
std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) {
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) {
return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
});
return table;
std::vector<ASIMDMatcher<V>> final_table;
final_table.reserve(table.size());
std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) {
return e.second;
});
return final_table;
}
template<typename V>

View file

@ -30,22 +30,18 @@ std::optional<std::reference_wrapper<const VFPMatcher<V>>> DecodeVFP(u32 instruc
static const struct Tables {
Table unconditional;
Table conditional;
} tables = [] {
} tables = []() {
Table list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./vfp.inc"
#undef INST
};
const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
return (matcher.GetMask() & 0xF0000000) == 0xF0000000;
});
return Tables{
Table{list.begin(), division},
Table{division, list.end()},
Table{list.begin(), it},
Table{it, list.end()},
};
}();

View file

@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) {
template<typename V>
constexpr DecodeTable<V> GetDecodeTable() {
std::vector<Matcher<V>> list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
std::vector<std::pair<const char*, Matcher<V>>> list = {
#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
#include "./a64.inc"
#undef INST
};
// If a matcher has more bits in its mask it is more specific, so it should come first.
std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) {
std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) {
// If a matcher has more bits in its mask it is more specific, so it should come first.
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
});
// Exceptions to the above rule of thumb.
std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
std::stable_partition(list.begin(), list.end(), [&](const auto& e) {
return std::set<std::string>{
"MOVI, MVNI, ORR, BIC (vector, immediate)",
"FMOV (vector, immediate)",
"Unallocated SIMD modified immediate",
}.count(matcher.GetName()) > 0;
}.count(e.first) > 0;
});
DecodeTable<V> table{};
for (size_t i = 0; i < table.size(); ++i) {
for (auto matcher : list) {
const auto expect = detail::ToFastLookupIndex(matcher.GetExpected());
const auto mask = detail::ToFastLookupIndex(matcher.GetMask());
for (auto const& e : list) {
const auto expect = detail::ToFastLookupIndex(e.second.GetExpected());
const auto mask = detail::ToFastLookupIndex(e.second.GetMask());
if ((i & mask) == expect) {
table[i].push_back(matcher);
table[i].push_back(e.second);
}
}
}

View file

@ -70,11 +70,9 @@ struct detail {
return std::make_tuple(mask, expect);
}
/**
* Generates the masks and shifts for each argument.
* A '-' in a bitstring indicates that we don't care about that value.
* An argument is specified by a continuous string of the same character.
*/
/// @brief Generates the masks and shifts for each argument.
/// A '-' in a bitstring indicates that we don't care about that value.
/// An argument is specified by a continuous string of the same character.
template<size_t N>
static consteval auto GetArgInfo(std::array<char, opcode_bitsize> bitstring) {
std::array<opcode_type, N> masks = {};
@ -98,7 +96,6 @@ struct detail {
if constexpr (N > 0) {
const size_t bit_position = opcode_bitsize - i - 1;
if (arg_index >= N)
throw std::out_of_range("Unexpected field");
@ -109,20 +106,16 @@ struct detail {
}
}
}
#if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__)
// Avoids an MSVC ICE and an Android NDK issue.
ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; }));
#endif
return std::make_tuple(masks, shifts);
}
/**
* This struct's Make member function generates a lambda which decodes an instruction based on
* the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a
* template argument.
*/
/// @brief This struct's Make member function generates a lambda which decodes an instruction
/// based on the provided arg_masks and arg_shifts. The Visitor member function to call is
/// provided as a template argument.
template<typename FnT>
struct VisitorCaller;
@ -130,36 +123,36 @@ struct detail {
# pragma warning(push)
# pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning)
#endif
template<typename Visitor, typename... Args, typename CallRetT>
struct VisitorCaller<CallRetT (Visitor::*)(Args...)> {
template<typename V, typename... Args, typename ReturnType>
struct VisitorCaller<ReturnType (V::*)(Args...)> {
template<size_t... iota>
static auto Make(std::integer_sequence<size_t, iota...>,
CallRetT (Visitor::*const fn)(Args...),
static constexpr auto Make(std::integer_sequence<size_t, iota...>,
ReturnType (V::*const fn)(Args...),
const std::array<opcode_type, sizeof...(iota)> arg_masks,
const std::array<size_t, sizeof...(iota)> arg_shifts) {
static_assert(std::is_same_v<visitor_type, Visitor>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) {
static_assert(std::is_same_v<visitor_type, V>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) {
(void)instruction;
(void)arg_masks;
(void)arg_shifts;
return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
};
}
};
template<typename Visitor, typename... Args, typename CallRetT>
struct VisitorCaller<CallRetT (Visitor::*)(Args...) const> {
template<typename V, typename... Args, typename ReturnType>
struct VisitorCaller<ReturnType (V::*)(Args...) const> {
template<size_t... iota>
static auto Make(std::integer_sequence<size_t, iota...>,
CallRetT (Visitor::*const fn)(Args...) const,
static constexpr auto Make(std::integer_sequence<size_t, iota...>,
ReturnType (V::*const fn)(Args...) const,
const std::array<opcode_type, sizeof...(iota)> arg_masks,
const std::array<size_t, sizeof...(iota)> arg_shifts) {
static_assert(std::is_same_v<visitor_type, const Visitor>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) {
static_assert(std::is_same_v<visitor_type, const V>, "Member function is not from Matcher's Visitor");
return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) {
(void)instruction;
(void)arg_masks;
(void)arg_shifts;
return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
};
}
};
@ -167,27 +160,21 @@ struct detail {
# pragma warning(pop)
#endif
/**
* Creates a matcher that can match and parse instructions based on bitstring.
* See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
*/
template<auto bitstring, typename FnT>
static auto GetMatcher(FnT fn, const char* const name) {
constexpr size_t args_count = mcl::parameter_count_v<FnT>;
/// @brief Creates a matcher that can match and parse instructions based on bitstring.
/// See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
template<auto bitstring, typename F>
static constexpr auto GetMatcher(F fn) {
constexpr size_t args_count = mcl::parameter_count_v<F>;
constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring));
constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring));
constexpr auto arg_masks = std::get<0>(GetArgInfo<args_count>(bitstring));
constexpr auto arg_shifts = std::get<1>(GetArgInfo<args_count>(bitstring));
using Iota = std::make_index_sequence<args_count>;
const auto proxy_fn = VisitorCaller<FnT>::Make(Iota(), fn, arg_masks, arg_shifts);
return MatcherT(name, mask, expect, proxy_fn);
const auto proxy_fn = VisitorCaller<F>::Make(std::make_index_sequence<args_count>(), fn, arg_masks, arg_shifts);
return MatcherT(mask, expect, proxy_fn);
}
};
#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn, name)
#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher<bitstring>(&V::fn)
} // namespace detail
} // namespace Dynarmic::Decoder
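To make the bitstring convention described above concrete, here is a small self-contained sketch; the 16-bit pattern "0101vvvvmmmm----" is an invented example, not an instruction from the real tables:

#include <cstdint>
#include <cstdio>

// Pattern "0101vvvvmmmm----":
//   fixed bits: mask = 0b1111'0000'0000'0000, expect = 0b0101'0000'0000'0000
//   arg v: mask 0b0000'1111'0000'0000, shift 8
//   arg m: mask 0b0000'0000'1111'0000, shift 4   ('-' bits are don't-care)
int main() {
    const uint16_t inst = 0b0101'1010'0011'0000;
    const uint16_t mask = 0b1111'0000'0000'0000, expect = 0b0101'0000'0000'0000;
    if ((inst & mask) == expect) {
        const unsigned v = (inst & 0x0F00u) >> 8;  // 10
        const unsigned m = (inst & 0x00F0u) >> 4;  // 3
        std::printf("v=%u m=%u\n", v, m);
    }
}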

View file

@ -31,14 +31,8 @@ public:
using visitor_type = Visitor;
using handler_return_type = typename Visitor::instruction_return_type;
using handler_function = std::function<handler_return_type(Visitor&, opcode_type)>;
Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func)
: name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {}
/// Gets the name of this type of instruction.
const char* GetName() const {
return name;
}
Matcher(opcode_type mask, opcode_type expected, handler_function func)
: mask{mask}, expected{expected}, fn{std::move(func)} {}
/// Gets the mask for this instruction.
opcode_type GetMask() const {
@ -70,7 +64,6 @@ public:
}
private:
const char* name;
opcode_type mask;
opcode_type expected;
handler_function fn;
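The net effect of this change, sketched standalone (MiniMatcher is illustrative): dropping the name pointer leaves only the mask, the expected bits, and the handler, which shrinks each table entry and tightens the matcher loop without touching the hot-path test.

#include <cstdint>
#include <functional>

struct MiniMatcher {
    uint32_t mask;
    uint32_t expected;
    std::function<void(uint32_t)> fn;
    // The matching test is unchanged; only the cold GetName() accessor is gone.
    bool Matches(uint32_t inst) const { return (inst & mask) == expected; }
};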

View file

@ -15,8 +15,6 @@
#include <fmt/format.h>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/memory_pool.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/frontend/A64/a64_types.h"
#include "dynarmic/ir/cond.h"
@ -27,8 +25,7 @@ namespace Dynarmic::IR {
Block::Block(const LocationDescriptor& location)
: location{location},
end_location{location},
cond{Cond::AL},
instruction_alloc_pool{std::make_unique<std::remove_reference_t<decltype(*instruction_alloc_pool)>>()}
cond{Cond::AL}
{
}
@ -40,7 +37,7 @@ Block::Block(const LocationDescriptor& location)
/// @param args A sequence of Value instances used as arguments for the instruction.
/// @returns Iterator to the newly created instruction.
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);
IR::Inst* inst = new IR::Inst(opcode);
DEBUG_ASSERT(args.size() == inst->NumArgs());
std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
inst->SetArg(index, arg);
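For context on what was removed here: Common::Pool bump-allocated Inst-sized slots (2 MiB worth, per the header change below) and PrependNewInst placement-new'd into them; the replacement is a plain per-instruction heap allocation. A minimal sketch of the old strategy, with illustrative names:

#include <cstddef>
#include <vector>

template<std::size_t SlotSize, std::size_t SlotCount>
class BumpPool {
    std::vector<std::byte> storage = std::vector<std::byte>(SlotSize * SlotCount);
    std::size_t next = 0;
public:
    // No growth or bounds handling; a sketch only.
    void* Alloc() { return storage.data() + SlotSize * next++; }
};
// Old pattern: IR::Inst* inst = new (pool.Alloc()) IR::Inst(opcode);
// New pattern: IR::Inst* inst = new IR::Inst(opcode);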

View file

@ -21,7 +21,6 @@
#include "dynarmic/ir/terminal.h"
#include "dynarmic/ir/value.h"
#include "dynarmic/ir/dense_list.h"
#include "dynarmic/common/memory_pool.h"
namespace Dynarmic::IR {
@ -174,8 +173,6 @@ private:
LocationDescriptor end_location;
/// Conditional to pass in order to execute this block
Cond cond;
/// Memory pool for instruction list
std::unique_ptr<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>> instruction_alloc_pool;
/// Terminal instruction of this block.
Terminal terminal = Term::Invalid{};
/// Number of cycles this block takes to execute if the conditional fails.

View file

@ -1,21 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/ir/ir_emitter.h"
#include <vector>
#include "dynarmic/common/assert.h"
#include <mcl/bit_cast.hpp>
#include "dynarmic/ir/opcodes.h"
namespace Dynarmic::IR {
} // namespace Dynarmic::IR

View file

@ -1,70 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/interface/A32/config.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) {
for (auto& inst : block) {
switch (inst.GetOpcode()) {
case IR::Opcode::A32ReadMemory8: {
if (!inst.AreAllArgsImmediates()) {
break;
}
const u32 vaddr = inst.GetArg(1).GetU32();
if (cb->IsReadOnlyMemory(vaddr)) {
const u8 value_from_memory = cb->MemoryRead8(vaddr);
inst.ReplaceUsesWith(IR::Value{value_from_memory});
}
break;
}
case IR::Opcode::A32ReadMemory16: {
if (!inst.AreAllArgsImmediates()) {
break;
}
const u32 vaddr = inst.GetArg(1).GetU32();
if (cb->IsReadOnlyMemory(vaddr)) {
const u16 value_from_memory = cb->MemoryRead16(vaddr);
inst.ReplaceUsesWith(IR::Value{value_from_memory});
}
break;
}
case IR::Opcode::A32ReadMemory32: {
if (!inst.AreAllArgsImmediates()) {
break;
}
const u32 vaddr = inst.GetArg(1).GetU32();
if (cb->IsReadOnlyMemory(vaddr)) {
const u32 value_from_memory = cb->MemoryRead32(vaddr);
inst.ReplaceUsesWith(IR::Value{value_from_memory});
}
break;
}
case IR::Opcode::A32ReadMemory64: {
if (!inst.AreAllArgsImmediates()) {
break;
}
const u32 vaddr = inst.GetArg(1).GetU32();
if (cb->IsReadOnlyMemory(vaddr)) {
const u64 value_from_memory = cb->MemoryRead64(vaddr);
inst.ReplaceUsesWith(IR::Value{value_from_memory});
}
break;
}
default:
break;
}
}
}
} // namespace Dynarmic::Optimization
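The deleted pass reduces to one rule: a load whose address is an immediate, into memory the callbacks report as read-only, can be replaced by the loaded constant at translation time. A scalar sketch, with function pointers as illustrative stand-ins for the UserCallbacks:

#include <cstdint>
#include <optional>

std::optional<uint32_t> TryFoldRead32(uint32_t vaddr,
                                      bool (*is_read_only)(uint32_t),
                                      uint32_t (*read32)(uint32_t)) {
    if (is_read_only(vaddr))
        return read32(vaddr);  // contents cannot change under the JIT, so folding is sound
    return std::nullopt;
}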

View file

@ -1,382 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <algorithm>
#include <array>
#include <functional>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"
#include "dynarmic/frontend/A32/a32_ir_emitter.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/value.h"
namespace Dynarmic::Optimization {
namespace {
void FlagsPass(IR::Block& block) {
using Iterator = std::reverse_iterator<IR::Block::iterator>;
struct FlagInfo {
bool set_not_required = false;
bool has_value_request = false;
Iterator value_request = {};
};
struct ValuelessFlagInfo {
bool set_not_required = false;
};
ValuelessFlagInfo nzcvq;
ValuelessFlagInfo nzcv;
ValuelessFlagInfo nz;
FlagInfo c_flag;
FlagInfo ge;
auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) {
if (info.has_value_request) {
info.value_request->ReplaceUsesWith(value);
}
info.has_value_request = false;
if (info.set_not_required) {
inst->Invalidate();
}
info.set_not_required = true;
};
auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) {
if (info.set_not_required) {
inst->Invalidate();
}
info.set_not_required = true;
};
auto do_get = [](FlagInfo& info, Iterator inst) {
if (info.has_value_request) {
info.value_request->ReplaceUsesWith(IR::Value{&*inst});
}
info.has_value_request = true;
info.value_request = inst;
};
A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}};
for (auto inst = block.rbegin(); inst != block.rend(); ++inst) {
auto const opcode = inst->GetOpcode();
switch (opcode) {
case IR::Opcode::A32GetCFlag: {
do_get(c_flag, inst);
break;
}
case IR::Opcode::A32SetCpsrNZCV: {
if (c_flag.has_value_request) {
ir.SetInsertionPointBefore(inst.base()); // base is one ahead
IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)});
c_flag.value_request->ReplaceUsesWith(c);
c_flag.has_value_request = false;
break; // This case will be executed again because of the above
}
do_set_valueless(nzcv, inst);
nz = {.set_not_required = true};
c_flag = {.set_not_required = true};
break;
}
case IR::Opcode::A32SetCpsrNZCVRaw: {
if (c_flag.has_value_request) {
nzcv.set_not_required = false;
}
do_set_valueless(nzcv, inst);
nzcvq = {};
nz = {.set_not_required = true};
c_flag = {.set_not_required = true};
break;
}
case IR::Opcode::A32SetCpsrNZCVQ: {
if (c_flag.has_value_request) {
nzcvq.set_not_required = false;
}
do_set_valueless(nzcvq, inst);
nzcv = {.set_not_required = true};
nz = {.set_not_required = true};
c_flag = {.set_not_required = true};
break;
}
case IR::Opcode::A32SetCpsrNZ: {
do_set_valueless(nz, inst);
nzcvq = {};
nzcv = {};
break;
}
case IR::Opcode::A32SetCpsrNZC: {
if (c_flag.has_value_request) {
c_flag.value_request->ReplaceUsesWith(inst->GetArg(1));
c_flag.has_value_request = false;
}
if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) {
const auto nz_value = inst->GetArg(0);
inst->Invalidate();
ir.SetInsertionPointBefore(inst.base());
ir.SetCpsrNZ(IR::NZCV{nz_value});
nzcvq = {};
nzcv = {};
nz = {.set_not_required = true};
break;
}
if (nz.set_not_required && c_flag.set_not_required) {
inst->Invalidate();
} else if (nz.set_not_required) {
inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker());
}
nz.set_not_required = true;
c_flag.set_not_required = true;
nzcv = {};
nzcvq = {};
break;
}
case IR::Opcode::A32SetGEFlags: {
do_set(ge, inst->GetArg(0), inst);
break;
}
case IR::Opcode::A32GetGEFlags: {
do_get(ge, inst);
break;
}
case IR::Opcode::A32SetGEFlagsCompressed: {
ge = {.set_not_required = true};
break;
}
case IR::Opcode::A32OrQFlag: {
break;
}
default: {
if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) {
nzcvq = {};
nzcv = {};
nz = {};
c_flag = {};
ge = {};
}
break;
}
}
}
}
void RegisterPass(IR::Block& block) {
using Iterator = IR::Block::iterator;
struct RegInfo {
IR::Value register_value;
std::optional<Iterator> last_set_instruction;
};
std::array<RegInfo, 15> reg_info;
const auto do_get = [](RegInfo& info, Iterator get_inst) {
if (info.register_value.IsEmpty()) {
info.register_value = IR::Value(&*get_inst);
return;
}
get_inst->ReplaceUsesWith(info.register_value);
};
const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) {
if (info.last_set_instruction) {
(*info.last_set_instruction)->Invalidate();
}
info = {
.register_value = value,
.last_set_instruction = set_inst,
};
};
enum class ExtValueType {
Empty,
Single,
Double,
VectorDouble,
VectorQuad,
};
struct ExtRegInfo {
ExtValueType value_type = {};
IR::Value register_value;
std::optional<Iterator> last_set_instruction;
};
std::array<ExtRegInfo, 64> ext_reg_info;
const auto do_ext_get = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, Iterator get_inst) {
if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
for (auto& info : infos) {
info.get() = {
.value_type = type,
.register_value = IR::Value(&*get_inst),
.last_set_instruction = std::nullopt,
};
}
return;
}
get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value);
};
const auto do_ext_set = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, IR::Value value, Iterator set_inst) {
if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
if (std::data(infos)[0].get().last_set_instruction) {
(*std::data(infos)[0].get().last_set_instruction)->Invalidate();
}
}
for (auto& info : infos) {
info.get() = {
.value_type = type,
.register_value = value,
.last_set_instruction = set_inst,
};
}
};
// Location and version don't matter here.
A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}};
for (auto inst = block.begin(); inst != block.end(); ++inst) {
auto const opcode = inst->GetOpcode();
switch (opcode) {
case IR::Opcode::A32GetRegister: {
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
ASSERT(reg != A32::Reg::PC);
const size_t reg_index = static_cast<size_t>(reg);
do_get(reg_info[reg_index], inst);
break;
}
case IR::Opcode::A32SetRegister: {
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
if (reg == A32::Reg::PC) {
break;
}
const auto reg_index = static_cast<size_t>(reg);
do_set(reg_info[reg_index], inst->GetArg(1), inst);
break;
}
case IR::Opcode::A32GetExtendedRegister32: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst);
break;
}
case IR::Opcode::A32SetExtendedRegister32: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst);
break;
}
case IR::Opcode::A32GetExtendedRegister64: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_ext_get(ExtValueType::Double,
{
ext_reg_info[reg_index * 2 + 0],
ext_reg_info[reg_index * 2 + 1],
},
inst);
break;
}
case IR::Opcode::A32SetExtendedRegister64: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
do_ext_set(ExtValueType::Double,
{
ext_reg_info[reg_index * 2 + 0],
ext_reg_info[reg_index * 2 + 1],
},
inst->GetArg(1),
inst);
break;
}
case IR::Opcode::A32GetVector: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
if (A32::IsDoubleExtReg(reg)) {
do_ext_get(ExtValueType::VectorDouble,
{
ext_reg_info[reg_index * 2 + 0],
ext_reg_info[reg_index * 2 + 1],
},
inst);
} else {
DEBUG_ASSERT(A32::IsQuadExtReg(reg));
do_ext_get(ExtValueType::VectorQuad,
{
ext_reg_info[reg_index * 4 + 0],
ext_reg_info[reg_index * 4 + 1],
ext_reg_info[reg_index * 4 + 2],
ext_reg_info[reg_index * 4 + 3],
},
inst);
}
break;
}
case IR::Opcode::A32SetVector: {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
const size_t reg_index = A32::RegNumber(reg);
if (A32::IsDoubleExtReg(reg)) {
ir.SetInsertionPointAfter(inst);
const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)});
do_ext_set(ExtValueType::VectorDouble,
{
ext_reg_info[reg_index * 2 + 0],
ext_reg_info[reg_index * 2 + 1],
},
stored_value,
inst);
} else {
DEBUG_ASSERT(A32::IsQuadExtReg(reg));
do_ext_set(ExtValueType::VectorQuad,
{
ext_reg_info[reg_index * 4 + 0],
ext_reg_info[reg_index * 4 + 1],
ext_reg_info[reg_index * 4 + 2],
ext_reg_info[reg_index * 4 + 3],
},
inst->GetArg(1),
inst);
}
break;
}
default: {
if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) {
reg_info = {};
ext_reg_info = {};
}
break;
}
}
}
}
} // namespace
void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) {
FlagsPass(block);
RegisterPass(block);
}
} // namespace Dynarmic::Optimization
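Reduced to a single register, the deleted RegisterPass boils down to the following bookkeeping (an illustrative sketch, not the pass itself):

#include <cstdint>
#include <optional>

struct RegTracker {
    std::optional<uint64_t> known_value;  // value of the last set, if any
    bool last_set_unread = false;         // a second set before any get makes the first dead
    std::optional<uint64_t> OnGet() {
        last_set_unread = false;          // the pending set is now observed
        return known_value;               // a get can be replaced by this value
    }
    bool OnSet(uint64_t v) {
        const bool previous_set_dead = last_set_unread;
        known_value = v;
        last_set_unread = true;
        return previous_set_dead;         // caller may invalidate the earlier set
    }
};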

View file

@ -1,57 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/frontend/A64/a64_ir_emitter.h"
#include "dynarmic/interface/A64/config.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
if (conf.hook_data_cache_operations) {
return;
}
for (auto& inst : block) {
if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) {
continue;
}
const auto op = static_cast<A64::DataCacheOperation>(inst.GetArg(1).GetU64());
if (op == A64::DataCacheOperation::ZeroByVA) {
A64::IREmitter ir{block};
ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}};
ir.SetInsertionPointBefore(&inst);
size_t bytes = 4 << static_cast<size_t>(conf.dczid_el0 & 0b1111);
IR::U64 addr{inst.GetArg(2)};
const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0));
while (bytes >= 16) {
ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA);
addr = ir.Add(addr, ir.Imm64(16));
bytes -= 16;
}
while (bytes >= 8) {
ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA);
addr = ir.Add(addr, ir.Imm64(8));
bytes -= 8;
}
while (bytes >= 4) {
ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA);
addr = ir.Add(addr, ir.Imm64(4));
bytes -= 4;
}
}
inst.Invalidate();
}
}
} // namespace Dynarmic::Optimization
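The deleted expansion zeroes the DC ZVA block (4 << (dczid_el0 & 0b1111) bytes) with the widest stores first. The same loop in plain C++, with memset standing in for the 128-, 64-, and 32-bit IR writes:

#include <cstddef>
#include <cstring>
#include <initializer_list>

void ZeroByVA(unsigned char* p, std::size_t bytes) {
    for (const std::size_t chunk : {std::size_t{16}, std::size_t{8}, std::size_t{4}}) {
        while (bytes >= chunk) {
            std::memset(p, 0, chunk);  // WriteMemory128/64/32 in the IR version
            p += chunk;
            bytes -= chunk;
        }
    }
}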

View file

@ -1,165 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <array>
#include "dynarmic/common/common_types.h"
#include "dynarmic/frontend/A64/a64_types.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/value.h"
namespace Dynarmic::Optimization {
void A64GetSetElimination(IR::Block& block) {
using Iterator = IR::Block::iterator;
enum class TrackingType {
W,
X,
S,
D,
Q,
SP,
NZCV,
NZCVRaw,
};
struct RegisterInfo {
IR::Value register_value;
TrackingType tracking_type;
bool set_instruction_present = false;
Iterator last_set_instruction;
};
std::array<RegisterInfo, 31> reg_info;
std::array<RegisterInfo, 32> vec_info;
RegisterInfo sp_info;
RegisterInfo nzcv_info;
const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) {
if (info.set_instruction_present) {
info.last_set_instruction->Invalidate();
block.Instructions().erase(info.last_set_instruction);
}
info.register_value = value;
info.tracking_type = tracking_type;
info.set_instruction_present = true;
info.last_set_instruction = set_inst;
};
const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) {
const auto do_nothing = [&] {
info = {};
info.register_value = IR::Value(&*get_inst);
info.tracking_type = tracking_type;
};
if (info.register_value.IsEmpty()) {
do_nothing();
return;
}
if (info.tracking_type == tracking_type) {
get_inst->ReplaceUsesWith(info.register_value);
return;
}
do_nothing();
};
for (auto inst = block.begin(); inst != block.end(); ++inst) {
auto const opcode = inst->GetOpcode();
switch (opcode) {
case IR::Opcode::A64GetW: {
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
do_get(reg_info.at(index), inst, TrackingType::W);
break;
}
case IR::Opcode::A64GetX: {
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
do_get(reg_info.at(index), inst, TrackingType::X);
break;
}
case IR::Opcode::A64GetS: {
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
do_get(vec_info.at(index), inst, TrackingType::S);
break;
}
case IR::Opcode::A64GetD: {
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
do_get(vec_info.at(index), inst, TrackingType::D);
break;
}
case IR::Opcode::A64GetQ: {
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
do_get(vec_info.at(index), inst, TrackingType::Q);
break;
}
case IR::Opcode::A64GetSP: {
do_get(sp_info, inst, TrackingType::SP);
break;
}
case IR::Opcode::A64GetNZCVRaw: {
do_get(nzcv_info, inst, TrackingType::NZCVRaw);
break;
}
case IR::Opcode::A64SetW: {
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W);
break;
}
case IR::Opcode::A64SetX: {
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef());
do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X);
break;
}
case IR::Opcode::A64SetS: {
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S);
break;
}
case IR::Opcode::A64SetD: {
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D);
break;
}
case IR::Opcode::A64SetQ: {
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef());
do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q);
break;
}
case IR::Opcode::A64SetSP: {
do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP);
break;
}
case IR::Opcode::A64SetNZCV: {
do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV);
break;
}
case IR::Opcode::A64SetNZCVRaw: {
do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw);
break;
}
default: {
if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) {
nzcv_info = {};
}
if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) {
reg_info = {};
vec_info = {};
sp_info = {};
}
break;
}
}
}
}
} // namespace Dynarmic::Optimization
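Why the pass pairs a TrackingType with each cached value: forwarding is only sound between accesses of the same width and kind. A condensed, simplified sketch of do_get above (on a mismatch the real pass restarts tracking with the get instruction itself):

#include <cstdint>
#include <optional>

enum class TrackingType { W, X, S, D, Q, SP, NZCV, NZCVRaw };

struct TrackedReg {
    std::optional<uint64_t> value;
    TrackingType type{};
};

// Forward the cached value only on an exact type match; otherwise restart tracking.
std::optional<uint64_t> OnGet(TrackedReg& info, TrackingType t) {
    if (info.value && info.type == t)
        return info.value;     // e.g. an X read after an X write: safe to forward
    info = {std::nullopt, t};  // e.g. a W read after an X write: must re-read
    return std::nullopt;
}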

View file

@ -1,57 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <boost/variant/get.hpp>
#include "dynarmic/common/common_types.h"
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
#include "dynarmic/frontend/A64/translate/a64_translate.h"
#include "dynarmic/interface/A64/config.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) {
const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) {
const auto instruction = cb->MemoryReadCode(location.PC());
if (!instruction)
return false;
IR::Block new_block{location};
A64::TranslateSingleInstruction(new_block, location, *instruction);
if (!new_block.Instructions().empty())
return false;
const IR::Terminal terminal = new_block.GetTerminal();
if (auto term = boost::get<IR::Term::Interpret>(&terminal)) {
return term->next == location;
}
return false;
};
IR::Terminal terminal = block.GetTerminal();
auto term = boost::get<IR::Term::Interpret>(&terminal);
if (!term)
return;
A64::LocationDescriptor location{term->next};
size_t num_instructions = 1;
while (is_interpret_instruction(location.AdvancePC(static_cast<int>(num_instructions * 4)))) {
num_instructions++;
}
term->num_instructions = num_instructions;
block.ReplaceTerminal(terminal);
block.CycleCount() += num_instructions - 1;
}
} // namespace Dynarmic::Optimization
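The merge logic is simply a run-length count over successive PCs that would also fall back to the interpreter; condensed:

#include <cstddef>
#include <cstdint>

std::size_t CountInterpretRun(uint64_t pc, bool (*is_interpret)(uint64_t)) {
    std::size_t n = 1;                 // the Interpret terminal itself
    while (is_interpret(pc + n * 4))   // A64 instructions are 4 bytes
        ++n;
    return n;                          // becomes term->num_instructions
}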

View file

@ -1,559 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <optional>
#include "dynarmic/common/assert.h"
#include <mcl/bit/rotate.hpp>
#include <mcl/bit/swap.hpp>
#include "dynarmic/common/common_types.h"
#include "dynarmic/common/safe_ops.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/ir_emitter.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
using Op = Dynarmic::IR::Opcode;
namespace {
// Tiny helper to avoid having to pick the store width based on the
// opcode bit size all over the place within folding functions.
void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
if (is_32_bit) {
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
} else {
inst.ReplaceUsesWith(IR::Value{value});
}
}
IR::Value Value(bool is_32_bit, u64 value) {
return is_32_bit ? IR::Value{static_cast<u32>(value)} : IR::Value{value};
}
template<typename ImmFn>
bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
const auto lhs = inst.GetArg(0);
const auto rhs = inst.GetArg(1);
const bool is_lhs_immediate = lhs.IsImmediate();
const bool is_rhs_immediate = rhs.IsImmediate();
if (is_lhs_immediate && is_rhs_immediate) {
const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64());
ReplaceUsesWith(inst, is_32_bit, result);
return false;
}
if (is_lhs_immediate && !is_rhs_immediate) {
const IR::Inst* rhs_inst = rhs.GetInstRecursive();
if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) {
const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64());
inst.SetArg(0, rhs_inst->GetArg(0));
inst.SetArg(1, Value(is_32_bit, combined));
} else {
// Normalize
inst.SetArg(0, rhs);
inst.SetArg(1, lhs);
}
}
if (!is_lhs_immediate && is_rhs_immediate) {
const IR::Inst* lhs_inst = lhs.GetInstRecursive();
if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) {
const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64());
inst.SetArg(0, lhs_inst->GetArg(0));
inst.SetArg(1, Value(is_32_bit, combined));
}
}
return true;
}
void FoldAdd(IR::Inst& inst, bool is_32_bit) {
const auto lhs = inst.GetArg(0);
const auto rhs = inst.GetArg(1);
const auto carry = inst.GetArg(2);
if (lhs.IsImmediate() && !rhs.IsImmediate()) {
// Normalize
inst.SetArg(0, rhs);
inst.SetArg(1, lhs);
FoldAdd(inst, is_32_bit);
return;
}
if (inst.HasAssociatedPseudoOperation()) {
return;
}
if (!lhs.IsImmediate() && rhs.IsImmediate()) {
const IR::Inst* lhs_inst = lhs.GetInstRecursive();
if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) {
const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1();
if (combined == 0) {
inst.ReplaceUsesWith(lhs_inst->GetArg(0));
return;
}
inst.SetArg(0, lhs_inst->GetArg(0));
inst.SetArg(1, Value(is_32_bit, combined));
return;
}
if (rhs.IsZero() && carry.IsZero()) {
inst.ReplaceUsesWith(lhs);
return;
}
}
if (inst.AreAllArgsImmediates()) {
const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1();
ReplaceUsesWith(inst, is_32_bit, result);
return;
}
}
/// Folds AND operations based on the following:
///
/// 1. imm_x & imm_y -> result
/// 2. x & 0 -> 0
/// 3. 0 & y -> 0
/// 4. x & y -> y (where x has all bits set to 1)
/// 5. x & y -> x (where y has all bits set to 1)
///
void FoldAND(IR::Inst& inst, bool is_32_bit) {
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
ReplaceUsesWith(inst, is_32_bit, 0);
} else if (rhs.HasAllBitsSet()) {
inst.ReplaceUsesWith(inst.GetArg(0));
}
}
}
/// Folds byte reversal opcodes based on the following:
///
/// 1. imm -> swap(imm)
///
void FoldByteReverse(IR::Inst& inst, Op op) {
const auto operand = inst.GetArg(0);
if (!operand.IsImmediate()) {
return;
}
if (op == Op::ByteReverseWord) {
const u32 result = mcl::bit::swap_bytes_32(static_cast<u32>(operand.GetImmediateAsU64()));
inst.ReplaceUsesWith(IR::Value{result});
} else if (op == Op::ByteReverseHalf) {
const u16 result = mcl::bit::swap_bytes_16(static_cast<u16>(operand.GetImmediateAsU64()));
inst.ReplaceUsesWith(IR::Value{result});
} else {
const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64());
inst.ReplaceUsesWith(IR::Value{result});
}
}
/// Folds division operations based on the following:
///
/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual)
/// 2. imm_x / imm_y -> result
/// 3. x / 1 -> x
///
void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
ReplaceUsesWith(inst, is_32_bit, 0);
return;
}
const auto lhs = inst.GetArg(0);
if (lhs.IsImmediate() && rhs.IsImmediate()) {
if (is_signed) {
const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64();
ReplaceUsesWith(inst, is_32_bit, static_cast<u64>(result));
} else {
const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64();
ReplaceUsesWith(inst, is_32_bit, result);
}
} else if (rhs.IsUnsignedImmediate(1)) {
inst.ReplaceUsesWith(IR::Value{lhs});
}
}
// Folds EOR operations based on the following:
//
// 1. imm_x ^ imm_y -> result
// 2. x ^ 0 -> x
// 3. 0 ^ y -> y
//
void FoldEOR(IR::Inst& inst, bool is_32_bit) {
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
inst.ReplaceUsesWith(inst.GetArg(0));
}
}
}
void FoldLeastSignificantByte(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const auto operand = inst.GetArg(0);
inst.ReplaceUsesWith(IR::Value{static_cast<u8>(operand.GetImmediateAsU64())});
}
void FoldLeastSignificantHalf(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const auto operand = inst.GetArg(0);
inst.ReplaceUsesWith(IR::Value{static_cast<u16>(operand.GetImmediateAsU64())});
}
void FoldLeastSignificantWord(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const auto operand = inst.GetArg(0);
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64())});
}
void FoldMostSignificantBit(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const auto operand = inst.GetArg(0);
inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0});
}
void FoldMostSignificantWord(IR::Inst& inst) {
IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
if (!inst.AreAllArgsImmediates()) {
return;
}
const auto operand = inst.GetArg(0);
if (carry_inst) {
carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())});
}
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64() >> 32)});
}
// Folds multiplication operations based on the following:
//
// 1. imm_x * imm_y -> result
// 2. x * 0 -> 0
// 3. 0 * y -> 0
// 4. x * 1 -> x
// 5. 1 * y -> y
//
void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
ReplaceUsesWith(inst, is_32_bit, 0);
} else if (rhs.IsUnsignedImmediate(1)) {
inst.ReplaceUsesWith(inst.GetArg(0));
}
}
}
// Folds NOT operations if the contained value is an immediate.
void FoldNOT(IR::Inst& inst, bool is_32_bit) {
const auto operand = inst.GetArg(0);
if (!operand.IsImmediate()) {
return;
}
const u64 result = ~operand.GetImmediateAsU64();
ReplaceUsesWith(inst, is_32_bit, result);
}
// Folds OR operations based on the following:
//
// 1. imm_x | imm_y -> result
// 2. x | 0 -> x
// 3. 0 | y -> y
//
void FoldOR(IR::Inst& inst, bool is_32_bit) {
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
inst.ReplaceUsesWith(inst.GetArg(0));
}
}
}
bool FoldShifts(IR::Inst& inst) {
IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
// The 32-bit variants can contain 3 arguments, while the
// 64-bit variants only contain 2.
if (inst.NumArgs() == 3 && !carry_inst) {
inst.SetArg(2, IR::Value(false));
}
const auto shift_amount = inst.GetArg(1);
if (shift_amount.IsZero()) {
if (carry_inst) {
carry_inst->ReplaceUsesWith(inst.GetArg(2));
}
inst.ReplaceUsesWith(inst.GetArg(0));
return false;
}
if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) {
inst.SetArg(2, IR::Value(false));
}
if (!inst.AreAllArgsImmediates() || carry_inst) {
return false;
}
return true;
}
void FoldSignExtendXToWord(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const s64 value = inst.GetArg(0).GetImmediateAsS64();
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
}
void FoldSignExtendXToLong(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const s64 value = inst.GetArg(0).GetImmediateAsS64();
inst.ReplaceUsesWith(IR::Value{static_cast<u64>(value)});
}
void FoldSub(IR::Inst& inst, bool is_32_bit) {
if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
return;
}
const auto lhs = inst.GetArg(0);
const auto rhs = inst.GetArg(1);
const auto carry = inst.GetArg(2);
const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1();
ReplaceUsesWith(inst, is_32_bit, result);
}
void FoldZeroExtendXToWord(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const u64 value = inst.GetArg(0).GetImmediateAsU64();
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
}
void FoldZeroExtendXToLong(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
const u64 value = inst.GetArg(0).GetImmediateAsU64();
inst.ReplaceUsesWith(IR::Value{value});
}
} // Anonymous namespace
void ConstantPropagation(IR::Block& block) {
for (auto& inst : block) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
case Op::LeastSignificantWord:
FoldLeastSignificantWord(inst);
break;
case Op::MostSignificantWord:
FoldMostSignificantWord(inst);
break;
case Op::LeastSignificantHalf:
FoldLeastSignificantHalf(inst);
break;
case Op::LeastSignificantByte:
FoldLeastSignificantByte(inst);
break;
case Op::MostSignificantBit:
FoldMostSignificantBit(inst);
break;
case Op::IsZero32:
if (inst.AreAllArgsImmediates()) {
inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0});
}
break;
case Op::IsZero64:
if (inst.AreAllArgsImmediates()) {
inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0});
}
break;
case Op::LogicalShiftLeft32:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
}
break;
case Op::LogicalShiftLeft64:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
}
break;
case Op::LogicalShiftRight32:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, true, Safe::LogicalShiftRight<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
}
break;
case Op::LogicalShiftRight64:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, false, Safe::LogicalShiftRight<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
}
break;
case Op::ArithmeticShiftRight32:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
}
break;
case Op::ArithmeticShiftRight64:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
}
break;
case Op::RotateRight32:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, true, mcl::bit::rotate_right<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
}
break;
case Op::RotateRight64:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, false, mcl::bit::rotate_right<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
}
break;
case Op::LogicalShiftLeftMasked32:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f));
}
break;
case Op::LogicalShiftLeftMasked64:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f));
}
break;
case Op::LogicalShiftRightMasked32:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f));
}
break;
case Op::LogicalShiftRightMasked64:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f));
}
break;
case Op::ArithmeticShiftRightMasked32:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, true, static_cast<s32>(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f));
}
break;
case Op::ArithmeticShiftRightMasked64:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, false, static_cast<s64>(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f));
}
break;
case Op::RotateRightMasked32:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, true, mcl::bit::rotate_right<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32()));
}
break;
case Op::RotateRightMasked64:
if (inst.AreAllArgsImmediates()) {
ReplaceUsesWith(inst, false, mcl::bit::rotate_right<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64()));
}
break;
case Op::Add32:
case Op::Add64:
FoldAdd(inst, opcode == Op::Add32);
break;
case Op::Sub32:
case Op::Sub64:
FoldSub(inst, opcode == Op::Sub32);
break;
case Op::Mul32:
case Op::Mul64:
FoldMultiply(inst, opcode == Op::Mul32);
break;
case Op::SignedDiv32:
case Op::SignedDiv64:
FoldDivide(inst, opcode == Op::SignedDiv32, true);
break;
case Op::UnsignedDiv32:
case Op::UnsignedDiv64:
FoldDivide(inst, opcode == Op::UnsignedDiv32, false);
break;
case Op::And32:
case Op::And64:
FoldAND(inst, opcode == Op::And32);
break;
case Op::Eor32:
case Op::Eor64:
FoldEOR(inst, opcode == Op::Eor32);
break;
case Op::Or32:
case Op::Or64:
FoldOR(inst, opcode == Op::Or32);
break;
case Op::Not32:
case Op::Not64:
FoldNOT(inst, opcode == Op::Not32);
break;
case Op::SignExtendByteToWord:
case Op::SignExtendHalfToWord:
FoldSignExtendXToWord(inst);
break;
case Op::SignExtendByteToLong:
case Op::SignExtendHalfToLong:
case Op::SignExtendWordToLong:
FoldSignExtendXToLong(inst);
break;
case Op::ZeroExtendByteToWord:
case Op::ZeroExtendHalfToWord:
FoldZeroExtendXToWord(inst);
break;
case Op::ZeroExtendByteToLong:
case Op::ZeroExtendHalfToLong:
case Op::ZeroExtendWordToLong:
FoldZeroExtendXToLong(inst);
break;
case Op::ByteReverseWord:
case Op::ByteReverseHalf:
case Op::ByteReverseDual:
FoldByteReverse(inst, opcode);
break;
default:
break;
}
}
}
} // namespace Dynarmic::Optimization
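One detail worth calling out from the deleted FoldSub: subtraction is folded as lhs + ~rhs + carry, the ARM SUB/SBC formulation. A quick check of the identity, exact under unsigned wraparound:

#include <cassert>
#include <cstdint>

int main() {
    const uint64_t a = 12345, b = 678;
    assert(a - b == a + ~b + 1);      // SUB: carry-in is 1
    assert(a - b - 1 == a + ~b + 0);  // SBC with borrow: carry-in is 0
}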

View file

@ -1,23 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <mcl/iterator/reverse.hpp>
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
void DeadCodeElimination(IR::Block& block) {
// We iterate over the instructions in reverse order.
// This is because removing an instruction reduces the number of uses for earlier instructions.
for (auto& inst : mcl::iterator::reverse(block)) {
if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) {
inst.Invalidate();
}
}
}
} // namespace Dynarmic::Optimization
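The reverse-iteration comment above is the whole trick: invalidating a dead instruction releases the uses of its operands, so earlier producers can die within the same sweep. A toy version over an array-based IR, with side-effect checks omitted:

#include <cstddef>
#include <vector>

struct Node {
    std::vector<std::size_t> args;  // indices of earlier nodes this one uses
    std::size_t uses = 0;
    bool dead = false;
};

void Dce(std::vector<Node>& nodes) {
    for (std::size_t i = nodes.size(); i-- > 0;) {
        Node& n = nodes[i];
        if (n.uses == 0) {
            n.dead = true;
            for (const std::size_t a : n.args)
                --nodes[a].uses;  // may make an earlier node dead before we reach it
        }
    }
}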

View file

@ -1,44 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2020 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <vector>
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
void IdentityRemovalPass(IR::Block& block) {
std::vector<IR::Inst*> to_invalidate;
auto iter = block.begin();
while (iter != block.end()) {
IR::Inst& inst = *iter;
const size_t num_args = inst.NumArgs();
for (size_t i = 0; i < num_args; i++) {
while (true) {
IR::Value arg = inst.GetArg(i);
if (!arg.IsIdentity())
break;
inst.SetArg(i, arg.GetInst()->GetArg(0));
}
}
if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) {
iter = block.Instructions().erase(inst);
to_invalidate.push_back(&inst);
} else {
++iter;
}
}
for (IR::Inst* inst : to_invalidate) {
inst->Invalidate();
}
}
} // namespace Dynarmic::Optimization
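The inner while loop above chases chains of Identity nodes back to their source; the same idea with plain pointers:

struct Val {
    Val* identity_of = nullptr;  // non-null when this value is Identity(other)
};

Val* Resolve(Val* v) {
    while (v->identity_of)
        v = v->identity_of;  // hop until the real producer is reached
    return v;
}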

View file

@ -1,127 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2020 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <optional>
#include <tuple>
#include <mp/metafunction/apply.h>
#include <mp/typelist/concat.h>
#include <mp/typelist/drop.h>
#include <mp/typelist/get.h>
#include <mp/typelist/head.h>
#include <mp/typelist/list.h>
#include <mp/typelist/prepend.h>
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/value.h"
namespace Dynarmic::Optimization::IRMatcher {
struct CaptureValue {
using ReturnType = std::tuple<IR::Value>;
static std::optional<ReturnType> Match(IR::Value value) {
return std::tuple(value);
}
};
struct CaptureInst {
using ReturnType = std::tuple<IR::Inst*>;
static std::optional<ReturnType> Match(IR::Value value) {
if (value.IsImmediate())
return std::nullopt;
return std::tuple(value.GetInstRecursive());
}
};
struct CaptureUImm {
using ReturnType = std::tuple<u64>;
static std::optional<ReturnType> Match(IR::Value value) {
return std::tuple(value.GetImmediateAsU64());
}
};
struct CaptureSImm {
using ReturnType = std::tuple<s64>;
static std::optional<ReturnType> Match(IR::Value value) {
return std::tuple(value.GetImmediateAsS64());
}
};
template<u64 Value>
struct UImm {
using ReturnType = std::tuple<>;
static std::optional<std::tuple<>> Match(IR::Value value) {
if (value.GetImmediateAsU64() == Value)
return std::tuple();
return std::nullopt;
}
};
template<s64 Value>
struct SImm {
using ReturnType = std::tuple<>;
static std::optional<std::tuple<>> Match(IR::Value value) {
if (value.GetImmediateAsS64() == Value)
return std::tuple();
return std::nullopt;
}
};
template<IR::Opcode Opcode, typename... Args>
struct Inst {
public:
using ReturnType = mp::concat<std::tuple<>, typename Args::ReturnType...>;
static std::optional<ReturnType> Match(const IR::Inst& inst) {
if (inst.GetOpcode() != Opcode)
return std::nullopt;
if (inst.HasAssociatedPseudoOperation())
return std::nullopt;
return MatchArgs<0>(inst);
}
static std::optional<ReturnType> Match(IR::Value value) {
if (value.IsImmediate())
return std::nullopt;
return Match(*value.GetInstRecursive());
}
private:
template<size_t I>
static auto MatchArgs(const IR::Inst& inst) -> std::optional<mp::apply<mp::concat, mp::prepend<mp::drop<I, mp::list<typename Args::ReturnType...>>, std::tuple<>>>> {
if constexpr (I >= sizeof...(Args)) {
return std::tuple();
} else {
using Arg = mp::get<I, mp::list<Args...>>;
if (const auto arg = Arg::Match(inst.GetArg(I))) {
if (const auto rest = MatchArgs<I + 1>(inst)) {
return std::tuple_cat(*arg, *rest);
}
}
return std::nullopt;
}
}
};
inline bool IsSameInst(std::tuple<IR::Inst*, IR::Inst*> t) {
return std::get<0>(t) == std::get<1>(t);
}
inline bool IsSameInst(std::tuple<IR::Inst*, IR::Inst*, IR::Inst*> t) {
return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t);
}
} // namespace Dynarmic::Optimization::IRMatcher
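A hypothetical use of these combinators, assuming dynarmic's IR headers are available; the pattern and caller are illustrative, and the immediate-matching helpers assume the matched operand really is an immediate:

using namespace Dynarmic;
// Matches "Add32 x, #0" and captures x.
using AddZero = Optimization::IRMatcher::Inst<
    IR::Opcode::Add32,
    Optimization::IRMatcher::CaptureValue,  // binds the first operand
    Optimization::IRMatcher::UImm<0>>;      // requires the second to be #0

void Example(const IR::Inst& inst) {
    if (const auto captures = AddZero::Match(inst)) {
        const auto [x] = *captures;  // x: IR::Value, the surviving operand of x + 0
        (void)x;
    }
}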

View file

@ -1,18 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2023 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
namespace Dynarmic::Optimization {
void NamingPass(IR::Block& block) {
unsigned name = 1;
for (auto& inst : block) {
inst.SetName(name++);
}
}
} // namespace Dynarmic::Optimization

View file

@ -1,47 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
namespace Dynarmic::A32 {
struct UserCallbacks;
}
namespace Dynarmic::A64 {
struct UserCallbacks;
struct UserConfig;
} // namespace Dynarmic::A64
namespace Dynarmic::IR {
class Block;
}
namespace Dynarmic::Optimization {
struct PolyfillOptions {
bool sha256 = false;
bool vector_multiply_widen = false;
bool operator==(const PolyfillOptions&) const = default;
};
struct A32GetSetEliminationOptions {
bool convert_nzc_to_nz = false;
bool convert_nz_to_nzc = false;
};
void PolyfillPass(IR::Block& block, const PolyfillOptions& opt);
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb);
void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt);
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf);
void A64GetSetElimination(IR::Block& block);
void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb);
void ConstantPropagation(IR::Block& block);
void DeadCodeElimination(IR::Block& block);
void IdentityRemovalPass(IR::Block& block);
void VerificationPass(const IR::Block& block);
void NamingPass(IR::Block& block);
} // namespace Dynarmic::Optimization

View file

@ -1,218 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/ir_emitter.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
namespace {
void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
const IR::U128 x = (IR::U128)inst.GetArg(0);
const IR::U128 y = (IR::U128)inst.GetArg(1);
const IR::U128 t = ir.VectorExtract(x, y, 32);
IR::U128 result = ir.ZeroVector();
for (size_t i = 0; i < 4; i++) {
const IR::U32 modified_element = [&] {
const IR::U32 element = ir.VectorGetElement(32, t, i);
const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7));
const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18));
const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3));
return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
}();
result = ir.VectorSetElement(32, result, i, modified_element);
}
result = ir.VectorAdd(32, result, x);
inst.ReplaceUsesWith(result);
}
void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
const IR::U128 x = (IR::U128)inst.GetArg(0);
const IR::U128 y = (IR::U128)inst.GetArg(1);
const IR::U128 z = (IR::U128)inst.GetArg(2);
const IR::U128 T0 = ir.VectorExtract(y, z, 32);
const IR::U128 lower_half = [&] {
const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64);
const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);
const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19);
const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10);
const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));
const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0));
return ir.VectorZeroUpper(tmp5);
}();
const IR::U64 upper_half = [&] {
const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17);
const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19);
const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10);
const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));
// Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2]
const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64);
const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64);
const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0));
return ir.VectorGetElement(64, tmp5, 0);
}();
const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half);
inst.ReplaceUsesWith(result);
}
IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
return ir.Eor(ir.And(ir.Eor(y, z), x), z);
}
IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z));
}
IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2));
const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13));
const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22));
return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
}
IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6));
const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11));
const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25));
return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
}
void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
IR::U128 x = (IR::U128)inst.GetArg(0);
IR::U128 y = (IR::U128)inst.GetArg(1);
const IR::U128 w = (IR::U128)inst.GetArg(2);
const bool part1 = inst.GetArg(3).GetU1();
for (size_t i = 0; i < 4; i++) {
const IR::U32 low_x = ir.VectorGetElement(32, x, 0);
const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1);
const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2);
const IR::U32 high_x = ir.VectorGetElement(32, x, 3);
const IR::U32 low_y = ir.VectorGetElement(32, y, 0);
const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1);
const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2);
const IR::U32 high_y = ir.VectorGetElement(32, y, 3);
const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y);
const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x);
const IR::U32 t = [&] {
const IR::U32 w_element = ir.VectorGetElement(32, w, i);
const IR::U32 sig = SHAhashSIGMA1(ir, low_y);
return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element)));
}();
const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority));
const IR::U32 new_low_y = ir.Add(t, high_x);
// Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3]
const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96);
const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96);
x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x);
y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y);
}
inst.ReplaceUsesWith(part1 ? x : y);
}
template<size_t esize, bool is_signed>
void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
IR::U128 n = (IR::U128)inst.GetArg(0);
IR::U128 m = (IR::U128)inst.GetArg(1);
const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n);
const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m);
const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m);
inst.ReplaceUsesWith(result);
}
} // namespace
void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
if (polyfill == PolyfillOptions{}) {
return;
}
IR::IREmitter ir{block};
for (auto& inst : block) {
ir.SetInsertionPointBefore(&inst);
switch (inst.GetOpcode()) {
case IR::Opcode::SHA256MessageSchedule0:
if (polyfill.sha256) {
PolyfillSHA256MessageSchedule0(ir, inst);
}
break;
case IR::Opcode::SHA256MessageSchedule1:
if (polyfill.sha256) {
PolyfillSHA256MessageSchedule1(ir, inst);
}
break;
case IR::Opcode::SHA256Hash:
if (polyfill.sha256) {
PolyfillSHA256Hash(ir, inst);
}
break;
case IR::Opcode::VectorMultiplySignedWiden8:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<8, true>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplySignedWiden16:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<16, true>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplySignedWiden32:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<32, true>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplyUnsignedWiden8:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<8, false>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplyUnsignedWiden16:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<16, false>(ir, inst);
}
break;
case IR::Opcode::VectorMultiplyUnsignedWiden32:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<32, false>(ir, inst);
}
break;
default:
break;
}
}
}
} // namespace Dynarmic::Optimization
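The rotate/shift triples in the SHA-256 polyfills are the FIPS 180-4 sigma functions; for instance, the per-element computation in PolyfillSHA256MessageSchedule0 is exactly this scalar:

#include <bit>
#include <cstdint>

// sigma0(x) = ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3)
uint32_t SmallSigma0(uint32_t x) {
    return std::rotr(x, 7) ^ std::rotr(x, 18) ^ (x >> 3);
}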

View file

@ -1,51 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <cstdio>
#include <map>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"
#include <ankerl/unordered_dense.h>
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/type.h"
namespace Dynarmic::Optimization {
void VerificationPass(const IR::Block& block) {
for (const auto& inst : block) {
for (size_t i = 0; i < inst.NumArgs(); i++) {
const IR::Type t1 = inst.GetArg(i).GetType();
const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i);
if (!IR::AreTypesCompatible(t1, t2)) {
std::puts(IR::DumpBlock(block).c_str());
ASSERT_FALSE("above block failed validation");
}
}
}
ankerl::unordered_dense::map<IR::Inst*, size_t> actual_uses;
for (const auto& inst : block) {
for (size_t i = 0; i < inst.NumArgs(); i++) {
const auto arg = inst.GetArg(i);
if (!arg.IsImmediate()) {
actual_uses[arg.GetInst()]++;
}
}
}
for (const auto& pair : actual_uses) {
ASSERT(pair.first->UseCount() == pair.second);
}
}
} // namespace Dynarmic::Optimization

File diff suppressed because it is too large

View file

@ -0,0 +1,37 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
namespace Dynarmic::A32 {
struct UserCallbacks;
struct UserConfig;
}
namespace Dynarmic::A64 {
struct UserCallbacks;
struct UserConfig;
}
namespace Dynarmic::IR {
class Block;
}
namespace Dynarmic::Optimization {
struct PolyfillOptions {
bool sha256 = false;
bool vector_multiply_widen = false;
bool operator==(const PolyfillOptions&) const = default;
};
void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options);
void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options);
} // namespace Dynarmic::Optimization
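Callers now invoke a single entry point instead of sequencing individual passes by hand. Per the declarations above, usage reduces to something like the following; the descriptor and environment names are taken from the test diff further down, and the exact second argument follows whichever overload applies:

IR::Block ir_block = A32::Translate(descriptor, &test_env, {});
Optimization::Optimize(ir_block, conf, {});  // conf: the A32::UserConfig in use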

View file

@ -29,7 +29,7 @@
#include "dynarmic/frontend/A32/translate/a32_translate.h"
#include "dynarmic/interface/A32/a32.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/opt_passes.h"
using namespace Dynarmic;
@ -179,13 +179,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn<Th
while (num_insts < instructions_to_execute_count) {
A32::LocationDescriptor descriptor = {u32(num_insts * 4), cpsr, A32::FPSCR{}};
IR::Block ir_block = A32::Translate(descriptor, &test_env, {});
Optimization::NamingPass(ir_block);
Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true});
Optimization::DeadCodeElimination(ir_block);
Optimization::A32ConstantMemoryReads(ir_block, &test_env);
Optimization::ConstantPropagation(ir_block);
Optimization::DeadCodeElimination(ir_block);
Optimization::VerificationPass(ir_block);
Optimization::Optimize(ir_block, &test_env, {});
printf("\n\nIR:\n%s", IR::DumpBlock(ir_block).c_str());
printf("\n\nx86_64:\n");
jit.DumpDisassembly();

View file

@ -17,7 +17,6 @@
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"
#include "dynarmic/interface/A32/a32.h"
#include "../native/testenv.h"
template<typename InstructionType_, u32 infinite_loop_u32>
class A32TestEnv : public Dynarmic::A32::UserCallbacks {

View file

@ -28,7 +28,7 @@
#include "dynarmic/frontend/A64/translate/a64_translate.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/opt_passes.h"
// Must be declared last for all necessary operator<< to be declared prior to this.
#include <fmt/format.h>

View file

@ -12,7 +12,6 @@
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"
#include "dynarmic/interface/A64/a64.h"
#include "../native/testenv.h"
using Vector = Dynarmic::A64::Vector;

View file

@ -6,33 +6,24 @@ add_executable(dynarmic_tests
fp/mantissa_util_tests.cpp
fp/unpacked_tests.cpp
rand_int.h
# A32
A32/test_arm_disassembler.cpp
A32/test_arm_instructions.cpp
A32/test_coprocessor.cpp
A32/test_svc.cpp
A32/test_thumb_instructions.cpp
A32/testenv.h
decoder_tests.cpp
# A64
A64/a64.cpp
A64/fibonacci.cpp
A64/fp_min_max.cpp
A64/misaligned_page_table.cpp
A64/test_invalidation.cpp
A64/real_world.cpp
A64/testenv.h
)
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic_tests PRIVATE
A32/test_arm_disassembler.cpp
A32/test_arm_instructions.cpp
A32/test_coprocessor.cpp
A32/test_svc.cpp
A32/test_thumb_instructions.cpp
A32/testenv.h
decoder_tests.cpp
)
endif()
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
target_link_libraries(dynarmic_tests PRIVATE merry::oaknut)
target_sources(dynarmic_tests PRIVATE
A64/a64.cpp
A64/fibonacci.cpp
A64/fp_min_max.cpp
A64/misaligned_page_table.cpp
A64/test_invalidation.cpp
A64/real_world.cpp
A64/testenv.h
)
endif()
target_link_libraries(dynarmic_tests PRIVATE merry::oaknut)
if (DYNARMIC_TESTS_USE_UNICORN)
target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn)
@ -40,25 +31,17 @@ if (DYNARMIC_TESTS_USE_UNICORN)
target_sources(dynarmic_tests PRIVATE
fuzz_util.cpp
fuzz_util.h
# A32
A32/fuzz_arm.cpp
A32/fuzz_thumb.cpp
unicorn_emu/a32_unicorn.cpp
unicorn_emu/a32_unicorn.h
# A64
A64/fuzz_with_unicorn.cpp
A64/verify_unicorn.cpp
unicorn_emu/a64_unicorn.cpp
unicorn_emu/a64_unicorn.h
)
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic_tests PRIVATE
A32/fuzz_arm.cpp
A32/fuzz_thumb.cpp
unicorn_emu/a32_unicorn.cpp
unicorn_emu/a32_unicorn.h
)
endif()
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
target_sources(dynarmic_tests PRIVATE
A64/fuzz_with_unicorn.cpp
A64/verify_unicorn.cpp
unicorn_emu/a64_unicorn.cpp
unicorn_emu/a64_unicorn.h
)
endif()
endif()
if ("riscv" IN_LIST ARCHITECTURE)
@ -69,9 +52,6 @@ if ("x86_64" IN_LIST ARCHITECTURE)
target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak)
target_architecture_specific_sources(dynarmic_tests "x86_64"
x64_cpu_info.cpp
)
target_architecture_specific_sources(dynarmic_tests "x86_64"
native/preserve_xmm.cpp
)
@ -85,50 +65,70 @@ endif()
include(CreateDirectoryGroups)
if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
add_executable(dynarmic_print_info
print_info.cpp
)
create_target_directory_groups(dynarmic_print_info)
target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl)
target_include_directories(dynarmic_print_info PRIVATE . ../src)
target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS})
target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
#
# dynarmic_print_info
#
add_executable(dynarmic_print_info
print_info.cpp
)
create_target_directory_groups(dynarmic_print_info)
target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl)
if (BOOST_NO_HEADERS)
target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool)
else()
target_link_libraries(dynarmic_print_info PRIVATE Boost::headers)
endif()
target_include_directories(dynarmic_print_info PRIVATE . ../src)
target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS})
target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
add_executable(dynarmic_test_generator
fuzz_util.cpp
fuzz_util.h
test_generator.cpp
)
#
# dynarmic_test_generator
#
add_executable(dynarmic_test_generator
fuzz_util.cpp
fuzz_util.h
test_generator.cpp
)
create_target_directory_groups(dynarmic_test_generator)
create_target_directory_groups(dynarmic_test_generator)
target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl)
target_include_directories(dynarmic_test_generator PRIVATE . ../src)
target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS})
target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl)
if (BOOST_NO_HEADERS)
target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool)
else()
target_link_libraries(dynarmic_test_generator PRIVATE Boost::headers)
endif()
target_include_directories(dynarmic_test_generator PRIVATE . ../src)
target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS})
target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
add_executable(dynarmic_test_reader
test_reader.cpp
)
create_target_directory_groups(dynarmic_test_reader)
target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl)
target_include_directories(dynarmic_test_reader PRIVATE . ../src)
target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS})
target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
#
# dynarmic_test_reader
#
add_executable(dynarmic_test_reader
test_reader.cpp
)
create_target_directory_groups(dynarmic_test_reader)
target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl)
if (BOOST_NO_HEADERS)
target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool)
else()
target_link_libraries(dynarmic_test_reader PRIVATE Boost::headers)
endif()
target_include_directories(dynarmic_test_reader PRIVATE . ../src)
target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS})
target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
#
# dynarmic_tests
#
create_target_directory_groups(dynarmic_tests)
target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl)
target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl)
if (BOOST_NO_HEADERS)
target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool)
else()
target_link_libraries(dynarmic_tests PRIVATE Boost::headers)
endif()
target_include_directories(dynarmic_tests PRIVATE . ../src)
target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)

View file

@ -34,7 +34,7 @@
#include "dynarmic/interface/A32/a32.h"
#include "dynarmic/interface/A32/disassembler.h"
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opt/passes.h"
#include "dynarmic/ir/opt_passes.h"
using namespace Dynarmic;